Example #1
    def __init__(self, item_max_len, dim_wd_emb, dim_item_hidden, rnn_config):
        super(ItemBiRNNModule, self).__init__(item_max_len=item_max_len,
                                              dim_wd_emb=dim_wd_emb,
                                              dim_item_hidden=dim_item_hidden)
        rnn_config['num_units'] = dim_item_hidden // 2  # bidirectional: half the units per direction
        self.rnn_encoder = BidirectionalRNNEncoder(
            rnn_config, mode=tf.contrib.learn.ModeKeys.TRAIN)
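A minimal sketch of the rnn_config dict this constructor expects, based on the key list documented in Example #5 below; every concrete value here is an illustrative assumption, not taken from the source:

# Hypothetical config; key names follow the list quoted in Example #5,
# but all values below are assumptions for illustration.
rnn_config = {
    'cell_class': 'LSTM',
    'num_units': 128,               # overwritten with dim_item_hidden // 2 by __init__
    'dropout_input_keep_prob': 1.0,
    'dropout_output_keep_prob': 1.0,
    'num_layers': 1,
    'reuse': None,
}
item_module = ItemBiRNNModule(item_max_len=20, dim_wd_emb=300,
                              dim_item_hidden=256, rnn_config=rnn_config)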
Example #2
    def __init__(self,
                 dim_q_hidden,
                 rnn_config,
                 q_max_len=None,
                 dim_wd_emb=None):
        super(QBiRNNModule, self).__init__(q_max_len=q_max_len,
                                           dim_wd_emb=dim_wd_emb,
                                           dim_q_hidden=dim_q_hidden)

        rnn_config['num_units'] = dim_q_hidden // 2  # bidirectional: half the units per direction
        self.rnn_encoder = BidirectionalRNNEncoder(
            rnn_config, mode=tf.contrib.learn.ModeKeys.TRAIN)
Example #3
    def __init__(self, path_max_len, dim_item_hidden, dim_kb_emb,
                 dim_sk_hidden, data_source, rnn_config):
        super(SkBiRNNModule, self).__init__(path_max_len=path_max_len,
                                            dim_item_hidden=dim_item_hidden,
                                            dim_kb_emb=dim_kb_emb,
                                            dim_sk_hidden=dim_sk_hidden)
        self.data_source = data_source
        assert self.data_source in ('kb', 'word', 'both')

        rnn_config['num_units'] = dim_sk_hidden // 2  # bidirectional: half the units per direction
        self.rnn_encoder = BidirectionalRNNEncoder(
            rnn_config, mode=tf.contrib.learn.ModeKeys.TRAIN)
Example #4
    def encode_question(self, question, question_len, answer, config):
        """
        Encode question with answer-aware attention
        :param question: [B, T, dim]
        :param question_len: [B, ]
        :param answer: [B, dim]
        :param config: parameter dict
        :return: [B, hidden_dim]
        """

        # bi-LSTM
        with tf.name_scope("rnn_encoder"):
            rnn_config = dict()
            key_list = [
                "cell_class", "num_units", "dropout_input_keep_prob",
                "dropout_output_keep_prob", "num_layers", "reuse"
            ]
            for key in key_list:
                rnn_config[key] = config[key]
            rnn_encoder = BidirectionalRNNEncoder(rnn_config, config["mode"])
            encoder_output = rnn_encoder.encode(question, question_len)

        # attention mechanism
        with tf.name_scope("attention"):
            att_config = dict()
            key_list = ["num_units"]
            for key in key_list:
                att_config[key] = config[key]

            if config["attention"] == "bah":
                att = AttentionLayerBahdanau(att_config)
                question_hidden = att.build(
                    answer, encoder_output.attention_values,
                    encoder_output.attention_values_length)
            elif config["attention"] == "avg":
                att = AttentionLayerAvg()
                question_hidden = att.build(
                    encoder_output.attention_values,
                    encoder_output.attention_values_length)

        return question_hidden
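A hedged usage sketch of encode_question; tensor shapes follow the docstring above, while the config values and the model instance are illustrative assumptions (TF 1.x, tf imported):

# Illustrative usage only; shapes follow the docstring, values are assumptions.
question = tf.placeholder(tf.float32, shape=[None, 20, 300])   # [B, T, dim]
question_len = tf.placeholder(tf.int32, shape=[None])          # [B]
answer = tf.placeholder(tf.float32, shape=[None, 300])         # [B, dim]
config = {
    'cell_class': 'LSTM', 'num_units': 128,
    'dropout_input_keep_prob': 1.0, 'dropout_output_keep_prob': 1.0,
    'num_layers': 1, 'reuse': None,
    'mode': tf.contrib.learn.ModeKeys.TRAIN,
    'attention': 'bah',                                        # or 'avg'
}
question_hidden = model.encode_question(question, question_len, answer, config)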
Example #5
class ItemBiRNNModule(ItemBaseModule):

    # Below: parameters in BidirectionalRNNEncoder
    # key_list = ["cell_class", "num_units", "dropout_input_keep_prob",
    #             "dropout_output_keep_prob", "num_layers", "reuse"]
    # Without any attention mechanism
    def __init__(self, item_max_len, dim_wd_emb, dim_item_hidden, rnn_config):
        super(ItemBiRNNModule, self).__init__(item_max_len=item_max_len,
                                              dim_wd_emb=dim_wd_emb,
                                              dim_item_hidden=dim_item_hidden)
        rnn_config['num_units'] = dim_item_hidden // 2  # bidirectional: half the units per direction
        self.rnn_encoder = BidirectionalRNNEncoder(
            rnn_config, mode=tf.contrib.learn.ModeKeys.TRAIN)

    # Input:
    #   item_wd_embedding: (batch, item_max_len, dim_wd_emb)
    #   item_len: (batch, ) as int32
    # Output:
    #   item_wd_hidden: (batch, dim_item_hidden)
    def forward(self, item_wd_embedding, item_len, reuse=None):
        LogInfo.begin_track('ItemBiRNNModule forward: ')

        with tf.variable_scope('ItemBiRNNModule', reuse=reuse):
            # stamps = item_wd_embedding.get_shape().as_list()[1]
            stamps = self.item_max_len
            show_tensor(item_wd_embedding)
            birnn_inputs = tf.unstack(item_wd_embedding,
                                      num=stamps,
                                      axis=1,
                                      name='birnn_inputs')
            # rnn_input: a list of stamps elements: (batch, n_emb)
            encoder_output = self.rnn_encoder.encode(inputs=birnn_inputs,
                                                     sequence_length=item_len,
                                                     reuse=reuse)
            birnn_outputs = tf.stack(
                encoder_output.outputs, axis=1,
                name='birnn_outputs')  # (data_size, q_len, n_hidden_emb)
            LogInfo.logs('birnn_output = %s',
                         birnn_outputs.get_shape().as_list())

            sum_wd_hidden = tf.reduce_sum(birnn_outputs,
                                          axis=1)  # (data_size, n_hidden_emb)
            item_len_mat = tf.cast(tf.expand_dims(item_len, axis=1),
                                   dtype=tf.float32)  # (data_size, 1) as float
            item_wd_hidden = tf.div(
                sum_wd_hidden,
                tf.maximum(item_len_mat, 1),  # avoid dividing by 0
                name='item_wd_hidden')  # (data_size, n_hidden_emb)
            LogInfo.logs('item_wd_hidden = %s',
                         item_wd_hidden.get_shape().as_list())

        LogInfo.end_track()
        return item_wd_hidden
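The forward pass above mean-pools the BiRNN outputs over valid timesteps only; this works because dynamic_rnn-style encoders emit zeros past sequence_length, so padded steps contribute nothing to the sum. A standalone sketch of that masked averaging (TF 1.x, shapes illustrative):

# Standalone sketch of the masked averaging used in forward() above.
import tensorflow as tf

birnn_outputs = tf.placeholder(tf.float32, [None, 10, 64])   # (batch, max_len, dim_hidden)
item_len = tf.placeholder(tf.int32, [None])                  # (batch,)

sum_hidden = tf.reduce_sum(birnn_outputs, axis=1)            # padded steps are all-zero
len_mat = tf.cast(tf.expand_dims(item_len, axis=1), tf.float32)
avg_hidden = tf.div(sum_hidden, tf.maximum(len_mat, 1.))     # guard against zero-length items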
Example #6
    def apply_seq_repr(self, input_emb, input_len, mode):
        assert self.repr_mode in ('raw', 'cnn', 'rnn')
        LogInfo.logs('apply_seq_repr: %s', self.repr_mode)
        if self.repr_mode == 'raw':
            return input_emb
        elif self.repr_mode == 'cnn':
            return tf.layers.conv1d(inputs=input_emb,
                                    padding='same',
                                    activation=tf.nn.relu,
                                    reuse=tf.AUTO_REUSE,
                                    **self.cnn_config)  # (ds, x_max_len, num_filters == dim_hidden)
        else:   # 'rnn'
            encoder_args = {'config': self.rnn_config, 'mode': mode}
            rnn_encoder = BidirectionalRNNEncoder(**encoder_args)
            return seq_encoding(emb_input=input_emb, len_input=input_len, encoder=rnn_encoder)
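cnn_config is not shown in the source; given that it is unpacked into tf.layers.conv1d, it presumably holds the remaining conv1d keyword arguments. A hypothetical example:

# Hypothetical cnn_config; any kwargs accepted by tf.layers.conv1d that are
# not already passed inline above would fit here.
cnn_config = {
    'filters': 256,        # num_filters == dim_hidden, per the comment above
    'kernel_size': 3,
    'name': 'seq_repr_cnn',
}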
Example #7
    def forward(self, v_emb, v_len, tag_indices, mode):
        """
        :param v_emb:          (ds, q_max_len, dim_emb)
        :param v_len:          (ds,) as int
        :param tag_indices:    (ds, q_max_len) as int
        :param mode: TRAIN / INFER
        """
        LogInfo.begin_track('Build kernel: [segment_kernel]')
        assert mode in (tf.contrib.learn.ModeKeys.INFER,
                        tf.contrib.learn.ModeKeys.TRAIN)
        encoder_args = {'config': self.rnn_config, 'mode': mode}
        seg_encoder = BidirectionalRNNEncoder(**encoder_args)

        with tf.variable_scope('segment_kernel', reuse=tf.AUTO_REUSE):
            transition = tf.get_variable(
                name='transition',
                dtype=tf.float32,
                shape=[self.num_classes, self.num_classes
                       ])  # (num_classes, num_classes) as transition matrix
            v_hidden = seq_encoding(
                emb_input=v_emb, len_input=v_len,
                encoder=seg_encoder)  # (ds, q_max_len, dim_seg_hidden)
            v_hidden_flat = tf.reshape(
                v_hidden,
                [-1, self.dim_seg_hidden])  # (ds * q_max_len, dim_seg_hidden)
            seg_logits = tf.reshape(
                tf.contrib.layers.fully_connected(inputs=v_hidden_flat,
                                                  num_outputs=self.num_classes,
                                                  activation_fn=None,
                                                  scope='fc'),
                shape=[-1, self.q_max_len, self.num_classes],
                name='seg_logits')  # (ds, q_max_len, num_classes)
            log_lik, _ = tf.contrib.crf.crf_log_likelihood(
                inputs=seg_logits,
                tag_indices=tag_indices,
                sequence_lengths=v_len,
                transition_params=transition)
            best_seg, viterbi_score = tf.contrib.crf.crf_decode(
                potentials=seg_logits,
                transition_params=transition,
                sequence_length=v_len)
            # best_seg: (ds, q_max_len) as int
        LogInfo.end_track()

        return v_hidden, seg_logits, log_lik, best_seg
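The returned log_lik is a per-sequence CRF log-likelihood and best_seg is the Viterbi decode. A sketch of how a caller might turn these into a training objective; the loss construction, the kernel instance, and the input tensors are assumptions, not part of the source:

# Assumed usage sketch: training on the CRF log-likelihood.
v_hidden, seg_logits, log_lik, best_seg = kernel.forward(
    v_emb, v_len, tag_indices, mode=tf.contrib.learn.ModeKeys.TRAIN)
loss = tf.reduce_mean(-log_lik)                  # maximize log-likelihood
train_op = tf.train.AdamOptimizer(1e-3).minimize(loss)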
Example #8
    def get_seq_hidden(self, seq_emb, seq_len, mode):
        encoder_args = {'config': self.rnn_config, 'mode': mode}
        rnn_encoder = BidirectionalRNNEncoder(**encoder_args)
        if self.seq_merge_mode == 'fwbw':
            return seq_encoding(emb_input=seq_emb,
                                len_input=seq_len,
                                encoder=rnn_encoder,
                                fwbw=True)
        else:
            seq_hidden = seq_encoding(emb_input=seq_emb,
                                      len_input=seq_len,
                                      encoder=rnn_encoder)
            if self.seq_merge_mode == 'max':
                return seq_hidden_max_pooling(seq_hidden_input=seq_hidden,
                                              len_input=seq_len)
            else:  # avg
                return seq_hidden_averaging(seq_hidden_input=seq_hidden,
                                            len_input=seq_len)
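seq_hidden_max_pooling is not shown in the source; based on its name and arguments, a plausible masked max-pooling looks like the sketch below (an assumption, not the project's actual code):

# Assumed sketch of masked max-pooling over time (TF 1.x).
def seq_hidden_max_pooling(seq_hidden_input, len_input):
    # seq_hidden_input: (ds, max_len, dim_hidden); len_input: (ds,)
    max_len = tf.shape(seq_hidden_input)[1]
    mask = tf.expand_dims(
        tf.sequence_mask(len_input, maxlen=max_len, dtype=tf.float32), -1)
    # push padded positions to a very small value before taking the max
    return tf.reduce_max(seq_hidden_input * mask + (1. - mask) * tf.float32.min,
                         axis=1)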
Example #9
class QBiRNNModule(QBaseModule):

    # Below: parameters in BidirectionalRNNEncoder
    # key_list = ["cell_class", "num_units", "dropout_input_keep_prob",
    #             "dropout_output_keep_prob", "num_layers", "reuse"]
    # Without any attention mechanism
    def __init__(self,
                 dim_q_hidden,
                 rnn_config,
                 q_max_len=None,
                 dim_wd_emb=None):
        super(QBiRNNModule, self).__init__(q_max_len=q_max_len,
                                           dim_wd_emb=dim_wd_emb,
                                           dim_q_hidden=dim_q_hidden)

        rnn_config['num_units'] = dim_q_hidden // 2  # bidirectional: half the units per direction
        self.rnn_encoder = BidirectionalRNNEncoder(
            rnn_config, mode=tf.contrib.learn.ModeKeys.TRAIN)

    def forward(self, q_embedding, q_len, reuse=None):
        LogInfo.begin_track('QBiRNNModule forward: ')

        with tf.variable_scope('QBiRNNModule', reuse=reuse):
            # stamps = q_embedding.get_shape().as_list()[1]
            stamps = self.q_max_len
            birnn_inputs = tf.unstack(q_embedding,
                                      num=stamps,
                                      axis=1,
                                      name='birnn_inputs')
            # rnn_input: a list of stamps elements: (batch, n_emb)
            encoder_output = self.rnn_encoder.encode(inputs=birnn_inputs,
                                                     sequence_length=q_len,
                                                     reuse=reuse)
            q_hidden = tf.stack(
                encoder_output.outputs, axis=1,
                name='q_hidden')  # (batch, q_max_len, dim_q_hidden)

        LogInfo.end_track()
        return q_hidden
Example #10
    def _build_graph(self):
        self.context_idx = tf.placeholder(
            dtype=tf.int32, shape=[None, self.config.get("max_seq_len")])
        self.context_seq = tf.placeholder(dtype=tf.int32, shape=[
            None,
        ])
        self.pinlei_idx = tf.placeholder(dtype=tf.int32, shape=[
            None,
        ])

        with tf.device('/cpu:0'), tf.name_scope("embedding_layer"):
            # LogInfo.logs("Embedding shape: %s (%d*%d).", self.embedding.shape,
            #              self.config.get("vocab_size"), self.config.get("embedding_dim"))
            term_embedding = tf.get_variable(
                name="embedding",
                shape=[
                    self.config.get("vocab_size"),
                    self.config.get("embedding_dim")
                ],
                dtype=tf.float32,
                initializer=tf.constant_initializer(self.embedding))
            self.context_embedding = tf.nn.embedding_lookup(
                term_embedding, self.context_idx)
            self.pinlei_embedding = tf.nn.embedding_lookup(
                term_embedding, self.pinlei_idx)
            # a list of max_seq_len tensors, each (batch_size, embedding_dim), fed to rnn_encoder
            self.context_slice = [
                tf.squeeze(_input, [1])
                for _input in tf.split(self.context_embedding,
                                       self.config.get("max_seq_len"),
                                       axis=1)
            ]

        # bi-LSTM
        with tf.name_scope("rnn_encoder"):
            rnn_config = dict()
            key_list = [
                "cell_class", "num_units", "dropout_input_keep_prob",
                "dropout_output_keep_prob", "num_layers", "reuse"
            ]
            for key in key_list:
                rnn_config[key] = self.config.get(key)
            rnn_encoder = BidirectionalRNNEncoder(rnn_config, self.mode)
            self.encoder_output = rnn_encoder.encode(self.context_slice,
                                                     self.context_seq)

        # attention mechanism
        with tf.name_scope("attention"):
            att_config = dict()
            key_list = ["num_units"]
            for key in key_list:
                att_config[key] = self.config.get(key)

            if self.config.get("attention") == "bah":
                att = AttentionLayerBahdanau_old(att_config)
                self.query_hidden = att.build(
                    self.pinlei_embedding,
                    self.encoder_output.attention_values,
                    self.encoder_output.attention_values_length)
            elif self.config.get("attention") == "avg":
                att = AttentionLayerAvg_old()
                self.query_hidden = att.build(
                    self.encoder_output.attention_values,
                    self.encoder_output.attention_values_length)

        self.hidden_dim = self.query_hidden.get_shape().as_list()[-1]

        # training parameters
        with tf.name_scope("parameters"):
            self.W_p = tf.get_variable(
                name="W_p",
                shape=[self.config.get("embedding_dim"), self.hidden_dim],
                dtype=tf.float32,
                initializer=tf.contrib.layers.xavier_initializer(uniform=True))
            self.b_p = tf.get_variable(
                name="b_p",
                shape=[self.hidden_dim],
                dtype=tf.float32,
                initializer=tf.constant_initializer(0.0))
            self.W_f = tf.get_variable(
                name="W_f",
                shape=[self.hidden_dim * 2, self.hidden_dim],
                dtype=tf.float32,
                initializer=tf.contrib.layers.xavier_initializer(uniform=True))
            self.b_f = tf.get_variable(
                name="b_f",
                shape=[self.hidden_dim],
                dtype=tf.float32,
                initializer=tf.constant_initializer(0.0))
            self.W_o = tf.get_variable(
                name="W_o",
                shape=[self.hidden_dim, 1],
                dtype=tf.float32,
                initializer=tf.contrib.layers.xavier_initializer(uniform=True))
            self.b_o = tf.get_variable(
                name="b_o",
                shape=[1],
                dtype=tf.float32,
                initializer=tf.constant_initializer(0.0))
        # above bi-LSTM + attention
        with tf.name_scope("score"):
            self.pinlei_hidden = self.transfer(
                tf.add(tf.matmul(self.pinlei_embedding, self.W_p), self.b_p))
            self.final = self.transfer(
                tf.add(
                    tf.matmul(
                        tf.concat([self.query_hidden, self.pinlei_hidden], 1),
                        self.W_f), self.b_f))
            # self.score = tf.add(tf.matmul(self.final, self.W_o), self.b_o)  # tensorflow 1.0.0
            self.score = tf.nn.xw_plus_b(self.final, self.W_o, self.b_o)

        # hinge loss
        if self.mode == tf.contrib.learn.ModeKeys.TRAIN:
            self.loss = hinge_loss(
                self.score,
                int(self.config.get("batch_size") / self.config.get("PN")),
                self.config.get("PN"), self.config.get("margin"))
            self.train_op = get_optimizer(self.config.get("optimizer"),
                                          self.config.get("lr")).minimize(
                                              self.loss)
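hinge_loss is imported from elsewhere; given the call above (score tensor, group count batch_size / PN, group size PN, margin), a plausible reconstruction groups one positive with PN - 1 negatives per row. This is an assumption, not the source implementation:

# Assumed reconstruction of hinge_loss; the real implementation is not shown.
def hinge_loss(score, row, col, margin):
    # score: (row * col, 1); each group holds 1 positive then (col - 1) negatives.
    scores = tf.reshape(score, [row, col])
    pos = scores[:, 0:1]                      # (row, 1)
    neg = scores[:, 1:]                       # (row, col - 1)
    return tf.reduce_mean(tf.nn.relu(neg + margin - pos))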
Example #11
    def forward(self, el_size, qw_emb, qw_len,
                pw_sup_emb, pw_sup_len, type_trans,
                el_sup_mask, el_type_signa, el_indv_feats, el_comb_feats, mode):
        """
        Note: number of paths in a schema == number of entities in the schema
        local_mem_size: the local number of relevant paths in the current batch.
        :param el_size:         (ds, )
        :param qw_emb:          (ds, el_max_size, qw_max_len, dim_emb)
        :param qw_len:          (ds, el_max_size)
        :param pw_sup_emb:      (local_mem_size, pw_max_len, dim_emb)
        :param pw_sup_len:      (local_mem_size,)
        :param type_trans:      (local_mem_size, dim_type)
        :param el_sup_mask:     (ds, el_max_size, local_mem_size)
        :param el_type_signa:   (ds, el_max_size, dim_type)
        :param el_indv_feats:   (ds, el_max_size, el_feat_size)
        :param el_comb_feats:   (ds, 1)
        :param mode:    TRAIN / INFER
        """
        """
        180416:
        Let's assume ds=16*2=32, el_max_size=3, qw_max_len=20, dim_emb=300, local_mem_size=6K
        Then ds*el_max_size*qw_max_len ~= 2K
        """
        LogInfo.begin_track('Build kernel: [el_kernel]')
        assert mode in (tf.contrib.learn.ModeKeys.INFER, tf.contrib.learn.ModeKeys.TRAIN)

        rnn_encoder = None
        if self.rnn_config is not None:
            encoder_args = {'config': self.rnn_config, 'mode': mode}
            rnn_encoder = BidirectionalRNNEncoder(**encoder_args)
        raw_shape = tf.shape(el_sup_mask)
        el_max_size = raw_shape[1]
        local_mem_size = raw_shape[2]
        dim_type = tf.shape(type_trans)[1]

        """ Possible reshapes """
        qw_emb = tf.reshape(qw_emb, [-1, self.qw_max_len, self.dim_emb])
        # (ds * el_max_size, qw_max_len, dim_emb)
        qw_len = tf.reshape(qw_len, [-1])       # (ds * el_max_size)

        """ Calculate attention / non-attention question representation """
        pw_sup_repr = seq_encoding_with_aggregation(emb_input=pw_sup_emb, len_input=pw_sup_len,
                                                    rnn_encoder=rnn_encoder,
                                                    seq_merge_mode=self.seq_merge_mode)
        # (local_mem_size, dim_hidden)
        if self.att_config is not None:
            att_func = self.att_config['att_func']
            assert att_func == 'dot'        # TODO: currently only dot-product attention is supported
            qw_hidden = seq_encoding(emb_input=qw_emb, len_input=qw_len, encoder=rnn_encoder)
            # (ds*el_max_size, qw_max_len, dim_hidden)
            qw_mask = tf.sequence_mask(lengths=qw_len,
                                       maxlen=self.qw_max_len,
                                       dtype=tf.float32,
                                       name='qw_mask')  # (ds*el_max_size, qw_max_len)
            flat_qw_hidden = tf.reshape(qw_hidden, shape=[-1, self.dim_hidden], name='flat_qw_hidden')
            # (ds*el_max_size*qw_max_len, dim_hidden)

            """ Step 1: Very simple & fast way to calculate dot attention """
            raw_mutual_att_mat = tf.matmul(
                flat_qw_hidden,
                tf.transpose(pw_sup_repr),
                name='raw_mutual_att_mat'
            )   # (ds*el_max_size*qw_max_len, local_mem_size)
            mutual_att_mat = tf.reshape(
                raw_mutual_att_mat,
                shape=[-1, self.qw_max_len, local_mem_size],
                name='mutual_att_mat')
            # (ds*el_max_size, qw_max_len, local_mem_size)

            """ Step 2: Prepare masked att_mat and normalized distribution """
            qw_mask_3dim = tf.expand_dims(qw_mask, axis=-1, name='qw_mask_3dim')
            # (ds*el_max_size, qw_max_len, 1)
            masked_att_mat = (
                qw_mask_3dim * mutual_att_mat +
                (1. - qw_mask_3dim) * tf.float32.min
            )   # (ds*el_max_size, qw_max_len, local_mem_size); padded steps forced to -inf-like values
            unnorm_weight = tf.transpose(masked_att_mat, [0, 2, 1], name='masked_att_mat')
            # (ds*el_max_size, local_mem_size, qw_max_len)
            norm_weight = tf.nn.softmax(unnorm_weight, name='norm_weight')

            """ Step 3: Got final qw_repr w.r.t different support paths """
            qw_repr = tf.matmul(norm_weight, qw_hidden, name='qw_repr')
            # batch_matmul: (ds*el_max_size, local_mem_size, dim_hidden)

        else:       # noAtt, very simple
            raw_qw_repr = seq_encoding_with_aggregation(emb_input=qw_emb, len_input=qw_len,
                                                        rnn_encoder=rnn_encoder,
                                                        seq_merge_mode=self.seq_merge_mode)
            # (ds*el_max_size, dim_hidden)
            qw_repr = tf.expand_dims(raw_qw_repr, axis=1, name='qw_repr')
            # (ds*el_max_size, 1, dim_hidden)

        with tf.variable_scope('el_kernel', reuse=tf.AUTO_REUSE):
            """ Calculate cosine similarity """
            flat_pw_sup_repr = tf.expand_dims(pw_sup_repr, axis=0, name='flat_pw_sup_repr')
            # (1, local_mem_size, dim_hidden)
            sim_score = cosine_sim(
                lf_input=qw_repr,               # (ds*el_max_size, [1 or local_mem_size], dim_hidden)
                rt_input=flat_pw_sup_repr       # (1, local_mem_size, dim_hidden)
            )
            # (ds*el_max_size, local_mem_size)

            """ Turning into type distribution """
            flat_el_sup_mask = tf.reshape(el_sup_mask, shape=[-1, local_mem_size], name='flat_el_sup_mask')
            # (ds*el_max_size, local_mem_size)
            mask_score = flat_el_sup_mask * sim_score + (1. - flat_el_sup_mask) * tf.float32.min
            pred_prob = tf.nn.softmax(logits=mask_score, name='pred_prob')
            # (ds*el_max_size, local_mem_size)
            raw_type_prob = tf.matmul(pred_prob, type_trans, name='raw_type_prob')
            # (ds*el_max_size, dim_type)
            type_prob = tf.reshape(raw_type_prob, shape=[-1, el_max_size, dim_type], name='type_prob')
            # (ds, el_max_size, dim_type)
            type_match_score = tf.reduce_sum(el_type_signa*type_prob,
                                             axis=-1, keep_dims=True,
                                             name='type_match_score')   # (ds, el_max_size, 1)

            """ Feature concat and produce scores """
            el_indv_concat = tf.concat([type_match_score, el_indv_feats],
                                       axis=-1, name='el_indv_concat')  # (ds, el_max_size, 1+el_feat_size)
            el_mask = tf.sequence_mask(lengths=el_size, maxlen=el_max_size,
                                       dtype=tf.float32, name='el_mask')    # (ds, el_max_size)
            sum_indv_feats = tf.reduce_sum(
                el_indv_concat * tf.expand_dims(el_mask, axis=-1),
                axis=1, name='sum_indv_feats'
            )   # (ds, 1+el_feat_size)
            final_feats = tf.concat([sum_indv_feats, el_comb_feats], axis=-1, name='final_feats')
            # (ds, 1+el_feat_size+1) --> type_match + indv_feats + comb_feat
            el_score = tf.contrib.layers.fully_connected(
                inputs=final_feats,
                num_outputs=1,
                activation_fn=None,
                scope='out_fc',
                reuse=tf.AUTO_REUSE
            )  # (ds, 1), the final entity linking score

        LogInfo.end_track()
        return el_score, final_feats
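Both the attention and the support-mask steps above use the same pattern: force invalid entries to tf.float32.min before a softmax so they receive essentially zero probability. A tiny standalone demonstration:

# Standalone demo of the mask-then-softmax pattern used above.
scores = tf.constant([[2.0, 1.0, 3.0]])
mask = tf.constant([[1.0, 1.0, 0.0]])        # the last entry is padding
masked = mask * scores + (1. - mask) * tf.float32.min
probs = tf.nn.softmax(masked)                # padding gets ~0 probability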
Example #12
import tensorflow as tf

from kangqi.util.LogUtil import LogInfo
# BidirectionalRNNEncoder and seq_encoding come from the surrounding project;
# their import paths are not shown in this snippet.

max_len = 10
dim_emb = 30
n_words = 500
dim_hidden = 16

v_input = tf.placeholder(tf.int32, shape=[None, max_len])
v_len = tf.placeholder(tf.int32, shape=[None])
rnn_config = {
    'cell_class': 'GRU',
    'num_units': dim_hidden,
    'reuse': tf.AUTO_REUSE
}
encoder_args = {'config': rnn_config, 'mode': tf.contrib.learn.ModeKeys.INFER}
rnn_encoder = BidirectionalRNNEncoder(**encoder_args)

with tf.variable_scope('embedding_lookup', reuse=tf.AUTO_REUSE):
    with tf.device('/cpu:0'):
        w_embedding_init = tf.placeholder(dtype=tf.float32,
                                          shape=(n_words, dim_emb),
                                          name='w_embedding_init')
        w_embedding = tf.get_variable(name='w_embedding',
                                      initializer=w_embedding_init)
        v_emb = tf.nn.embedding_lookup(params=w_embedding, ids=v_input)
    v_hidden = seq_encoding(emb_input=v_emb,
                            len_input=v_len,
                            encoder=rnn_encoder,
                            reuse=tf.AUTO_REUSE)
    LogInfo.logs('v_hidden: %s', v_hidden.get_shape().as_list())
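Because w_embedding is initialized from a placeholder, the pretrained matrix must be fed exactly once, when the initializer runs. A hedged usage sketch; the NumPy matrix is stand-in data:

# Assumed usage: feed the pretrained embeddings when initializing variables.
import numpy as np

pretrained = np.random.rand(n_words, dim_emb).astype(np.float32)  # stand-in data
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer(),
             feed_dict={w_embedding_init: pretrained})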
Example #13
    def forward(self, el_size, qw_emb, qw_len, pw_sup_emb, pw_sup_len,
                sup_size, type_trans, el_type_signa, el_indv_feats,
                el_comb_feats, mode):
        """
        Note: number of paths in a schema == number of entities in the schema
        :param el_size:         (ds, )
        :param qw_emb:          (ds, path_max_size, qw_max_len, dim_emb)
        :param qw_len:          (ds, path_max_size)
        :param pw_sup_emb:      (ds, path_max_size, sup_max_size, pw_max_len, dim_emb)
        :param pw_sup_len:      (ds, path_max_size, sup_max_size)
        :param sup_size:        (ds, path_max_size)
        :param type_trans:      (ds, path_max_size, sup_max_size, dim_type)
        :param el_type_signa:   (ds, el_max_size, dim_type)
        :param el_indv_feats:   (ds, el_max_size, el_feat_size)
        :param el_comb_feats:   (ds, 1)
        :param mode:    TRAIN / INFER
        """
        LogInfo.begin_track('Build kernel: [el_kernel]')
        assert mode in (tf.contrib.learn.ModeKeys.INFER,
                        tf.contrib.learn.ModeKeys.TRAIN)

        rnn_encoder = None
        if self.rnn_config is not None:
            encoder_args = {'config': self.rnn_config, 'mode': mode}
            rnn_encoder = BidirectionalRNNEncoder(**encoder_args)

        raw_shape = tf.shape(pw_sup_len)
        dyn_el_max_size = raw_shape[1]
        dyn_sup_max_size = raw_shape[2]
        """ Possible reshapes """
        qw_emb = tf.reshape(qw_emb, [-1, self.qw_max_len, self.dim_emb])
        # (ds * el_max_size, qw_max_len, dim_emb)
        qw_len = tf.reshape(qw_len, [-1])  # (ds * el_max_size)

        pw_sup_emb = tf.reshape(pw_sup_emb,
                                [-1, self.pw_max_len, self.dim_emb])
        # (ds * el_max_size * sup_max_size, pw_max_len, dim_emb)
        pw_sup_len = tf.reshape(pw_sup_len, [-1])
        """ Calculate attention / non-attention question representation """
        pw_sup_repr = seq_encoding_with_aggregation(
            emb_input=pw_sup_emb,
            len_input=pw_sup_len,
            rnn_encoder=rnn_encoder,
            seq_merge_mode=self.seq_merge_mode)
        # (ds*el_max_size*sup_max_size, dim_hidden)

        if self.att_config is not None:
            dim_att_len = self.att_config['dim_att_hidden']
            att_func = self.att_config['att_func']
            qw_hidden = seq_encoding(emb_input=qw_emb,
                                     len_input=qw_len,
                                     encoder=rnn_encoder)
            # (ds * el_max_size, qw_max_len, dim_hidden)
            qw_mask = tf.sequence_mask(lengths=qw_len,
                                       maxlen=self.qw_max_len,
                                       dtype=tf.float32,
                                       name='qw_mask')  # (ds * el_max_size, qw_max_len)
            tile_qw_hidden = tf.tile(
                tf.expand_dims(
                    qw_hidden,
                    axis=1),  # (ds*el_max_size, 1, qw_max_len, dim_hidden)
                multiples=[1, dyn_sup_max_size, 1, 1],
                name='tile_qw_hidden'
            )  # (ds*el_max_size, sup_max_size, qw_max_len, dim_hidden)
            tile_qw_mask = tf.tile(
                tf.expand_dims(qw_mask, axis=1),
                multiples=[1, dyn_sup_max_size, 1],
                name='tile_qw_mask'
            )  # (ds*el_max_size, sup_max_size, qw_max_len)

            expand_qw_mask = tf.reshape(tile_qw_mask, [-1, self.qw_max_len])
            expand_qw_hidden = tf.reshape(
                tile_qw_hidden, [-1, self.qw_max_len, self.dim_hidden])
            # (ds*el_max_size*sup_max_size, qw_max_len, dim_hidden)

            simple_att = SimpleAttention(lf_max_len=self.qw_max_len,
                                         dim_att_hidden=dim_att_len,
                                         att_func=att_func)
            qw_att_repr, _, _ = simple_att.forward(lf_input=expand_qw_hidden,
                                                   lf_mask=expand_qw_mask,
                                                   fix_rt_input=pw_sup_repr)
            # (ds*el_max_size*sup_max_size, dim_hidden)
            final_qw_repr = qw_att_repr
        else:
            qw_repr = seq_encoding_with_aggregation(
                emb_input=qw_emb,
                len_input=qw_len,
                rnn_encoder=rnn_encoder,
                seq_merge_mode=self.seq_merge_mode)
            # (ds*el_max_size, dim_hidden)
            tile_qw_repr = tf.tile(
                tf.expand_dims(qw_repr, axis=1),
                multiples=[1, dyn_sup_max_size, 1],
                name='tile_qw_repr'
            )  # (ds*el_max_size, sup_max_size, dim_hidden)
            expand_qw_repr = tf.reshape(tile_qw_repr, [-1, self.dim_hidden])
            final_qw_repr = expand_qw_repr

        with tf.variable_scope('el_kernel', reuse=tf.AUTO_REUSE):
            """ Calculate cosine similarity, and turning into type distribution """
            sim_score = cosine_sim(
                lf_input=final_qw_repr,
                rt_input=pw_sup_repr)  # (ds*el_max_size, sup_max_size)
            sim_score = tf.reshape(
                sim_score, shape=raw_shape,
                name='sim_score')  # (ds, el_max_size, sup_max_size)
            sup_mask = tf.sequence_mask(
                lengths=sup_size,
                maxlen=dyn_sup_max_size,
                dtype=tf.float32,
                name='sup_mask')  # (ds, el_max_size, sup_max_size)
            mask_score = sup_mask * sim_score + (1. - sup_mask) * tf.float32.min
            pred_prob = tf.nn.softmax(
                logits=mask_score,
                name='pred_prob')  # (ds, el_max_size, sup_max_size)
            type_prob = tf.matmul(
                a=tf.expand_dims(pred_prob,
                                 axis=2),  # (ds, el_max_size, 1, sup_max_size)
                b=type_trans  # (ds, el_max_size, sup_max_size, dim_type)
            )  # (ds, el_max_size, 1, dim_type)
            type_prob = tf.squeeze(
                input=type_prob, axis=2,
                name='type_prob')  # (ds, el_max_size, dim_type)
            type_match_score = tf.reduce_sum(
                el_type_signa * type_prob,
                axis=-1,
                keep_dims=True,
                name='type_match_score')  # (ds, el_max_size, 1)
            """ Feature concat and produce scores """
            el_indv_concat = tf.concat(
                [type_match_score, el_indv_feats],
                axis=-1,
                name='el_indv_concat')  # (ds, el_max_size, 1+el_feat_size)
            el_mask = tf.sequence_mask(lengths=el_size,
                                       maxlen=dyn_el_max_size,
                                       dtype=tf.float32,
                                       name='el_mask')  # (ds, el_max_size)
            sum_indv_feats = tf.reduce_sum(
                el_indv_concat * tf.expand_dims(el_mask, axis=-1),
                axis=1,
                name='sum_indv_feats')  # (ds, 1+el_feat_size)
            final_feats = tf.concat([sum_indv_feats, el_comb_feats],
                                    axis=-1,
                                    name='final_feats')
            # (ds, 1+el_feat_size+1) --> type_match + indv_feats + comb_feat
            el_score = tf.contrib.layers.fully_connected(
                inputs=final_feats,
                num_outputs=1,
                activation_fn=None,
                scope='out_fc',
                reuse=tf.AUTO_REUSE
            )  # (ds, 1), the final entity linking score

        LogInfo.end_track()
        return el_score, final_feats
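The type_prob computation above uses a batched matmul with two leading batch dimensions. A minimal shape check with illustrative sizes:

# Shape sanity check for the batched matmul that produces type_prob.
pred_prob = tf.zeros([4, 3, 5])              # (ds, el_max_size, sup_max_size)
type_trans = tf.zeros([4, 3, 5, 7])          # (ds, el_max_size, sup_max_size, dim_type)
type_prob = tf.matmul(tf.expand_dims(pred_prob, axis=2), type_trans)
print(type_prob.get_shape().as_list())       # [4, 3, 1, 7] -> squeeze axis 2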
Example #14
    def __init__(self,
                 sess,
                 n_words,
                 n_preds,
                 dim_emb,
                 q_max_len,
                 path_max_len,
                 pword_max_len,
                 dim_hidden,
                 rnn_cell,
                 merge_config,
                 reuse=tf.AUTO_REUSE,
                 verbose=0):
        LogInfo.begin_track('SimpqEvalModel Building ...')
        super(SimpqEvalModel, self).__init__(sess=sess, verbose=verbose)

        # ======== declare sub-modules (the same as optm part) ======== #

        num_units = dim_hidden // 2  # bidirectional: half the units per direction
        rnn_config = {'num_units': num_units, 'cell_class': rnn_cell}
        encoder_args = {
            'config': rnn_config,
            'mode': tf.contrib.learn.ModeKeys.TRAIN
        }

        q_encoder = BidirectionalRNNEncoder(**encoder_args)
        pred_encoder = BidirectionalRNNEncoder(**encoder_args)
        pword_encoder = BidirectionalRNNEncoder(**encoder_args)
        merge_func = get_merge_function(merge_config=merge_config,
                                        dim_hidden=dim_hidden,
                                        reuse=reuse)
        LogInfo.logs('Sub-modules declared.')

        # ======== define tensors ======== #

        q_words_input = tf.placeholder(
            dtype=tf.int32, shape=[None, q_max_len],
            name='q_words_input')  # (data_size, q_max_len)
        q_words_len_input = tf.placeholder(
            dtype=tf.int32, shape=[None],
            name='q_words_len_input')  # (data_size, )
        preds_input = tf.placeholder(
            dtype=tf.int32, shape=[None, path_max_len],
            name='preds_input')  # (data_size, path_max_len)
        preds_len_input = tf.placeholder(
            dtype=tf.int32, shape=[None],
            name='preds_len_input')  # (data_size, )
        pwords_input = tf.placeholder(
            dtype=tf.int32, shape=[None, pword_max_len],
            name='pwords_input')  # (data_size, pword_max_len)
        pwords_len_input = tf.placeholder(
            dtype=tf.int32, shape=[None],
            name='pwords_len_input')  # (data_size, )
        self.eval_input_tf_list = [
            q_words_input, q_words_len_input, preds_input, preds_len_input,
            pwords_input, pwords_len_input
        ]
        LogInfo.begin_track('Showing %d input tensors:',
                            len(self.eval_input_tf_list))
        for tensor in self.eval_input_tf_list:
            show_tensor(tensor)
        LogInfo.end_track()

        # ======== start building model ======== #

        with tf.variable_scope('Embedding_Lookup', reuse=reuse):
            with tf.device("/cpu:0"):
                self.w_embedding_init = tf.placeholder(dtype=tf.float32,
                                                       shape=(n_words,
                                                              dim_emb),
                                                       name='w_embedding_init')
                self.p_embedding_init = tf.placeholder(dtype=tf.float32,
                                                       shape=(n_preds,
                                                              dim_emb),
                                                       name='p_embedding_init')
                w_embedding = tf.get_variable(
                    name='w_embedding', initializer=self.w_embedding_init)
                p_embedding = tf.get_variable(
                    name='p_embedding', initializer=self.p_embedding_init)

                q_words_embedding = tf.nn.embedding_lookup(
                    params=w_embedding, ids=q_words_input,
                    name='q_embedding')  # (batch, q_max_len, dim_emb)
                preds_embedding = tf.nn.embedding_lookup(
                    params=p_embedding,
                    ids=preds_input,
                    name='preds_embedding')  # (batch, path_max_len, dim_emb)
                pwords_embedding = tf.nn.embedding_lookup(
                    params=w_embedding,
                    ids=pwords_input,
                    name='pwords_embedding')  # (batch, pword_max_len, dim_emb)

        with tf.variable_scope('Question', reuse=reuse):
            q_words_hidden = seq_encoding(
                emb_input=q_words_embedding,
                len_input=q_words_len_input,
                encoder=q_encoder,
                reuse=reuse)  # (data_size, q_max_len, dim_hidden)
            q_hidden = tf.reduce_max(
                q_words_hidden, axis=1,
                name='q_hidden')  # (data_size, dim_hidden)

        with tf.variable_scope('Schema', reuse=reuse):
            with tf.variable_scope('Path', reuse=reuse):
                preds_hidden = seq_encoding(
                    emb_input=preds_embedding,
                    len_input=preds_len_input,
                    encoder=pred_encoder,
                    reuse=reuse)  # (data_size, path_max_len, dim_hidden)
            with tf.variable_scope('Pword', reuse=reuse):
                pwords_hidden = seq_encoding(
                    emb_input=pwords_embedding,
                    len_input=pwords_len_input,
                    encoder=pword_encoder,
                    reuse=reuse)  # (data_size, pword_max_len, dim_hidden)
            schema_hidden = schema_encoding(preds_hidden=preds_hidden,
                                            preds_len=preds_len_input,
                                            pwords_hidden=pwords_hidden,
                                            pwords_len=pwords_len_input)

        with tf.variable_scope('Merge', reuse=reuse):
            # self.score = cosine_sim(lf_input=q_hidden, rt_input=schema_hidden)    # (data_size, )
            self.score = merge_func(q_hidden, schema_hidden)  # (data_size, )
            # Now final score defined.

        self.eval_summary = tf.summary.merge_all(key='eval')
        LogInfo.logs('* final score defined.')

        LogInfo.end_track()
Example #15
    def __init__(self, sess, n_words, n_preds, dim_emb,
                 q_max_len, path_max_len, pword_max_len,
                 dim_hidden, rnn_cell, merge_config,
                 margin, learning_rate, optm_name,
                 reuse=tf.AUTO_REUSE, verbose=0):
        LogInfo.begin_track('SimpqOptmModel Building ...')
        super(SimpqOptmModel, self).__init__(sess=sess, ob_batch_num=100, verbose=verbose)

        assert optm_name in ('Adam', 'Adadelta', 'Adagrad', 'GradientDescent')
        optm_name += 'Optimizer'

        # ======== declare sub-modules ======== #

        num_units = dim_hidden // 2          # bidirectional: half the units per direction
        rnn_config = {'num_units': num_units, 'cell_class': rnn_cell}
        encoder_args = {'config': rnn_config, 'mode': tf.contrib.learn.ModeKeys.TRAIN}

        q_encoder = BidirectionalRNNEncoder(**encoder_args)
        pred_encoder = BidirectionalRNNEncoder(**encoder_args)
        pword_encoder = BidirectionalRNNEncoder(**encoder_args)
        merge_func = get_merge_function(merge_config=merge_config, dim_hidden=dim_hidden, reuse=reuse)
        LogInfo.logs('Sub-modules declared.')

        # ======== define tensors ======== #

        q_words_input = tf.placeholder(dtype=tf.int32,
                                       shape=[None, q_max_len],
                                       name='q_words_input')            # (data_size, q_max_len)
        q_words_len_input = tf.placeholder(dtype=tf.int32,
                                           shape=[None],
                                           name='q_words_len_input')    # (data_size, )
        self.optm_input_tf_list = [q_words_input, q_words_len_input]

        sc_tensor_groups = []       # [ pos_tensors, neg_tensors ]
        for cate in ('pos', 'neg'):
            preds_input = tf.placeholder(dtype=tf.int32,
                                         shape=[None, path_max_len],
                                         name=cate+'_preds_input')          # (data_size, path_max_len)
            preds_len_input = tf.placeholder(dtype=tf.int32,
                                             shape=[None],
                                             name=cate+'_preds_len_input')  # (data_size, )
            pwords_input = tf.placeholder(dtype=tf.int32,
                                          shape=[None, pword_max_len],
                                          name=cate+'_pwords_input')        # (data_size, pword_max_len)
            pwords_len_input = tf.placeholder(dtype=tf.int32,
                                              shape=[None],
                                              name=cate+'_pwords_len_input')    # (data_size, )
            tensor_group = [preds_input, preds_len_input, pwords_input, pwords_len_input]
            sc_tensor_groups.append(tensor_group)
            self.optm_input_tf_list += tensor_group
        LogInfo.begin_track('Showing %d input tensors:', len(self.optm_input_tf_list))
        for tensor in self.optm_input_tf_list:
            show_tensor(tensor)
        LogInfo.end_track()

        # ======== start building model ======== #

        with tf.variable_scope('Embedding_Lookup', reuse=reuse):
            with tf.device('/cpu:0'):
                self.w_embedding_init = tf.placeholder(dtype=tf.float32,
                                                       shape=(n_words, dim_emb),
                                                       name='w_embedding_init')
                self.p_embedding_init = tf.placeholder(dtype=tf.float32,
                                                       shape=(n_preds, dim_emb),
                                                       name='p_embedding_init')
                w_embedding = tf.get_variable(name='w_embedding',
                                              initializer=self.w_embedding_init)
                p_embedding = tf.get_variable(name='p_embedding',
                                              initializer=self.p_embedding_init)

                q_words_embedding = tf.nn.embedding_lookup(params=w_embedding,
                                                           ids=q_words_input,
                                                           name='q_embedding')      # (batch, q_max_len, dim_emb)

        with tf.variable_scope('Question', reuse=reuse):
            q_words_hidden = seq_encoding(
                emb_input=q_words_embedding,
                len_input=q_words_len_input,
                encoder=q_encoder, reuse=reuse)         # (data_size, q_max_len, dim_hidden)
            # q_hidden = tf.reduce_max(q_words_hidden,
            #                          axis=1, name='q_hidden')    # (data_size, dim_hidden)
            q_hidden = seq_hidden_max_pooling(seq_hidden_input=q_words_hidden,
                                              len_input=q_words_len_input)
            # TODO: Currently we just follow yu2017.

        logits_list = []        # store two tensors: positive and negative score
        for cate, sc_tensor_group in zip(('pos', 'neg'), sc_tensor_groups):
            LogInfo.logs('Calculate score at %s side ...', cate)
            preds_input, preds_len_input, pwords_input, pwords_len_input = sc_tensor_group
            with tf.variable_scope('Embedding_Lookup', reuse=reuse):
                with tf.device("/cpu:0"):
                    preds_embedding = tf.nn.embedding_lookup(
                        params=p_embedding, ids=preds_input, name='preds_embedding'
                    )       # (batch, path_max_len, dim_emb)
                    pwords_embedding = tf.nn.embedding_lookup(
                        params=w_embedding, ids=pwords_input, name='pwords_embedding'
                    )       # (batch, pword_max_len, dim_emb)
            with tf.variable_scope('Schema', reuse=reuse):
                with tf.variable_scope('Path', reuse=reuse):
                    preds_hidden = seq_encoding(
                        emb_input=preds_embedding,
                        len_input=preds_len_input,
                        encoder=pred_encoder, reuse=reuse)      # (data_size, path_max_len, dim_hidden)
                with tf.variable_scope('Pword', reuse=reuse):
                    pwords_hidden = seq_encoding(
                        emb_input=pwords_embedding,
                        len_input=pwords_len_input,
                        encoder=pword_encoder, reuse=reuse)     # (data_size, pword_max_len, dim_hidden)
                schema_hidden = schema_encoding(
                    preds_hidden=preds_hidden, preds_len=preds_len_input,
                    pwords_hidden=pwords_hidden, pwords_len=pwords_len_input)
            with tf.variable_scope('Merge', reuse=reuse):
                # logits = cosine_sim(lf_input=q_hidden, rt_input=schema_hidden)    # (data_size, )
                logits = merge_func(q_hidden, schema_hidden)  # (data_size, )
            logits_list.append(logits)

        # ======== define loss and updates ======== #

        pos_logits, neg_logits = logits_list
        margin_loss = tf.nn.relu(neg_logits + margin - pos_logits,
                                 name='margin_loss')
        self.avg_loss = tf.reduce_mean(margin_loss, name='avg_loss')
        tf.summary.scalar('avg_loss', self.avg_loss, collections=['optm'])
        optimizer = getattr(tf.train, optm_name)
        self.optm_step = optimizer(learning_rate).minimize(self.avg_loss)
        self.optm_summary = tf.summary.merge_all(key='optm')
        LogInfo.logs('* avg_loss and optm_step defined.')

        LogInfo.end_track()
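The optimizer class is resolved dynamically from tf.train by name; a standalone illustration of that lookup (the trailing variable and loss are stand-ins):

# Standalone illustration of the dynamic optimizer lookup used above.
optm_name = 'Adam' + 'Optimizer'                 # Adam/Adadelta/Adagrad/GradientDescent
optimizer_cls = getattr(tf.train, optm_name)     # -> tf.train.AdamOptimizer
w = tf.Variable(1.0)                             # stand-in parameter
train_op = optimizer_cls(0.001).minimize(tf.square(w))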
Example #16
    def forward(self, qw_emb, qw_len, sc_len, p_emb, pw_emb, p_len, pw_len, mode):
        """
        :param qw_emb:  (ds, qw_max_len, dim_qw_emb)
        :param qw_len:  (ds, )
        :param sc_len:  (ds, )
        :param p_emb:   (ds, sc_max_len, p_max_len, dim_p_emb)
        :param pw_emb:  (ds, sc_max_len, pw_max_len, dim_pw_emb)
        :param p_len:   (ds, sc_max_len)
        :param pw_len:  (ds, sc_max_len)
        :param mode:    tf.contrib.learn.ModeKeys. TRAIN / INFER
        :return:        (ds, ) as the overall relation matching score
        """
        LogInfo.begin_track('Build kernel: [att_rm_kernel]')
        assert mode in (tf.contrib.learn.ModeKeys.INFER, tf.contrib.learn.ModeKeys.TRAIN)
        LogInfo.logs('repr_mode = %s, scoring_mode = %s', self.repr_mode, self.scoring_mode)
        encoder_args = {'config': self.rnn_config, 'mode': mode}
        rnn_encoder = BidirectionalRNNEncoder(**encoder_args)

        comb_tensor_list = []
        for tensor_input in (p_emb, pw_emb, p_len, pw_len):
            ori_shape = tensor_input.get_shape().as_list()
            comb_shape = [-1] + ori_shape[2:]  # keep the dimensions after (ds, sc_max_len)
            comb_tensor_list.append(tf.reshape(tensor_input, shape=comb_shape))
        p_emb, pw_emb, p_len, pw_len = comb_tensor_list
        # p/pw_emb: (ds * sc_max_len, x_max_len, dim_x_emb)
        # p/pw_len: (ds * sc_max_len,)

        with tf.variable_scope('att_rm_kernel', reuse=tf.AUTO_REUSE):
            with tf.variable_scope('qw_repr', reuse=tf.AUTO_REUSE):
                qw_hidden = self.apply_seq_repr(input_emb=qw_emb, input_len=qw_len, mode=mode)
                # (ds, qw_max_len, dim_hidden)
                if self.residual:           # RNN hidden + RNN input
                    LogInfo.logs('Applying residual at qw_repr.')
                    assert self.dim_hidden == self.dim_emb
                    qw_hidden = tf.add(qw_hidden, qw_emb, name='qw_hidden_residual')
                    # (ds, qw_max_len, dim_hidden)
                qw_mask = tf.sequence_mask(lengths=qw_len,
                                           maxlen=self.qw_max_len,
                                           dtype=tf.float32,
                                           name='qw_mask')      # (ds, qw_max_len)
            qw_hidden = tf.reshape(
                tf.stack([qw_hidden] * self.sc_max_len, axis=1),
                shape=[-1, self.qw_max_len, self.dim_hidden],
                name='qw_hidden'
            )       # (ds * sc_max_len, qw_max_len, dim_hidden)
            qw_mask = tf.reshape(
                tf.stack([qw_mask] * self.sc_max_len, axis=1),
                shape=[-1, self.qw_max_len], name='qw_mask'
            )       # (ds * sc_max_len, qw_max_len)

            with tf.variable_scope('pw_repr', reuse=tf.AUTO_REUSE):
                if self.seq_merge_mode in ('fwbw', 'nfwbw'):
                    pw_rep = seq_encoding(emb_input=pw_emb, len_input=pw_len, encoder=rnn_encoder, fwbw=True)
                    # (ds * sc_max_len, dim_hidden)
                else:
                    pw_hidden = seq_encoding(emb_input=pw_emb, len_input=pw_len, encoder=rnn_encoder)
                    if self.seq_merge_mode == 'avg':
                        pw_rep = seq_hidden_averaging(seq_hidden_input=pw_hidden, len_input=pw_len)
                    else:
                        pw_rep = seq_hidden_max_pooling(seq_hidden_input=pw_hidden, len_input=pw_len)
                    # (ds * sc_max_len, dim_hidden)

            # Ready for attention calculation
            LogInfo.logs('Sequence merge mode: %s', self.seq_merge_mode)
            if self.seq_merge_mode != 'nfwbw':
                simple_att = SimpleAttention(lf_max_len=self.qw_max_len,
                                             dim_att_hidden=self.dim_att_hidden,
                                             att_func=self.att_func)
                q_att_rep, att_mat, q_weight = simple_att.forward(lf_input=qw_hidden,
                                                                  lf_mask=qw_mask,
                                                                  fix_rt_input=pw_rep)
                # q_att_rep: (ds * sc_max_len, dim_hidden)
                # att_mat:   (ds * sc_max_len, qw_max_len)
                # q_weight:  (ds * sc_max_len, qw_max_len)
                att_mat = tf.reshape(att_mat, shape=[-1, self.sc_max_len, self.qw_max_len],
                                     name='att_mat')        # (ds, sc_max_len, qw_max_len)
                q_weight = tf.reshape(q_weight, shape=[-1, self.sc_max_len, self.qw_max_len],
                                      name='q_weight')      # (ds, sc_max_len, qw_max_len)
                final_ret_dict = self.final_merge(
                    q_rep=q_att_rep, path_rep=pw_rep, sc_len=sc_len, sc_max_len=self.sc_max_len,
                    dim_hidden=self.dim_hidden, scoring_mode=self.scoring_mode
                )
                final_ret_dict['rm_att_mat'] = att_mat
                final_ret_dict['rm_q_weight'] = q_weight
                # rm_score, rm_path_score (optional), rm_att_mat, rm_q_weight
            else:
                """ Working in nfwbw mode, the fw/bw information are separated & calculating attention """
                fw_qw_hidden, bw_qw_hidden = tf.split(qw_hidden, num_or_size_splits=2, axis=-1)
                # both (ds * sc_max_len, qw_max_len, dim_hidden / 2)
                fw_pw_rep, bw_pw_rep = tf.split(pw_rep, num_or_size_splits=2, axis=-1)
                # both (ds * sc_max_len, dim_hidden / 2)
                simple_att = SimpleAttention(lf_max_len=self.qw_max_len,
                                             dim_att_hidden=self.dim_att_hidden,
                                             att_func=self.att_func)
                fw_q_att_rep, fw_att_mat, fw_q_weight = simple_att.forward(lf_input=fw_qw_hidden,
                                                                           lf_mask=qw_mask,
                                                                           fix_rt_input=fw_pw_rep)
                bw_q_att_rep, bw_att_mat, bw_q_weight = simple_att.forward(lf_input=bw_qw_hidden,
                                                                           lf_mask=qw_mask,
                                                                           fix_rt_input=bw_pw_rep)
                # fw/bw_q_att_rep: (ds * sc_max_len, dim_hidden / 2)
                # fw/bw_att_mat:   (ds * sc_max_len, qw_max_len)
                # fw/bw_q_weight:  (ds * sc_max_len, qw_max_len)
                fw_att_mat = tf.reshape(fw_att_mat, shape=[-1, self.sc_max_len, self.qw_max_len],
                                        name='fw_att_mat')  # (ds, sc_max_len, qw_max_len)
                bw_att_mat = tf.reshape(bw_att_mat, shape=[-1, self.sc_max_len, self.qw_max_len],
                                        name='bw_att_mat')  # (ds, sc_max_len, qw_max_len)
                fw_q_weight = tf.reshape(fw_q_weight, shape=[-1, self.sc_max_len, self.qw_max_len],
                                         name='fw_q_weight')  # (ds, sc_max_len, qw_max_len)
                bw_q_weight = tf.reshape(bw_q_weight, shape=[-1, self.sc_max_len, self.qw_max_len],
                                         name='bw_q_weight')  # (ds, sc_max_len, qw_max_len)
                q_att_rep = tf.concat([fw_q_att_rep, bw_q_att_rep], axis=-1, name='q_att_rep')
                # (ds * sc_max_len, dim_hidden)
                final_ret_dict = self.final_merge(
                    q_rep=q_att_rep, path_rep=pw_rep, sc_len=sc_len, sc_max_len=self.sc_max_len,
                    dim_hidden=self.dim_hidden, scoring_mode=self.scoring_mode
                )
                final_ret_dict['rm_fw_att_mat'] = fw_att_mat
                final_ret_dict['rm_bw_att_mat'] = bw_att_mat
                final_ret_dict['rm_fw_q_weight'] = fw_q_weight
                final_ret_dict['rm_bw_q_weight'] = bw_q_weight
                # rm_score, rm_path_score (optional), rm_fw/bw_att_mat, rm_fw/bw_q_weight
        LogInfo.end_track()
        return final_ret_dict
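SimpleAttention is defined elsewhere in the project; judging from its inputs and the three values it returns here, a dot-product variant could look like the sketch below (an assumption, not the actual class):

# Assumed sketch of a dot-product equivalent of SimpleAttention.forward.
def simple_dot_attention(lf_input, lf_mask, fix_rt_input):
    # lf_input:     (n, lf_max_len, dim_hidden)
    # lf_mask:      (n, lf_max_len)
    # fix_rt_input: (n, dim_hidden)
    att_mat = tf.reduce_sum(lf_input * tf.expand_dims(fix_rt_input, 1), axis=-1)
    masked = lf_mask * att_mat + (1. - lf_mask) * tf.float32.min
    weight = tf.nn.softmax(masked)                            # (n, lf_max_len)
    att_rep = tf.reduce_sum(lf_input * tf.expand_dims(weight, -1), axis=1)
    return att_rep, att_mat, weight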
Example #17
    def forward(self, qw_emb, qw_len, sc_len, p_emb, pw_emb, p_len, pw_len,
                mode):
        """
        :param qw_emb:  (ds, qw_max_len, dim_qw_emb)
        :param qw_len:  (ds, )
        :param sc_len:  (ds, )
        :param p_emb:   (ds, sc_max_len, p_max_len, dim_p_emb)
        :param pw_emb:  (ds, sc_max_len, pw_max_len, dim_pw_emb)
        :param p_len:   (ds, sc_max_len)
        :param pw_len:  (ds, sc_max_len)
        :param mode:    tf.contrib.learn.ModeKeys. TRAIN / INFER
        :return:        (ds, ) as the overall relation matching score
        """
        LogInfo.begin_track('Build kernel: [noatt_rm_kernel]')
        assert mode in (tf.contrib.learn.ModeKeys.INFER,
                        tf.contrib.learn.ModeKeys.TRAIN)
        LogInfo.logs('repr_mode = %s, scoring_mode = %s', self.repr_mode,
                     self.scoring_mode)
        encoder_args = {'config': self.rnn_config, 'mode': mode}
        rnn_encoder = BidirectionalRNNEncoder(**encoder_args)

        comb_tensor_list = []
        for tensor_input in (p_emb, pw_emb, p_len, pw_len):
            ori_shape = tensor_input.get_shape().as_list()
            comb_shape = [
                -1
            ] + ori_shape[2:]  # keep the dimensions after (ds, sc_max_len)
            comb_tensor_list.append(tf.reshape(tensor_input, shape=comb_shape))
        p_emb, pw_emb, p_len, pw_len = comb_tensor_list
        # p/pw_emb: (ds * sc_max_len, x_max_len, dim_x_emb)
        # p/pw_len: (ds * sc_max_len,)

        with tf.variable_scope('noatt_rm_kernel', reuse=tf.AUTO_REUSE):
            with tf.variable_scope('qw_repr', reuse=tf.AUTO_REUSE):
                if self.seq_merge_mode == 'fwbw':
                    q_rep = seq_encoding(emb_input=qw_emb,
                                         len_input=qw_len,
                                         encoder=rnn_encoder,
                                         fwbw=True)
                    # (ds, dim_hidden)
                else:
                    q_hidden = seq_encoding(emb_input=qw_emb,
                                            len_input=qw_len,
                                            encoder=rnn_encoder)
                    if self.seq_merge_mode == 'avg':
                        q_rep = seq_hidden_averaging(seq_hidden_input=q_hidden,
                                                     len_input=qw_len)
                    else:
                        q_rep = seq_hidden_max_pooling(
                            seq_hidden_input=q_hidden, len_input=qw_len)
                    # (ds, dim_hidden)
            q_rep = tf.reshape(tf.stack([q_rep] * self.sc_max_len, axis=1),
                               shape=[-1, self.dim_hidden],
                               name='q_rep')  # (ds * sc_max_len, dim_hidden)

            with tf.variable_scope('pw_repr', reuse=tf.AUTO_REUSE):
                if self.seq_merge_mode == 'fwbw':
                    pw_rep = seq_encoding(emb_input=pw_emb,
                                          len_input=pw_len,
                                          encoder=rnn_encoder,
                                          fwbw=True)
                    # (ds * sc_max_len, dim_hidden)
                else:
                    pw_hidden = seq_encoding(emb_input=pw_emb,
                                             len_input=pw_len,
                                             encoder=rnn_encoder)
                    if self.seq_merge_mode == 'avg':
                        pw_rep = seq_hidden_averaging(
                            seq_hidden_input=pw_hidden, len_input=pw_len)
                    else:
                        pw_rep = seq_hidden_max_pooling(
                            seq_hidden_input=pw_hidden, len_input=pw_len)
                    # (ds * sc_max_len, dim_hidden)

            final_ret_dict = self.final_merge(q_rep=q_rep,
                                              path_rep=pw_rep,
                                              sc_len=sc_len,
                                              sc_max_len=self.sc_max_len,
                                              dim_hidden=self.dim_hidden,
                                              scoring_mode=self.scoring_mode)

        LogInfo.end_track()
        return final_ret_dict  # rm_score, rm_path_score (optional)
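Before final_merge, the single question vector is tiled once per candidate path so that it lines up with the flattened path representations. A minimal NumPy sketch of that tiling (sizes illustrative):

import numpy as np

ds, sc_max_len, dim_hidden = 2, 3, 4
q_rep = np.random.rand(ds, dim_hidden)                   # one vector per question
tiled = np.stack([q_rep] * sc_max_len, axis=1)           # (ds, sc_max_len, dim_hidden)
q_rep_flat = tiled.reshape(-1, dim_hidden)               # (ds * sc_max_len, dim_hidden)
assert np.array_equal(q_rep_flat[sc_max_len], q_rep[1])  # path 0 of example 1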
Example #18
    def build_graph(self, mode_str):
        LogInfo.begin_track('Build graph: [MT-%s]', mode_str)
        if mode_str == 'eval':
            mode = tf.contrib.learn.ModeKeys.INFER
            training = False
        else:
            mode = tf.contrib.learn.ModeKeys.TRAIN
            training = True

        with tf.device('/cpu:0'):
            qw_emb = tf.nn.embedding_lookup(params=self.w_embedding,
                                            ids=self.input_tensor_dict['qw_input'],
                                            name='qw_emb')  # (ds, path_max_size, qw_max_len, dim_emb)
            dep_emb = tf.nn.embedding_lookup(params=self.w_embedding,
                                             ids=self.input_tensor_dict['dep_input'],
                                             name='dep_emb')  # (ds, path_max_size, qw_max_len, dim_emb)
            pw_emb = tf.nn.embedding_lookup(params=self.w_embedding,
                                            ids=self.input_tensor_dict['pw_input'],
                                            name='pw_emb')  # (ds, path_max_size, pw_max_len, dim_emb)
            pseq_emb = tf.nn.embedding_lookup(params=self.m_embedding,
                                              ids=self.input_tensor_dict['pseq_ids'],
                                              name='pseq_emb')  # (ds, path_max_size, pseq_max_size, dim_emb)
            path_emb = tf.nn.embedding_lookup(params=self.p_embedding,
                                              ids=self.input_tensor_dict['path_ids'],
                                              name='path_emb')  # (ds, path_max_size, dim_emb)
        pw_len = self.input_tensor_dict['pw_len']
        pseq_len = self.input_tensor_dict['pseq_len']
        qw_len = self.input_tensor_dict['qw_len']
        dep_len = self.input_tensor_dict['dep_len']

        qw_emb = self.dropout_layer(qw_emb, training=training)
        dep_emb = self.dropout_layer(dep_emb, training=training)
        pw_emb = self.dropout_layer(pw_emb, training=training)
        pseq_emb = self.dropout_layer(pseq_emb, training=training)
        path_emb = self.dropout_layer(path_emb, training=training)
        LogInfo.logs('Dropout performed.')

        rnn_encoder = None
        if self.rnn_config is not None:
            encoder_args = {'config': self.rnn_config, 'mode': mode}
            rnn_encoder = BidirectionalRNNEncoder(**encoder_args)

        """ For RM kernel """
        with tf.variable_scope('rm_task', reuse=tf.AUTO_REUSE):
            path_repr = self.build_path_repr__single(pw_emb=pw_emb, pw_len=pw_len,
                                                     pseq_emb=pseq_emb, pseq_len=pseq_len,
                                                     path_emb=path_emb, rnn_encoder=rnn_encoder)

            """ BiGRU """
            qw_repr = self.build_question_seq_repr(seq_emb=qw_emb, seq_len=qw_len, path_repr=path_repr,
                                                   rnn_encoder=rnn_encoder, scope_name='qw_repr')
            dep_repr = self.build_question_seq_repr(seq_emb=dep_emb, seq_len=dep_len, path_repr=path_repr,
                                                    rnn_encoder=rnn_encoder, scope_name='dep_repr')

            """ Temporal Conv Net """
            # qw_repr = self.build_question_seq_repr__tcn(seq_emb=qw_emb, seq_len=qw_len,
            #                                             training=training, scope_name='qw_repr')
            # dep_repr = self.build_question_seq_repr__tcn(seq_emb=dep_emb, seq_len=dep_len,
            #                                              training=training, scope_name='dep_repr')

            """ Stacking Conv Net (with Attention) """
            # qw_repr = self.build_question_seq_repr__scn(seq_emb=qw_emb, seq_len=qw_len, path_repr=path_repr,
            #                                             training=training, scope_name='qw_repr')
            # dep_repr = self.build_question_seq_repr__scn(seq_emb=dep_emb, seq_len=dep_len, path_repr=path_repr,
            #                                              training=training, scope_name='dep_repr')

            rm_final_feats, rm_score = self.rm_final_merge(
                path_repr=path_repr, qw_repr=qw_repr, dep_repr=dep_repr,
                path_cates=self.input_tensor_dict['path_cates'],
                path_size=self.input_tensor_dict['path_size']
            )

        """ For EL kernel """
        with tf.variable_scope('el_task', reuse=tf.AUTO_REUSE):
            el_final_feats, el_score = self.el_forward(el_indv_feats=self.input_tensor_dict['el_indv_feats'],
                                                       el_comb_feats=self.input_tensor_dict['el_comb_feats'],
                                                       el_mask=self.input_tensor_dict['el_mask'])

        """ For Full task """
        with tf.variable_scope('full_task', reuse=tf.AUTO_REUSE):
            full_final_feats, full_score = self.full_forward(
                el_final_feats=el_final_feats,
                rm_final_feats=rm_final_feats,
                extra_feats=self.input_tensor_dict['extra_feats']
            )

        """ Ready to return """
        tensor_dict = {'rm_score': rm_score,
                       'el_score': el_score,
                       'full_score': full_score,
                       'rm_final_feats': rm_final_feats,
                       'el_final_feats': el_final_feats,
                       'full_final_feats': full_final_feats}
        LogInfo.logs('%d tensors saved and return: %s', len(tensor_dict), tensor_dict.keys())
        LogInfo.end_track()
        return tensor_dict
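build_graph pins the embedding lookups to the CPU and applies dropout only while training. A minimal TF 1.x sketch of that pattern, assuming a hypothetical is_training flag (all names and sizes here are illustrative):

import tensorflow as tf

vocab_size, dim_emb = 1000, 50
is_training = True  # True for 'train' mode, False for 'eval'
with tf.device('/cpu:0'):
    w_embedding = tf.get_variable('w_embedding', shape=[vocab_size, dim_emb],
                                  dtype=tf.float32)
    qw_input = tf.placeholder(tf.int32, shape=[None, None], name='qw_input')
    qw_emb = tf.nn.embedding_lookup(w_embedding, qw_input)  # (ds, qw_max_len, dim_emb)
qw_emb = tf.layers.dropout(qw_emb, rate=0.2, training=is_training)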
Example #19
class SkBiRNNModule(SkBaseModule):
    def __init__(self, path_max_len, dim_item_hidden, dim_kb_emb,
                 dim_sk_hidden, data_source, rnn_config):
        super(SkBiRNNModule, self).__init__(path_max_len=path_max_len,
                                            dim_item_hidden=dim_item_hidden,
                                            dim_kb_emb=dim_kb_emb,
                                            dim_sk_hidden=dim_sk_hidden)
        self.data_source = data_source
        assert self.data_source in ('kb', 'word', 'both')

        rnn_config['num_units'] = dim_sk_hidden // 2  # bidirectional: each direction gets half
        self.rnn_encoder = BidirectionalRNNEncoder(
            rnn_config, mode=tf.contrib.learn.ModeKeys.TRAIN)

    # Input:
    #   path_wd_hidden: (batch, path_max_len, dim_item_hidden)
    #   path_kb_hidden: (batch, path_max_len, dim_kb_emb)
    #   path_len: (batch, ) as int32
    #   focus_wd_hidden: (batch, dim_item_hidden)
    #   focus_kb_hidden: (batch, dim_kb_emb)
    # Output:
    #   sk_hidden: (batch, dim_sk_hidden)
    def forward(self,
                path_wd_hidden,
                path_kb_hidden,
                path_len,
                focus_wd_hidden,
                focus_kb_hidden,
                reuse=None):
        LogInfo.begin_track('SkBiRNNModule forward: ')

        with tf.variable_scope('SkBiRNNModule', reuse=reuse):
            if self.data_source == 'kb':
                use_path_hidden = path_kb_hidden
                use_focus_hidden = focus_kb_hidden
            elif self.data_source == 'word':
                use_path_hidden = path_wd_hidden
                use_focus_hidden = focus_wd_hidden
            else:
                use_path_hidden = tf.concat([path_kb_hidden, path_wd_hidden],
                                            axis=-1,
                                            name='use_path_hidden')
                # (batch, path_max_len, dim_item_hidden + dim_kb_emb)
                use_focus_hidden = tf.concat(
                    [focus_kb_hidden, focus_wd_hidden],
                    axis=-1,
                    name='use_focus_hidden')
                # (batch, dim_item_hidden + dim_kb_emb)

            use_path_emb_input = tf.concat(
                [tf.expand_dims(use_focus_hidden, axis=1), use_path_hidden],
                axis=1,
                name='use_path_emb_input'
            )  # (batch, path_max_len + 1, dim_use)
            show_tensor(use_path_emb_input)
            use_path_len = path_len + 1
            stamps = self.path_max_len + 1
            birnn_inputs = tf.unstack(use_path_emb_input,
                                      num=stamps,
                                      axis=1,
                                      name='birnn_inputs')
            encoder_output = self.rnn_encoder.encode(
                inputs=birnn_inputs, sequence_length=use_path_len, reuse=reuse)
            rnn_outputs = tf.stack(
                encoder_output.outputs, axis=1,
                name='rnn_outputs')  # (batch, path_max_len + 1, dim_sk_hidden)

            # In BiRNN mode we simply average the hidden states over the valid
            # timesteps (the encoder emits zeros past sequence_length, so plain
            # summation followed by division by the true length is safe).

            sum_sk_hidden = tf.reduce_sum(
                rnn_outputs, axis=1,
                name='sum_sk_hidden')  # (batch, dim_sk_hidden)
            use_path_len_mat = tf.cast(
                tf.expand_dims(use_path_len, axis=1),
                dtype=tf.float32,
                name='use_path_len_mat')  # (batch, 1) as float32
            sk_hidden = tf.div(sum_sk_hidden,
                               use_path_len_mat,
                               name='sk_hidden')  # (batch, dim_sk_hidden)

        LogInfo.end_track()
        return sk_hidden
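The averaging step above divides the summed BiRNN outputs by the true sequence lengths; it works because the encoder emits zeros past sequence_length. A minimal NumPy sketch of the same length-normalized average with the mask made explicit:

import numpy as np

batch, stamps, dim = 2, 4, 3
outputs = np.random.rand(batch, stamps, dim)              # BiRNN outputs
lengths = np.array([2, 4])                                # true lengths per example
mask = (np.arange(stamps)[None, :] < lengths[:, None]).astype(float)  # (batch, stamps)
avg = (outputs * mask[:, :, None]).sum(axis=1) / lengths[:, None]     # (batch, dim)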
Example #20
    def get_score(self, mode, qwords_embedding, qwords_len, sc_len,
                  preds_embedding, preds_len, pwords_embedding, pwords_len):
        """
        Produce the final similarity score.
        This function is the most important part of the optm/eval model.
        We simply use cosine similarity.
        :param mode: tf.contrib.learn.ModeKeys.TRAIN/INFER, which affects the dropout setting
        :param qwords_embedding:    (ds, q_max_len, dim_emb)
        :param qwords_len:          (ds, )
        :param sc_len:              (ds, )
        :param preds_embedding:     (ds, sc_max_len, path_max_len, dim_emb)
        :param preds_len:           (ds, sc_max_len)
        :param pwords_embedding:    (ds, sc_max_len, pword_max_len, dim_emb)
        :param pwords_len:          (ds, sc_max_len)
        :return: (ds, ) as the final similarity score
        """
        assert mode in (tf.contrib.learn.ModeKeys.TRAIN,
                        tf.contrib.learn.ModeKeys.INFER)

        if self.rnn_config['cell_class'] == 'None':
            # no recurrent layer; just use the raw embeddings instead
            self.dim_hidden = self.dim_emb  # force set dim_hidden to be dim_emb
            q_encoder = pred_encoder = pword_encoder = None
        else:
            encoder_args = {'config': self.rnn_config, 'mode': mode}
            q_encoder = BidirectionalRNNEncoder(**encoder_args)
            pred_encoder = BidirectionalRNNEncoder(**encoder_args)
            pword_encoder = BidirectionalRNNEncoder(**encoder_args)
            """
            BidirectionalRNNEncoder will set the dropout according to the current mode (TRAIN/INFER)
            """

        with tf.name_scope('RelationMatchingKernel'):
            with tf.variable_scope('Question', reuse=self.reuse):
                if q_encoder is None:
                    qwords_hidden = qwords_embedding
                    # (ds, q_max_len, dim_hidden=dim_emb)
                else:
                    qwords_hidden = seq_encoding(
                        emb_input=qwords_embedding,
                        len_input=qwords_len,
                        encoder=q_encoder,
                        reuse=self.reuse)  # (ds, q_max_len, dim_hidden)
                q_hidden = seq_hidden_max_pooling(
                    seq_hidden_input=qwords_hidden, len_input=qwords_len)
            # (ds, dim_hidden), will be used in the final cosine similarity calculation

            # Step 1:   split schemas into paths
            #           merge ds and sc_max_len into one dimension
            qwords_hidden = tf.reshape(
                tf.stack([qwords_hidden] * self.sc_max_len, axis=1),
                shape=(-1, self.q_max_len, self.dim_hidden),
                name='qwords_hidden'
            )  # (ds * sc_max_len, q_max_len, dim_hidden)
            qwords_len = tf.reshape(tf.stack([qwords_len] * self.sc_max_len,
                                             axis=1),
                                    shape=(-1, ),
                                    name='qwords_len')  # (ds * sc_max_len, )
            # Now combine ds and sc_max_len into one dimension

            comb_tensor_list = []
            for tensor_input in (preds_embedding, preds_len, pwords_embedding,
                                 pwords_len):
                ori_shape = tensor_input.get_shape().as_list()
                # keep the dimensions after (ds, sc_max_len)
                comb_shape = [-1] + ori_shape[2:]
                # show_tensor(tensor_input)
                # LogInfo.logs('ori_shape: %s, comb_shape: %s', ori_shape, comb_shape)
                comb_tensor_list.append(
                    tf.reshape(tensor_input, shape=comb_shape))
            [preds_embedding, preds_len, pwords_embedding,
             pwords_len] = comb_tensor_list
            # (ds * sc_max_len, xxxxxxx)
            # for tensor in comb_tensor_list:
            #     show_tensor(tensor)

            # Step 2: Compute basic hidden repr.
            # xxx_final_hidden: (ds * sc_max_len, dim_hidden)
            # (Optional) xxx_att_mat: (ds * sc_max_len, q_max_len, xxx_max_len)
            with tf.name_scope('Schema'):
                with tf.variable_scope('preds', reuse=self.reuse):
                    if pred_encoder is None:
                        preds_hidden = preds_embedding
                        # (ds * sc_max_len, path_max_len, dim_hidden=dim_emb)
                    else:
                        preds_hidden = seq_encoding(
                            emb_input=preds_embedding,
                            len_input=preds_len,
                            encoder=pred_encoder,
                            reuse=self.reuse
                        )  # (ds * sc_max_len, path_max_len, dim_hidden)
                    pred_final_hidden, pred_att_mat = self.aggregate_within_path(
                        qwords_hidden=qwords_hidden,
                        qwords_len=qwords_len,
                        pitems_hidden=preds_hidden,
                        pitems_len=preds_len,
                        item_max_len=self.path_max_len,
                        item_agg_mode=self.preds_agg_mode)
                with tf.variable_scope('pwords', reuse=self.reuse):
                    if pword_encoder is None:
                        pwords_hidden = pwords_embedding
                        # (ds * sc_max_len, pword_max_len, dim_hidden=dim_emb)
                    else:
                        pwords_hidden = seq_encoding(
                            emb_input=pwords_embedding,
                            len_input=pwords_len,
                            encoder=pword_encoder,
                            reuse=self.reuse
                        )  # (ds * sc_max_len, pword_max_len, dim_hidden)
                    pword_final_hidden, pword_att_mat = self.aggregate_within_path(
                        qwords_hidden=qwords_hidden,
                        qwords_len=qwords_len,
                        pitems_hidden=pwords_hidden,
                        pitems_len=pwords_len,
                        item_max_len=self.pword_max_len,
                        item_agg_mode=self.pwords_agg_mode)

                # Step 3:   1. merge preds and pwords
                #           2. combine paths into schemas
                #           3. produce the final score
                # path_merge_mode: Max: max pooling
                #                  Sum: simple summation
                with tf.name_scope('PathMerge'):
                    assert not (pword_final_hidden is None
                                and pred_final_hidden is None)
                    if pword_final_hidden is None:  # information comes from pwords only
                        path_final_hidden = pred_final_hidden
                    elif pred_final_hidden is None:  # information comes from preds only
                        path_final_hidden = pword_final_hidden
                    else:  # combine the information from both pwords and preds
                        assert self.path_merge_mode in ('Sum', 'Max')
                        if self.path_merge_mode == 'Sum':
                            path_final_hidden = tf.add(
                                pword_final_hidden,
                                pred_final_hidden,
                                name='path_final_hidden'
                            )  # (ds * sc_max_len, dim_hidden)
                        else:
                            path_final_hidden = tf.reduce_max(
                                tf.stack(
                                    [pword_final_hidden, pred_final_hidden],
                                    axis=0
                                ),  # (2, ds * sc_max_len, dim_hidden)
                                axis=0,
                                name='path_final_hidden'
                            )  # (ds * sc_max_len, dim_hidden)
                    sc_path_hidden = tf.reshape(
                        path_final_hidden,
                        shape=[-1, self.sc_max_len, self.dim_hidden],
                        name='sc_path_hidden')  # (ds, sc_max_len, dim_hidden)
                    # max pooling along all paths
                    sc_hidden = seq_hidden_max_pooling(
                        seq_hidden_input=sc_path_hidden,
                        len_input=sc_len)  # (ds, dim_hidden)
            score = cosine_sim(lf_input=q_hidden, rt_input=sc_hidden)  # (ds, )

        if pred_att_mat is not None:
            pred_att_mat = tf.reshape(
                pred_att_mat,
                [-1, self.sc_max_len, self.q_max_len, self.path_max_len],
                name='pred_att_mat'
            )  # (ds, sc_max_len, q_max_len, path_max_len)
        if pword_att_mat is not None:
            pword_att_mat = tf.reshape(
                pword_att_mat,
                [-1, self.sc_max_len, self.q_max_len, self.pword_max_len],
                name='pword_att_mat'
            )  # (ds, sc_max_len, q_max_len, pword_max_len)
        return pred_att_mat, pword_att_mat, score
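cosine_sim is imported from elsewhere in the repository; the sketch below shows the row-wise cosine similarity it is assumed to compute for two (ds, dim_hidden) inputs (the epsilon is our own addition for numerical safety):

import tensorflow as tf

def cosine_sim(lf_input, rt_input, eps=1e-8):
    """Row-wise cosine similarity; returns a (ds, ) tensor."""
    dot = tf.reduce_sum(lf_input * rt_input, axis=-1)
    lf_norm = tf.sqrt(tf.reduce_sum(tf.square(lf_input), axis=-1))
    rt_norm = tf.sqrt(tf.reduce_sum(tf.square(rt_input), axis=-1))
    return dot / (lf_norm * rt_norm + eps)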
Example #21
    def forward(self, path_size, qw_emb, qw_len, pw_emb, pw_len, mode):
        """
        :param path_size: (ds, )
        :param qw_emb:  (ds, path_max_size, qw_max_len, dim_qw_emb)
        :param qw_len:  (ds, path_max_size)
        :param pw_emb:  (ds, path_max_size, pw_max_len, dim_pw_emb)
        :param pw_len:  (ds, path_max_size)
        :param mode:    tf.contrib.learn.ModeKeys.TRAIN / INFER
        """
        rm_ret_dict = {}  # <tensor_name, tensor>
        LogInfo.begin_track('Build kernel: [rm_kernel]')
        assert mode in (tf.contrib.learn.ModeKeys.INFER,
                        tf.contrib.learn.ModeKeys.TRAIN)

        dyn_path_max_size = tf.shape(qw_emb)[1]
        rnn_encoder = None
        if self.rnn_config is not None:
            encoder_args = {'config': self.rnn_config, 'mode': mode}
            rnn_encoder = BidirectionalRNNEncoder(**encoder_args)
        """ Merge first & second dimension: ds * path_max_size = DS """
        comb_tensor_list = []
        for tensor_input in (qw_emb, qw_len, pw_emb, pw_len):
            ori_shape = tensor_input.get_shape().as_list()
            # keep the dimensions after (ds, path_max_size)
            comb_shape = [-1] + ori_shape[2:]
            comb_tensor_list.append(tf.reshape(tensor_input, shape=comb_shape))
        qw_emb, qw_len, pw_emb, pw_len = comb_tensor_list
        """ pw side representation """
        pw_repr = seq_encoding_with_aggregation(
            emb_input=pw_emb,
            len_input=pw_len,
            rnn_encoder=rnn_encoder,
            seq_merge_mode=self.seq_merge_mode)
        # (DS, dim_hidden), that is (ds * path_max_size, dim_hidden)
        """ attention with qw repr """
        if self.att_config is not None:
            dim_att_hidden = self.att_config['dim_att_hidden']
            att_func = self.att_config['att_func']
            qw_hidden = seq_encoding(emb_input=qw_emb,
                                     len_input=qw_len,
                                     encoder=rnn_encoder)
            # (DS, qw_max_len, dim_hidden)
            qw_mask = tf.sequence_mask(lengths=qw_len,
                                       maxlen=self.qw_max_len,
                                       dtype=tf.float32,
                                       name='qw_mask')  # (DS, qw_max_len)
            simple_att = SimpleAttention(lf_max_len=self.qw_max_len,
                                         dim_att_hidden=dim_att_hidden,
                                         att_func=att_func)
            q_att_rep, att_mat, q_weight = simple_att.forward(
                lf_input=qw_hidden, lf_mask=qw_mask, fix_rt_input=pw_repr)
            # q_att_rep: (DS, dim_hidden)
            # att_mat:   (DS, qw_max_len)
            # q_weight:  (DS, qw_max_len)
            att_mat = tf.reshape(
                att_mat,
                shape=[-1, dyn_path_max_size, self.qw_max_len],
                name='att_mat')  # (ds, path_max_size, qw_max_len)
            q_weight = tf.reshape(
                q_weight,
                shape=[-1, dyn_path_max_size, self.qw_max_len],
                name='q_weight')  # (ds, path_max_size, qw_max_len)
            rm_ret_dict['rm_att_mat'] = att_mat
            rm_ret_dict['rm_q_weight'] = q_weight
            qw_repr = q_att_rep
        else:  # no attention, similar with above
            qw_repr = seq_encoding_with_aggregation(
                emb_input=qw_emb,
                len_input=qw_len,
                rnn_encoder=rnn_encoder,
                seq_merge_mode=self.seq_merge_mode)
        """ Calculating final score """
        final_ret_dict = self.final_merge(qw_repr=qw_repr,
                                          pw_repr=pw_repr,
                                          path_size=path_size,
                                          dyn_path_max_size=dyn_path_max_size,
                                          dim_hidden=self.dim_hidden,
                                          scoring_mode=self.scoring_mode)
        rm_ret_dict.update(final_ret_dict)

        LogInfo.end_track()
        return rm_ret_dict
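SimpleAttention scores every question position against the fixed path representation, masks the padded positions, and returns a weighted sum. A minimal NumPy sketch of that final combination step (the real layer also learns the scoring function; here the raw scores are given):

import numpy as np

DS, qw_max_len, dim_hidden = 2, 5, 4
qw_hidden = np.random.rand(DS, qw_max_len, dim_hidden)     # encoded question
scores = np.random.rand(DS, qw_max_len)                    # raw attention scores
mask = np.array([[1, 1, 1, 0, 0],
                 [1, 1, 1, 1, 1]], dtype=float)            # from tf.sequence_mask
scores = np.where(mask > 0, scores, -1e30)                 # suppress padding
weights = np.exp(scores - scores.max(axis=1, keepdims=True))
weights /= weights.sum(axis=1, keepdims=True)              # (DS, qw_max_len)
q_att_rep = (weights[:, :, None] * qw_hidden).sum(axis=1)  # (DS, dim_hidden)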
Example #22
    def _build_graph(self):
        self.query_idx = tf.placeholder(
            dtype=tf.int32, shape=[None, self.config.get("max_seq_len")])
        self.query_len = tf.placeholder(dtype=tf.int32, shape=[None])
        self.label = tf.placeholder(
            dtype=tf.int32, shape=[None, self.config.get("max_seq_len")])
        self.intent = tf.placeholder(dtype=tf.int32, shape=[None])
        self.link_mask = tf.placeholder(
            dtype=tf.int32, shape=[None, self.config.get("max_seq_len")])
        self.entity_idx = tf.placeholder(
            dtype=tf.int32, shape=[None, self.config.get("PN")])

        with tf.device('/cpu:0'), tf.name_scope("embedding_layer"):
            term_embedding = tf.get_variable(
                name="embedding",
                shape=[
                    self.config.get("vocab_size"),
                    self.config.get("embedding_dim")
                ],
                dtype=tf.float32,
                initializer=tf.constant_initializer(self.embedding_vocab))
            self.query_embedding = tf.nn.embedding_lookup(
                term_embedding, self.query_idx)
            self.entity_embedding = tf.nn.embedding_lookup(
                term_embedding, self.entity_idx)
            # tf.split: one tensor -> list of tensors
            # tf.stack: list of tensors -> one tensor
            self.query_slice = [
                tf.squeeze(_input, [1])
                for _input in tf.split(self.query_embedding,
                                       self.config.get("max_seq_len"),
                                       axis=1)
            ]

        # bi-LSTM
        with tf.name_scope("rnn_encoder"):
            rnn_config = dict()
            key_list = [
                "cell_class", "num_units", "dropout_input_keep_prob",
                "dropout_output_keep_prob", "num_layers"
            ]
            for key in key_list:
                rnn_config[key] = self.config.get(key)
            rnn_encoder = BidirectionalRNNEncoder(rnn_config, self.mode)
            self.encoder_output = rnn_encoder.encode(self.query_slice,
                                                     self.query_len)

        # hidden representation for intent detection
        with tf.name_scope("intent_hidden"):
            # average attention
            att_config = dict()
            key_list = ["num_units"]
            for key in key_list:
                att_config[key] = self.config.get(key)

            att = AttentionLayerAvg()
            self.query_hidden_avg = att.build(
                self.encoder_output.attention_values,
                self.encoder_output.attention_values_length)

        self.hidden_dim = self.query_hidden_avg.get_shape().as_list()[-1]

        # training parameters
        with tf.name_scope("parameters"):
            self.W_i = tf.get_variable(
                name="W_i",
                shape=[self.hidden_dim,
                       self.config.get("intent_num")],
                dtype=tf.float32,
                initializer=tf.contrib.layers.xavier_initializer(uniform=True))
            self.b_i = tf.get_variable(
                name="b_i",
                shape=[self.config.get("intent_num")],
                dtype=tf.float32,
                initializer=tf.constant_initializer(0.0))
            self.W_l = tf.get_variable(
                name="W_l",
                shape=[self.hidden_dim,
                       self.config.get("label_num")],
                dtype=tf.float32,
                initializer=tf.contrib.layers.xavier_initializer(uniform=True))
            self.b_l = tf.get_variable(
                name="b_l",
                shape=[self.config.get("label_num")],
                dtype=tf.float32,
                initializer=tf.constant_initializer(0.0))
            self.W_e = tf.get_variable(
                name="W_e",
                shape=[self.hidden_dim * 2,
                       self.config.get("embedding_dim")],
                dtype=tf.float32,
                initializer=tf.contrib.layers.xavier_initializer(uniform=True))
            self.b_e = tf.get_variable(
                name="b_e",
                shape=[self.config.get("embedding_dim")],
                dtype=tf.float32,
                initializer=tf.constant_initializer(0.0))

        # above bi-LSTM

        # ---------------------------------- Intent Detection --------------------------- #
        self.intent_layer = tf.nn.xw_plus_b(self.query_hidden_avg, self.W_i,
                                            self.b_i)

        # ---------------------------------- Sequence Labeling -------------------------- #
        self.outputs = tf.reshape(tensor=self.encoder_output.outputs,
                                  shape=[-1, self.hidden_dim])
        self.label_layer = tf.nn.xw_plus_b(self.outputs, self.W_l, self.b_l)
        # [B, T, class_num]
        self.label_layer = tf.reshape(tensor=self.label_layer,
                                      shape=[
                                          -1,
                                          self.config.get("max_seq_len"),
                                          self.config.get("label_num")
                                      ])

        # ---------------------------------- Entity Linking--- -------------------------- #
        """
        Note that entity linking in the evaluation step is based on the result
        of sequence labeling, so we perform a two-step evaluation.
        """

        # [B, h_dim]
        self.mention = add_mask_then_avg(self.encoder_output.attention_values,
                                         self.link_mask)
        # [B, h_dim]
        self.context = add_mask_then_avg(self.encoder_output.attention_values,
                                         1 - self.link_mask)
        # [B, w2v_dim]
        self.left = tf.nn.xw_plus_b(
            tf.concat([self.mention, self.context], axis=1), self.W_e,
            self.b_e)
        # [B, 1, w2v_dim]
        self.left = tf.expand_dims(self.left, axis=1)
        # [B, PN, w2v_dim]
        self.left = tf.tile(self.left, multiples=[1, self.config.get("PN"), 1])
        # [B*PN, w2v_dim]
        self.left = tf.reshape(self.left,
                               shape=[-1, self.config.get("embedding_dim")])
        # [B*PN, w2v_dim]
        self.right = tf.reshape(self.entity_embedding,
                                shape=[-1,
                                       self.config.get("embedding_dim")])

        # [B*PN, ]
        self.link_score = cosine_sim(self.left, self.right)

        # ===================================== Loss ====================================== #
        if self.mode == tf.contrib.learn.ModeKeys.TRAIN:
            # loss for intent detection
            self.intent_loss = \
                tf.nn.sparse_softmax_cross_entropy_with_logits(logits=self.intent_layer,
                                                               labels=self.intent,
                                                               name="intent_loss")
            self.intent_loss = tf.reduce_mean(self.intent_loss)

            # loss for sequence labeling
            self.label_loss = softmax_sequence_loss(
                logits=self.label_layer,
                targets=self.label,
                sequence_length=self.query_len)
            self.label_loss = tf.reduce_mean(self.label_loss)

            # loss for entity linking
            self.link_loss = hinge_loss(scores=self.link_score,
                                        row=self.config.get("batch_size"),
                                        col=self.config.get("PN"),
                                        margin=self.config.get("margin"))

            # train op; currently the three losses are equally weighted
            self.train_op = get_optimizer(
                self.config.get("optimizer"),
                self.config.get("lr")).minimize(self.intent_loss +
                                                self.label_loss +
                                                self.link_loss)
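hinge_loss is an external helper; the sketch below shows the pairwise ranking loss it is assumed to implement, where the flat (row * col, ) scores reshape to (batch, PN) and, by assumption, the first candidate in each row is the positive one:

import tensorflow as tf

def hinge_loss(scores, row, col, margin):
    """Mean margin violation between the positive and each negative."""
    scores = tf.reshape(scores, [row, col])       # (batch, PN)
    pos = scores[:, 0:1]                          # positive candidate score
    neg = scores[:, 1:]                           # negative candidate scores
    return tf.reduce_mean(tf.maximum(0.0, margin - pos + neg))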
Example #23
    def get_score(self, mode, qwords_embedding, qwords_len, sc_len,
                  preds_embedding, preds_len, pwords_embedding, pwords_len):
        """
        Produce the final similarity score.
        This function is the most important part of the optm/eval model.
        We simply use cosine similarity.
        :param mode: tf.contrib.learn.ModeKeys.TRAIN/INFER, which affects the dropout setting
        :param qwords_embedding:    (ds, q_max_len, dim_emb)
        :param qwords_len:          (ds, )
        :param sc_len:              (ds, )
        :param preds_embedding:     (ds, sc_max_len, path_max_len, dim_emb)
        :param preds_len:           (ds, sc_max_len)
        :param pwords_embedding:    (ds, sc_max_len, pword_max_len, dim_emb)
        :param pwords_len:          (ds, sc_max_len)
        :return: score and attention matrices
           pred_att_mat:    (ds, sc_max_len, q_max_len, path_max_len)
           pword_att_mat:   (ds, sc_max_len, q_max_len, pword_max_len)
           score:           (ds,)
        """
        assert mode in (tf.contrib.learn.ModeKeys.TRAIN, tf.contrib.learn.ModeKeys.INFER)
        encoder_args = {'config': self.rnn_config, 'mode': mode}
        # set dropout according to the current mode (TRAIN/INFER)
        q_encoder = BidirectionalRNNEncoder(**encoder_args)
        pred_encoder = BidirectionalRNNEncoder(**encoder_args)
        pword_encoder = BidirectionalRNNEncoder(**encoder_args)
        cross_att = IndirectCrossAttention(**self.cross_att_config)

        with tf.name_scope('separated_relation_matching_kernel'):

            """ Preprocess: reshaping, merge ds and sc_max_len into one dimension """
            qwords_embedding = tf.reshape(
                tf.stack([qwords_embedding] * self.sc_max_len, axis=1),
                shape=(-1, self.q_max_len, self.dim_emb),
                name='qwords_hidden'
            )       # (ds * sc_max_len, q_max_len, dim_hidden)
            qwords_len = tf.reshape(
                tf.stack([qwords_len] * self.sc_max_len, axis=1),
                shape=(-1,),
                name='qwords_len'
            )       # (ds * sc_max_len, )
            comb_tensor_list = []
            for tensor_input in (preds_embedding, preds_len, pwords_embedding, pwords_len):
                ori_shape = tensor_input.get_shape().as_list()
                comb_shape = [-1] + ori_shape[2:]       # keep the dimensions after (ds, sc_max_len)
                # show_tensor(tensor_input)
                # LogInfo.logs('ori_shape: %s, comb_shape: %s', ori_shape, comb_shape)
                comb_tensor_list.append(tf.reshape(tensor_input, shape=comb_shape))
            [preds_embedding, preds_len, pwords_embedding, pwords_len] = comb_tensor_list
            # (ds * sc_max_len, xxxxxxx)
            # for tensor in comb_tensor_list:
            #     show_tensor(tensor)

            """ Step 1: Intra-attention (Optional) """
            # TODO: Question and pred_words

            """ Step 2: Cross-attention, make sure pword and preds treat properly """
            qwords_att_embedding, preds_att_info, pwords_att_info = cross_att.forward(
                q_input=qwords_embedding,
                p_input=preds_embedding,
                pw_input=pwords_embedding,
                q_len=qwords_len, p_len=preds_len, pw_len=pwords_len
            )
            preds_att_embedding, preds_att_mat = preds_att_info
            pwords_att_embedding, pwords_att_mat = pwords_att_info
            # x_embedding: (ds * sc_max_len, x_max_len, dim_emb)
            # x_att_mat: (ds * sc_max_len, q_max_len, x_max_len)

            """ Step 3: Perform RNN over embeddings """
            """ Want to share RNN parameters? Put'em into one var_scope """
            with tf.variable_scope('qwords', reuse=self.reuse):
                qwords_hidden = seq_encoding(
                    emb_input=qwords_att_embedding, len_input=qwords_len,
                    encoder=q_encoder, reuse=self.reuse
                )  # (ds * sc_max_len, q_max_len, dim_hidden)
                qword_final_hidden = seq_hidden_max_pooling(
                    seq_hidden_input=qwords_hidden, len_input=qwords_len)
            with tf.variable_scope('preds', reuse=self.reuse):
                preds_hidden = seq_encoding(
                    emb_input=preds_att_embedding, len_input=preds_len,
                    encoder=pred_encoder, reuse=self.reuse
                )  # (ds * sc_max_len, path_max_len, dim_hidden)
                pred_final_hidden = seq_hidden_max_pooling(
                    seq_hidden_input=preds_hidden, len_input=preds_len)
            with tf.variable_scope('pwords', reuse=self.reuse):
                pwords_hidden = seq_encoding(
                    emb_input=pwords_att_embedding, len_input=pwords_len,
                    encoder=pword_encoder, reuse=self.reuse
                )  # (ds * sc_max_len, pword_max_len, dim_hidden)
                pword_final_hidden = seq_hidden_max_pooling(
                    seq_hidden_input=pwords_hidden, len_input=pwords_len)
            # x_final_hidden: (ds * sc_max_len, dim_hidden)

            """ Step 4: Path merging, calculate final score """
            # TODO: use pword/pred or not
            if self.path_merge_mode == 'sum':
                path_final_hidden = tf.add(pword_final_hidden, pred_final_hidden,
                                           name='path_final_hidden')  # (ds * sc_max_len, dim_hidden)
            else:                   # max
                path_final_hidden = tf.reduce_max(
                    tf.stack([pword_final_hidden,
                              pred_final_hidden], axis=0),  # (2, ds * sc_max_len, dim_hidden)
                    axis=0, name='path_final_hidden')  # (ds * sc_max_len, dim_hidden)

            if self.final_score_mode == 'cos':
                path_score = cosine_sim(lf_input=qword_final_hidden,
                                        rt_input=path_final_hidden)     # (ds * sc_max_len, )
            else:                   # dot
                path_score = tf.reduce_sum(qword_final_hidden * path_final_hidden,
                                           axis=-1)     # (ds * sc_max_len, )
            path_score = tf.reshape(path_score, shape=[-1, self.sc_max_len],
                                    name='path_score')  # (ds, sc_max_len)
            sc_mask = tf.sequence_mask(lengths=sc_len,
                                       maxlen=self.sc_max_len,
                                       dtype=tf.float32,
                                       name='sc_mask')  # (ds, sc_max_len) as mask
            score = tf.reduce_sum(path_score * sc_mask, axis=-1, name='score')  # (ds, )

        pred_att_mat = tf.reshape(preds_att_mat, [-1, self.sc_max_len, self.q_max_len, self.path_max_len],
                                  name='pred_att_mat')          # (ds, sc_max_len, q_max_len, path_max_len)
        pword_att_mat = tf.reshape(pwords_att_mat, [-1, self.sc_max_len, self.q_max_len, self.pword_max_len],
                                   name='pword_att_mat')        # (ds, sc_max_len, q_max_len, pword_max_len)
        return pred_att_mat, pword_att_mat, score
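The final score sums per-path scores only over each schema's real paths; padded path slots are zeroed by sc_mask. A minimal NumPy sketch with concrete numbers:

import numpy as np

path_score = np.array([[0.9, 0.4, 0.7],
                       [0.2, 0.8, 0.5]])          # (ds, sc_max_len)
sc_len = np.array([2, 3])                         # real path counts
sc_max_len = path_score.shape[1]
sc_mask = (np.arange(sc_max_len)[None, :] < sc_len[:, None]).astype(float)
score = (path_score * sc_mask).sum(axis=-1)       # [1.3, 1.5]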
Example #24
    def _build_graph(self):
        self.query_idx = tf.placeholder(dtype=tf.int32,
                                        shape=[None, self.config.get("max_seq_len")])
        self.query_len = tf.placeholder(dtype=tf.int32, shape=[None])
        self.label = tf.placeholder(dtype=tf.int32,
                                    shape=[None, self.config.get("max_seq_len")])

        self.batch_size = self.config.get("batch_size")

        with tf.device('/cpu:0'), tf.name_scope("embedding_layer"):
            term_embedding = tf.get_variable(
                name="embedding",
                shape=[self.config.get("vocab_size"), self.config.get("embedding_dim")],
                dtype=tf.float32,
                initializer=tf.constant_initializer(self.embedding_vocab)
            )
            self.query_embedding = tf.nn.embedding_lookup(term_embedding, self.query_idx)
            # tf.split: one tensor -> list of tensors
            # tf.stack:    list of tensors -> one tensor
            self.query_slice = [
                tf.squeeze(_input, [1])
                for _input in tf.split(self.query_embedding,
                                       self.config.get("max_seq_len"),
                                       axis=1)
            ]
            # Better style: use tf.unstack (one tensor -> list of tensors);
            # equivalent to the above:
            # self.query_slice = tf.unstack(self.query_embedding, axis=1)

        # bi-LSTM
        with tf.name_scope("rnn_encoder"):
            rnn_config = dict()
            key_list = ["cell_class", "num_units", "dropout_input_keep_prob",
                        "dropout_output_keep_prob", "num_layers"]
            for key in key_list:
                rnn_config[key] = self.config.get(key)
            rnn_encoder = BidirectionalRNNEncoder(rnn_config, self.mode)
            self.biLstm = rnn_encoder.encode(self.query_slice, self.query_len)

        # output dim = 2 * rnn cell dim (fw + bw)
        self.hidden_dim = self.config.get("num_units") * 2
        self.biLstm_clip = tf.clip_by_value(self.biLstm.attention_values,
                                            -self.config.get("grad_clip"),
                                            self.config.get("grad_clip"))
        # training parameters
        with tf.name_scope("parameters"):
            self.W_l = tf.get_variable(name="W_l",
                                       shape=[self.hidden_dim,
                                              self.config.get("label_num")],
                                       dtype=tf.float32,
                                       initializer
                                       =tf.contrib.layers.xavier_initializer(uniform=True))
            self.b_l = tf.get_variable(name="b_l",
                                       shape=[self.config.get("label_num")],
                                       dtype=tf.float32,
                                       initializer=tf.constant_initializer(0.0))

        # above bi-LSTM
        self.outputs = tf.reshape(tensor=self.biLstm_clip,
                                  shape=[-1, self.hidden_dim])
        self.label_matrix = tf.nn.xw_plus_b(self.outputs, self.W_l, self.b_l)
        # [B, T, label_num]
        self.logits = tf.reshape(tensor=self.label_matrix,
                                 shape=[-1, self.config.get("max_seq_len"),
                                        self.config.get("label_num")])
        # [label_num, label_num]
        self.transition_mat = tf.get_variable(
            "transitions",
            shape=[self.config.get("label_num")+1, self.config.get("label_num")+1],
            initializer=tf.contrib.layers.xavier_initializer(uniform=True))

        # ===================================== Loss ====================================== #
        if self.mode == tf.contrib.learn.ModeKeys.TRAIN:

            # # softmax sequence loss for sequence labeling
            # self.loss = softmax_sequence_loss(logits=self.logits,
            #                                   targets=self.label,
            #                                   sequence_length=self.query_len)
            # self.loss = tf.reduce_mean(self.loss)

            # pad logits for CRF loss; sequence length += 1
            small = -1000.0
            start_logits = tf.concat(
                [small * tf.ones(shape=[self.batch_size, 1, self.config.get("label_num")]),
                 tf.zeros(shape=[self.batch_size, 1, 1])],
                axis=-1
            )
            LogInfo.logs(start_logits.get_shape().as_list())
            pad_logits = tf.cast(small * tf.ones([self.batch_size,
                                                  self.config.get("max_seq_len"), 1]), tf.float32)
            LogInfo.logs(pad_logits.get_shape().as_list())
            self.logits = tf.concat([self.logits, pad_logits], axis=-1)
            self.logits = tf.concat([start_logits, self.logits], axis=1)
            LogInfo.logs(self.logits.get_shape().as_list())
            targets = tf.concat(
                [tf.cast(self.config.get("label_num")*tf.ones([self.batch_size, 1]),
                         tf.int32),
                 self.label], axis=-1
            )
            LogInfo.logs(targets.get_shape().as_list())

            # CRF layer
            self.log_likelihood, self.transition_mat = \
                tf.contrib.crf.crf_log_likelihood(
                    inputs=self.logits,
                    tag_indices=targets,
                    transition_params=self.transition_mat,
                    sequence_lengths=self.query_len+1)
            self.loss = tf.reduce_mean(-self.log_likelihood)

            # train op
            self.global_step = tf.Variable(0, name="global_step", trainable=False)
            optimizer = get_optimizer(self.config.get("optimizer"), self.config.get("lr"))
            grads_and_vars = optimizer.compute_gradients(self.loss)
            self.train_op = optimizer.apply_gradients(grads_and_vars, global_step=self.global_step)
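The CRF block prepends a virtual start step and an extra start label before calling crf_log_likelihood, so both the logits and the targets grow by one step. A minimal NumPy sketch of that shape bookkeeping (sizes illustrative):

import numpy as np

B, T, L = 2, 3, 4                                  # batch, max_seq_len, label_num
logits = np.zeros((B, T, L))
small = -1000.0
start_logits = np.concatenate(
    [small * np.ones((B, 1, L)), np.zeros((B, 1, 1))], axis=-1)  # (B, 1, L+1)
pad_logits = small * np.ones((B, T, 1))                          # (B, T, 1)
logits = np.concatenate([logits, pad_logits], axis=-1)           # (B, T, L+1)
logits = np.concatenate([start_logits, logits], axis=1)          # (B, T+1, L+1)
# label index L is the virtual <start> tag prepended to every target sequence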