Exemplo n.º 1
0
 def final_merge(qw_repr, pw_repr, path_size, dyn_path_max_size, dim_hidden,
                 scoring_mode):
     """
     Combine per-path question / path representations into a schema-level score.
     Copied from compq_acl18.BaseRelationMatchingKernel

     :param qw_repr:             (ds * path_max_len, dim_hidden)
     :param pw_repr:             (ds * path_max_len, dim_hidden)
     :param path_size:           (ds, )
     :param dyn_path_max_size:   A tensor representing the max path_len in this batch
     :param dim_hidden:          dim_hidden
     :param scoring_mode:        'compact' pools first and takes one cosine;
                                 every other value uses the per-path ("separated") scoring
     :return: dict with 'rm_score'; the separated mode additionally returns
              the (unmasked) per-path scores under 'rm_path_score'
     """
     LogInfo.logs('scoring_mode = [%s]', scoring_mode)

     if scoring_mode != 'compact':
         # Separated mode: one cosine per path, shifted by a learned threshold,
         # then summed over the valid paths of each schema.
         flat_cosine = cosine_sim(lf_input=qw_repr,
                                  rt_input=pw_repr)  # (ds * path_max_len, )
         per_path_cosine = tf.reshape(
             flat_cosine,
             shape=[-1, dyn_path_max_size],
             name='raw_score')  # (ds, path_max_len)
         threshold = tf.get_variable(name='sim_ths',
                                     dtype=tf.float32,
                                     shape=[])
         # subtracting the threshold penalizes every candidate path
         path_score = tf.subtract(per_path_cosine, threshold,
                                  name='path_score')
         valid_path_mask = tf.sequence_mask(
             lengths=path_size,
             maxlen=dyn_path_max_size,
             dtype=tf.float32,
             name='sc_mask')  # (ds, path_max_len), 1.0 on real paths
         schema_score = tf.reduce_sum(path_score * valid_path_mask,
                                      axis=-1,
                                      name='score')  # (ds, )
         return {'rm_score': schema_score, 'rm_path_score': path_score}

     # Compact mode: pool over the paths of a schema, then a single cosine.
     pooled_shape = [-1, dyn_path_max_size, dim_hidden]
     q_by_schema = tf.reshape(qw_repr, shape=pooled_shape,
                              name='qw_repr')  # (ds, path_max_size, dim_hidden)
     p_by_schema = tf.reshape(pw_repr, shape=pooled_shape,
                              name='pw_repr')  # (ds, path_max_size, dim_hidden)
     q_pooled = seq_hidden_max_pooling(seq_hidden_input=q_by_schema,
                                       len_input=path_size)
     p_pooled = seq_hidden_max_pooling(seq_hidden_input=p_by_schema,
                                       len_input=path_size)
     # both pooled tensors: (ds, dim_hidden)
     schema_score = cosine_sim(lf_input=q_pooled,
                               rt_input=p_pooled)  # (ds, )
     return {'rm_score': schema_score}
 def final_merge(q_rep, path_rep, sc_len, sc_max_len, dim_hidden,
                 scoring_mode):
     """
     Turn per-path question / path vectors into one matching score per schema.

     :param q_rep:           (ds * sc_max_len, dim_hidden)
     :param path_rep:        (ds * sc_max_len, dim_hidden), pay attention to the first dimension!
     :param sc_len:          (ds, )
     :param sc_max_len:      sc_max_len
     :param dim_hidden:      dim_hidden
     :param scoring_mode:    'compact' => pool then cosine; anything else => separated scoring
     :return: {'rm_score': (ds, )} and, in separated mode only,
              'rm_path_score' with the unmasked per-path scores
     """
     if scoring_mode != 'compact':
         # Separated: cosine for every path, minus a learned threshold,
         # accumulated over the real (non-padding) paths.
         cos_flat = cosine_sim(lf_input=q_rep,
                               rt_input=path_rep)  # (ds * sc_max_len, )
         cos_by_schema = tf.reshape(cos_flat,
                                    shape=[-1, sc_max_len],
                                    name='raw_score')  # (ds, sc_max_len)
         learned_ths = tf.get_variable(name='sim_ths',
                                       dtype=tf.float32,
                                       shape=[])
         # the threshold acts as a penalty on each potential path
         path_score = tf.subtract(cos_by_schema, learned_ths,
                                  name='path_score')
         path_mask = tf.sequence_mask(lengths=sc_len,
                                      maxlen=sc_max_len,
                                      dtype=tf.float32,
                                      name='sc_mask')  # (ds, sc_max_len)
         total = tf.reduce_sum(path_score * path_mask,
                               axis=-1,
                               name='score')  # (ds, )
         return {'rm_score': total, 'rm_path_score': path_score}

     # Compact: max-pool across the paths of a schema, then one overall cosine.
     grouped_shape = [-1, sc_max_len, dim_hidden]
     q_grouped = tf.reshape(q_rep, shape=grouped_shape,
                            name='q_att_rep')  # (ds, sc_max_len, dim_hidden)
     path_grouped = tf.reshape(path_rep, shape=grouped_shape,
                               name='path_att_rep')  # (ds, sc_max_len, dim_hidden)
     q_vec = seq_hidden_max_pooling(seq_hidden_input=q_grouped,
                                    len_input=sc_len)
     path_vec = seq_hidden_max_pooling(seq_hidden_input=path_grouped,
                                       len_input=sc_len)  # (ds, dim_hidden)
     total = cosine_sim(lf_input=q_vec, rt_input=path_vec)  # (ds, )
     return {'rm_score': total}
Exemplo n.º 3
0
    def get_score(self, mode, qwords_embedding, qwords_len, sc_len,
                  preds_embedding, preds_len, pwords_embedding, pwords_len):
        """
        Build the schema-level similarity score plus the cross-attention matrices.

        Every (question, path) pair is scored independently after cross-attention
        and RNN encoding; the per-path scores are then summed over the valid
        paths of each schema.

        :param mode: tf.contrib.learn.ModeKeys.TRAIN/INFER, forwarded to the RNN encoders
                     (the original note says it drives the dropout setting)
        :param qwords_embedding:    (ds, q_max_len, dim_emb)
        :param qwords_len:          (ds, )
        :param sc_len:              (ds, )
        :param preds_embedding:     (ds, sc_max_len, path_max_len, dim_emb)
        :param preds_len:           (ds, sc_max_len)
        :param pwords_embedding:    (ds, sc_max_len, pword_max_len, dim_emb)
        :param pwords_len:          (ds, sc_max_len)
        :return: (pred_att_mat, pword_att_mat, score)
           pred_att_mat:    (ds, sc_max_len, q_max_len, path_max_len)
           pword_att_mat:   (ds, sc_max_len, q_max_len, pword_max_len)
           score:           (ds,)
        """
        assert mode in (tf.contrib.learn.ModeKeys.TRAIN, tf.contrib.learn.ModeKeys.INFER)

        # One separate encoder object per input stream, all built from the same config.
        q_encoder, pred_encoder, pword_encoder = [
            BidirectionalRNNEncoder(config=self.rnn_config, mode=mode)
            for _ in range(3)
        ]
        cross_att = IndirectCrossAttention(**self.cross_att_config)

        def merge_leading_dims(tensor):
            # Fold (ds, sc_max_len) into one axis, keeping every trailing (static) dimension.
            trailing_dims = tensor.get_shape().as_list()[2:]
            return tf.reshape(tensor, shape=[-1] + trailing_dims)

        def encode_and_pool(scope_name, emb, lengths, encoder):
            # RNN over the sequence, then max-pooling over time, inside its own
            # variable scope so the three streams keep separate parameters.
            with tf.variable_scope(scope_name, reuse=self.reuse):
                hidden = seq_encoding(
                    emb_input=emb, len_input=lengths,
                    encoder=encoder, reuse=self.reuse
                )  # (ds * sc_max_len, x_max_len, dim_hidden)
                return seq_hidden_max_pooling(
                    seq_hidden_input=hidden, len_input=lengths)

        with tf.name_scope('separated_relation_matching_kernel'):

            # Preprocess: replicate the question once per path, then merge
            # ds and sc_max_len into one dimension for every input.
            flat_q_emb = tf.reshape(
                tf.stack([qwords_embedding] * self.sc_max_len, axis=1),
                shape=(-1, self.q_max_len, self.dim_emb),
                name='qwords_hidden'
            )       # (ds * sc_max_len, q_max_len, dim_emb)
            flat_q_len = tf.reshape(
                tf.stack([qwords_len] * self.sc_max_len, axis=1),
                shape=(-1,),
                name='qwords_len'
            )       # (ds * sc_max_len, )
            flat_preds_emb, flat_preds_len, flat_pwords_emb, flat_pwords_len = [
                merge_leading_dims(tensor_input)
                for tensor_input in (preds_embedding, preds_len,
                                     pwords_embedding, pwords_len)
            ]       # each: (ds * sc_max_len, ...)

            # Step 1: Intra-attention (Optional)
            # TODO: Question and pred_words

            # Step 2: Cross-attention between the question and both path views
            (q_att_emb,
             (preds_att_emb, preds_att_mat),
             (pwords_att_emb, pwords_att_mat)) = cross_att.forward(
                q_input=flat_q_emb,
                p_input=flat_preds_emb,
                pw_input=flat_pwords_emb,
                q_len=flat_q_len, p_len=flat_preds_len, pw_len=flat_pwords_len
            )
            # x_att_emb: (ds * sc_max_len, x_max_len, dim_emb)
            # x_att_mat: (ds * sc_max_len, q_max_len, x_max_len)

            # Step 3: RNN + max-pooling per stream.
            # (To share RNN parameters, the streams would have to share one var_scope.)
            q_vec = encode_and_pool('qwords', q_att_emb, flat_q_len, q_encoder)
            pred_vec = encode_and_pool('preds', preds_att_emb, flat_preds_len, pred_encoder)
            pword_vec = encode_and_pool('pwords', pwords_att_emb, flat_pwords_len, pword_encoder)
            # x_vec: (ds * sc_max_len, dim_hidden)

            # Step 4: merge the two path views, then score against the question
            # TODO: use pword/pred or not
            if self.path_merge_mode == 'sum':
                path_vec = tf.add(pword_vec, pred_vec,
                                  name='path_final_hidden')  # (ds * sc_max_len, dim_hidden)
            else:                   # max
                stacked_views = tf.stack([pword_vec, pred_vec],
                                         axis=0)  # (2, ds * sc_max_len, dim_hidden)
                path_vec = tf.reduce_max(stacked_views, axis=0,
                                         name='path_final_hidden')  # (ds * sc_max_len, dim_hidden)

            if self.final_score_mode == 'cos':
                flat_path_score = cosine_sim(lf_input=q_vec,
                                             rt_input=path_vec)     # (ds * sc_max_len, )
            else:                   # dot
                flat_path_score = tf.reduce_sum(q_vec * path_vec,
                                                axis=-1)            # (ds * sc_max_len, )
            path_score = tf.reshape(flat_path_score, shape=[-1, self.sc_max_len],
                                    name='path_score')  # (ds, sc_max_len)
            valid_path_mask = tf.sequence_mask(lengths=sc_len,
                                               maxlen=self.sc_max_len,
                                               dtype=tf.float32,
                                               name='sc_mask')  # (ds, sc_max_len)
            # padding paths contribute nothing to the schema score
            score = tf.reduce_sum(path_score * valid_path_mask, axis=-1, name='score')  # (ds, )

        # Restore the (ds, sc_max_len) leading axes on the attention matrices.
        pred_att_mat = tf.reshape(
            preds_att_mat,
            [-1, self.sc_max_len, self.q_max_len, self.path_max_len],
            name='pred_att_mat')          # (ds, sc_max_len, q_max_len, path_max_len)
        pword_att_mat = tf.reshape(
            pwords_att_mat,
            [-1, self.sc_max_len, self.q_max_len, self.pword_max_len],
            name='pword_att_mat')        # (ds, sc_max_len, q_max_len, pword_max_len)
        return pred_att_mat, pword_att_mat, score
Exemplo n.º 4
0
    def forward(self, el_size, qw_emb, qw_len, pw_sup_emb, pw_sup_len,
                sup_size, type_trans, el_type_signa, el_indv_feats,
                el_comb_feats, mode):
        """
        Build the entity-linking kernel: match the question against the support
        paths of every entity, derive a type distribution from that match, and
        score it together with the hand-crafted linking features.

        Note: number of paths in a schema == number of entities in the schema
        :param el_size:         (ds, )
        :param qw_emb:          (ds, path_max_size, qw_max_len, dim_emb)
        :param qw_len:          (ds, path_max_size)
        :param pw_sup_emb:      (ds, path_max_size, sup_max_size, pw_max_len, dim_emb)
        :param pw_sup_len:      (ds, path_max_size, sup_max_size)
        :param sup_size:        (ds, path_max_size)
        :param type_trans:      (ds, path_max_size, sup_max_size, dim_type)
        :param el_type_signa:   (ds, el_max_size, dim_type)
        :param el_indv_feats:   (ds, el_max_size, el_feat_size)
        :param el_comb_feats:   (ds, 1)
        :param mode:    TRAIN / INFER
        :return: (el_score, final_feats); el_score is (ds, 1)
        """
        LogInfo.begin_track('Build kernel: [el_kernel]')
        assert mode in (tf.contrib.learn.ModeKeys.INFER,
                        tf.contrib.learn.ModeKeys.TRAIN)

        # Without an RNN config the sequence helpers receive encoder=None.
        rnn_encoder = (
            BidirectionalRNNEncoder(config=self.rnn_config, mode=mode)
            if self.rnn_config is not None else None
        )

        # Dynamic sizes come from the not-yet-flattened support-length tensor.
        sup_len_shape = tf.shape(pw_sup_len)
        dyn_el_max_size = sup_len_shape[1]
        dyn_sup_max_size = sup_len_shape[2]

        # Flatten every leading axis so the sequence helpers see plain batches.
        flat_qw_emb = tf.reshape(qw_emb, [-1, self.qw_max_len, self.dim_emb])
        # (ds * el_max_size, qw_max_len, dim_emb)
        flat_qw_len = tf.reshape(qw_len, [-1])  # (ds * el_max_size)
        flat_pw_emb = tf.reshape(pw_sup_emb,
                                 [-1, self.pw_max_len, self.dim_emb])
        # (ds * el_max_size * sup_max_size, pw_max_len, dim_emb)
        flat_pw_len = tf.reshape(pw_sup_len, [-1])

        # Support-path representation (shared by both question branches below).
        pw_sup_repr = seq_encoding_with_aggregation(
            emb_input=flat_pw_emb,
            len_input=flat_pw_len,
            rnn_encoder=rnn_encoder,
            seq_merge_mode=self.seq_merge_mode)
        # (ds*el_max_size*sup_max_size, dim_hidden)

        if self.att_config is None:
            # No attention: one pooled question vector, repeated per support path.
            pooled_qw = seq_encoding_with_aggregation(
                emb_input=flat_qw_emb,
                len_input=flat_qw_len,
                rnn_encoder=rnn_encoder,
                seq_merge_mode=self.seq_merge_mode)
            # (ds*el_max_size, dim_hidden)
            repeated_qw = tf.tile(
                tf.expand_dims(pooled_qw, axis=1),
                multiples=[1, dyn_sup_max_size, 1],
                name='tile_qw_repr'
            )  # (ds*el_max_size, sup_max_size, dim_hidden)
            final_qw_repr = tf.reshape(repeated_qw, [-1, self.dim_hidden])
        else:
            # Attention: the question is re-weighted w.r.t. each support path.
            dim_att_len = self.att_config['dim_att_hidden']
            att_func = self.att_config['att_func']
            qw_hidden = seq_encoding(emb_input=flat_qw_emb,
                                     len_input=flat_qw_len,
                                     encoder=rnn_encoder)
            # (ds * el_max_size, qw_max_len, dim_hidden)
            qw_mask = tf.sequence_mask(lengths=flat_qw_len,
                                       maxlen=self.qw_max_len,
                                       dtype=tf.float32,
                                       name='qw_mask')  # (ds*el_max_size, qw_max_len)
            repeated_hidden = tf.tile(
                tf.expand_dims(qw_hidden, axis=1),
                # (ds*el_max_size, 1, qw_max_len, dim_hidden)
                multiples=[1, dyn_sup_max_size, 1, 1],
                name='tile_qw_hidden'
            )  # (ds*el_max_size, sup_max_size, qw_max_len, dim_hidden)
            repeated_mask = tf.tile(
                tf.expand_dims(qw_mask, axis=1),
                multiples=[1, dyn_sup_max_size, 1],
                name='tile_qw_mask'
            )  # (ds*el_max_size, sup_max_size, qw_max_len)

            flat_rep_mask = tf.reshape(repeated_mask, [-1, self.qw_max_len])
            flat_rep_hidden = tf.reshape(
                repeated_hidden, [-1, self.qw_max_len, self.dim_hidden])
            # (ds*el_max_size*sup_max_size, qw_max_len, dim_hidden)

            simple_att = SimpleAttention(lf_max_len=self.qw_max_len,
                                         dim_att_hidden=dim_att_len,
                                         att_func=att_func)
            final_qw_repr, _, _ = simple_att.forward(
                lf_input=flat_rep_hidden,
                lf_mask=flat_rep_mask,
                fix_rt_input=pw_sup_repr)
            # (ds*el_max_size*sup_max_size, dim_hidden)

        with tf.variable_scope('el_kernel', reuse=tf.AUTO_REUSE):
            # Cosine similarity per (entity, support path) -> distribution over types.
            flat_sim = cosine_sim(
                lf_input=final_qw_repr,
                rt_input=pw_sup_repr)  # (ds*el_max_size*sup_max_size, )
            sim_score = tf.reshape(
                flat_sim, shape=sup_len_shape,
                name='sim_score')  # (ds, el_max_size, sup_max_size)
            sup_mask = tf.sequence_mask(
                lengths=sup_size,
                maxlen=dyn_sup_max_size,
                dtype=tf.float32,
                name='sup_mask')  # (ds, el_max_size, sup_max_size)
            # padded support slots get the smallest float so softmax ignores them
            masked_sim = (sup_mask * sim_score +
                          (1. - sup_mask) * tf.float32.min)
            pred_prob = tf.nn.softmax(
                logits=masked_sim,
                name='pred_prob')  # (ds, el_max_size, sup_max_size)
            type_prob_4d = tf.matmul(
                a=tf.expand_dims(pred_prob,
                                 axis=2),  # (ds, el_max_size, 1, sup_max_size)
                b=type_trans  # (ds, el_max_size, sup_max_size, dim_type)
            )  # (ds, el_max_size, 1, dim_type)
            type_prob = tf.squeeze(
                input=type_prob_4d, axis=2,
                name='type_prob')  # (ds, el_max_size, dim_type)
            type_match_score = tf.reduce_sum(
                el_type_signa * type_prob,
                axis=-1,
                keep_dims=True,
                name='type_match_score')  # (ds, el_max_size, 1)

            # Concatenate features, sum them over the valid entities, score linearly.
            el_indv_concat = tf.concat(
                [type_match_score, el_indv_feats],
                axis=-1,
                name='el_indv_concat')  # (ds, el_max_size, 1+el_feat_size)
            el_mask = tf.sequence_mask(lengths=el_size,
                                       maxlen=dyn_el_max_size,
                                       dtype=tf.float32,
                                       name='el_mask')  # (ds, el_max_size)
            sum_indv_feats = tf.reduce_sum(
                el_indv_concat * tf.expand_dims(el_mask, axis=-1),
                axis=1,
                name='sum_indv_feats')  # (ds, 1+el_feat_size)
            final_feats = tf.concat([sum_indv_feats, el_comb_feats],
                                    axis=-1,
                                    name='final_feats')
            # (ds, 1+el_feat_size+1) --> type_match + indv_feats + comb_feat
            el_score = tf.contrib.layers.fully_connected(
                inputs=final_feats,
                num_outputs=1,
                activation_fn=None,
                scope='out_fc',
                reuse=tf.AUTO_REUSE
            )  # (ds, 1), representing type matching score

        LogInfo.end_track()
        return el_score, final_feats
    def forward(self, el_size, qw_emb, qw_len,
                pw_sup_emb, pw_sup_len, type_trans,
                el_sup_mask, el_type_signa, el_indv_feats, el_comb_feats, mode):
        """
        Build the entity-linking kernel over a batch-local memory of support paths.

        The question of each entity is compared (cosine) against every support
        path in the local memory; the masked softmax over those similarities is
        turned into a type distribution, matched against the entity's type
        signature, and finally combined with the linking features by a linear layer.

        Note: number of paths in a schema == number of entities in the schema
        local_mem_size: the local number of relevant paths in the current batch.
        :param el_size:         (ds, )
        :param qw_emb:          (ds, el_max_size, qw_max_len, dim_emb)
        :param qw_len:          (ds, el_max_size)
        :param pw_sup_emb:      (local_mem_size, pw_max_len, dim_emb)
        :param pw_sup_len:      (local_mem_size,)
        :param type_trans:      (local_mem_size, dim_type)
        :param el_sup_mask:     (ds, el_max_size, local_mem_size)
        :param el_type_signa:   (ds, el_max_size, dim_type)
        :param el_indv_feats:   (ds, el_max_size, el_feat_size)
        :param el_comb_feats:   (ds, 1)
        :param mode:    TRAIN / INFER
        :return: (el_score, final_feats); el_score is (ds, 1)

        Sizing note (180416):
        Let's assume ds=16*2=32, el_max_size=3, qw_max_len=20, dim_emb=300, local_mem_size=6K
        Then ds*el_max_size*qw_max_len ~= 2K
        """
        LogInfo.begin_track('Build kernel: [el_kernel]')
        assert mode in (tf.contrib.learn.ModeKeys.INFER, tf.contrib.learn.ModeKeys.TRAIN)

        rnn_encoder = None
        if self.rnn_config is not None:
            encoder_args = {'config': self.rnn_config, 'mode': mode}
            rnn_encoder = BidirectionalRNNEncoder(**encoder_args)
        # Dynamic sizes, read from the tensors themselves.
        raw_shape = tf.shape(el_sup_mask)
        el_max_size = raw_shape[1]
        local_mem_size = raw_shape[2]
        dim_type = tf.shape(type_trans)[1]

        # Possible reshapes
        qw_emb = tf.reshape(qw_emb, [-1, self.qw_max_len, self.dim_emb])
        # (ds * el_max_size, qw_max_len, dim_emb)
        qw_len = tf.reshape(qw_len, [-1])       # (ds * el_max_size)

        # Calculate attention / non-attention question representation
        pw_sup_repr = seq_encoding_with_aggregation(emb_input=pw_sup_emb, len_input=pw_sup_len,
                                                    rnn_encoder=rnn_encoder,
                                                    seq_merge_mode=self.seq_merge_mode)
        # (local_mem_size, dim_hidden)
        if self.att_config is not None:
            att_func = self.att_config['att_func']
            assert att_func == 'dot'        # TODO: Currently only support dot product
            qw_hidden = seq_encoding(emb_input=qw_emb, len_input=qw_len, encoder=rnn_encoder)
            # (ds*el_max_size, qw_max_len, dim_hidden)
            qw_mask = tf.sequence_mask(lengths=qw_len,
                                       maxlen=self.qw_max_len,
                                       dtype=tf.float32,
                                       name='qw_mask')  # (ds*el_max_size, qw_max_len)
            flat_qw_hidden = tf.reshape(qw_hidden, shape=[-1, self.dim_hidden], name='flat_qw_hidden')
            # (ds*el_max_size*qw_max_len, dim_hidden)

            # Step 1: Very simple & fast way to calculate dot attention:
            # one matmul scores every question word against every support path.
            raw_mutual_att_mat = tf.matmul(
                flat_qw_hidden,
                tf.transpose(pw_sup_repr),
                name='raw_mutual_att_mat'
            )   # (ds*el_max_size*qw_max_len, local_mem_size)
            mutual_att_mat = tf.reshape(
                raw_mutual_att_mat,
                shape=[-1, self.qw_max_len, local_mem_size],
                name='mutual_att_mat')
            # (ds*el_max_size, qw_max_len, local_mem_size)

            # Step 2: Prepare masked att_mat and normalized distribution
            qw_mask_3dim = tf.expand_dims(qw_mask, axis=-1, name='qw_mask_3dim')
            # (ds*el_max_size, qw_max_len, 1)
            # Padded question positions get the constant logit tf.float32.min so the
            # softmax below gives them (numerically) zero weight.  The penalty must
            # NOT be multiplied by the raw score: a zero score would leave the logit
            # at 0 and a negative score would flip it to a huge positive value.
            masked_att_mat = (
                qw_mask_3dim * mutual_att_mat +
                (1. - qw_mask_3dim) * tf.float32.min
            )   # (ds*el_max_size, qw_max_len, local_mem_size)
            # NOTE: the op name below is kept as in the original graph.
            unnorm_weight = tf.transpose(masked_att_mat, [0, 2, 1], name='masked_att_mat')
            # (ds*el_max_size, local_mem_size, qw_max_len)
            norm_weight = tf.nn.softmax(unnorm_weight, name='norm_weight')
            # softmax runs over the last axis, i.e. over the question words

            # Step 3: Got final qw_repr w.r.t different support paths
            qw_repr = tf.matmul(norm_weight, qw_hidden, name='qw_repr')
            # batch_matmul: (ds*el_max_size, local_mem_size, qw_max_len)
            #             x (ds*el_max_size, qw_max_len, dim_hidden)
            #            -> (ds*el_max_size, local_mem_size, dim_hidden)

        else:       # noAtt, very simple
            raw_qw_repr = seq_encoding_with_aggregation(emb_input=qw_emb, len_input=qw_len,
                                                        rnn_encoder=rnn_encoder,
                                                        seq_merge_mode=self.seq_merge_mode)
            # (ds*el_max_size, dim_hidden)
            qw_repr = tf.expand_dims(raw_qw_repr, axis=1, name='qw_repr')
            # (ds*el_max_size, 1, dim_hidden)

        with tf.variable_scope('el_kernel', reuse=tf.AUTO_REUSE):
            # Calculate cosine similarity
            flat_pw_sup_repr = tf.expand_dims(pw_sup_repr, axis=0, name='flat_pw_sup_repr')
            # (1, local_mem_size, dim_hidden)
            # assumes cosine_sim broadcasts its two inputs over the leading axes -- TODO confirm
            sim_score = cosine_sim(
                lf_input=qw_repr,               # (ds*el_max_size, [1 or local_mem_size], dim_hidden)
                rt_input=flat_pw_sup_repr       # (1, local_mem_size, dim_hidden)
            )
            # (ds*el_max_size, local_mem_size)

            # Turning into type distribution
            flat_el_sup_mask = tf.reshape(el_sup_mask, shape=[-1, local_mem_size], name='flat_el_sup_mask')
            # (ds*el_max_size, local_mem_size)
            # support paths outside the entity's mask get the smallest float before softmax
            mask_score = flat_el_sup_mask * sim_score + (1. - flat_el_sup_mask) * tf.float32.min
            pred_prob = tf.nn.softmax(logits=mask_score, name='pred_prob')
            # (ds*el_max_size, local_mem_size)
            raw_type_prob = tf.matmul(pred_prob, type_trans, name='raw_type_prob')
            # (ds*el_max_size, dim_type)
            type_prob = tf.reshape(raw_type_prob, shape=[-1, el_max_size, dim_type], name='type_prob')
            # (ds, el_max_size, dim_type)
            type_match_score = tf.reduce_sum(el_type_signa*type_prob,
                                             axis=-1, keep_dims=True,
                                             name='type_match_score')   # (ds, el_max_size, 1)

            # Feature concat and produce scores
            el_indv_concat = tf.concat([type_match_score, el_indv_feats],
                                       axis=-1, name='el_indv_concat')  # (ds, el_max_size, 1+el_feat_size)
            el_mask = tf.sequence_mask(lengths=el_size, maxlen=el_max_size,
                                       dtype=tf.float32, name='el_mask')    # (ds, el_max_size)
            # padding entities are zeroed out before summing over the entity axis
            sum_indv_feats = tf.reduce_sum(
                el_indv_concat * tf.expand_dims(el_mask, axis=-1),
                axis=1, name='sum_indv_feats'
            )   # (ds, 1+el_feat_size)
            final_feats = tf.concat([sum_indv_feats, el_comb_feats], axis=-1, name='final_feats')
            # (ds, 1+el_feat_size+1) --> type_match + indv_feats + comb_feat
            el_score = tf.contrib.layers.fully_connected(
                inputs=final_feats,
                num_outputs=1,
                activation_fn=None,
                scope='out_fc',
                reuse=tf.AUTO_REUSE
            )  # (ds, 1), representing type matching score

        LogInfo.end_track()
        return el_score, final_feats
Exemplo n.º 6
0
    def get_score(self, mode, qwords_embedding, qwords_len, sc_len,
                  preds_embedding, preds_len, pwords_embedding, pwords_len):
        """
        Produce the final similarity score.
        This function is the most important part in the optm/eval model.
        Just use cosine similarity
        :param mode: tf.contrib.learn.ModeKeys.TRAIN/INFER, which affects the dropout setting
        :param qwords_embedding:    (ds, q_max_len, dim_emb)
        :param qwords_len:          (ds, )
        :param sc_len:              (ds, )
        :param preds_embedding:     (ds, sc_max_len, path_max_len, dim_emb)
        :param preds_len:           (ds, sc_max_len)
        :param pwords_embedding:    (ds, sc_max_len, pword_max_len, dim_emb)
        :param pwords_len:          (ds, sc_max_len)
        :return: (ds, ) as the final similarity score
        """
        assert mode in (tf.contrib.learn.ModeKeys.TRAIN,
                        tf.contrib.learn.ModeKeys.INFER)

        if self.rnn_config['cell_class'] == 'None':
            # won't use any recurrent layer, but just using pure embedding as instead
            self.dim_hidden = self.dim_emb  # force set dim_hidden to be dim_emb
            q_encoder = pred_encoder = pword_encoder = None
        else:
            encoder_args = {'config': self.rnn_config, 'mode': mode}
            q_encoder = BidirectionalRNNEncoder(**encoder_args)
            pred_encoder = BidirectionalRNNEncoder(**encoder_args)
            pword_encoder = BidirectionalRNNEncoder(**encoder_args)
            """
            BidirectionalRNNEncoder will set the dropout according to the current mode (TRAIN/INFER)
            """

        with tf.name_scope('RelationMatchingKernel'):
            with tf.variable_scope('Question', reuse=self.reuse):
                if q_encoder is None:
                    qwords_hidden = qwords_embedding
                    # (ds, q_max_len, dim_hidden=dim_emb)
                else:
                    qwords_hidden = seq_encoding(
                        emb_input=qwords_embedding,
                        len_input=qwords_len,
                        encoder=q_encoder,
                        reuse=self.reuse)  # (ds, q_max_len, dim_hidden)
                q_hidden = seq_hidden_max_pooling(
                    seq_hidden_input=qwords_hidden, len_input=qwords_len)
            # (ds, dim_hidden), will be used in the final cosine similarity calculation

            # Step 1:   split schemas into paths
            #           merge ds and sc_max_len into one dimension
            qwords_hidden = tf.reshape(
                tf.stack([qwords_hidden] * self.sc_max_len, axis=1),
                shape=(-1, self.q_max_len, self.dim_hidden),
                name='qwords_hidden'
            )  # (ds * sc_max_len, q_max_len, dim_hidden)
            qwords_len = tf.reshape(tf.stack([qwords_len] * self.sc_max_len,
                                             axis=1),
                                    shape=(-1, ),
                                    name='qwords_len')  # (ds * sc_max_len, )
            # Now combine ds and sc_max_len into one dimension

            comb_tensor_list = []
            for tensor_input in (preds_embedding, preds_len, pwords_embedding,
                                 pwords_len):
                ori_shape = tensor_input.get_shape().as_list()
                comb_shape = [
                    -1
                ] + ori_shape[2:]  # keep the dimensions after (ds, sc_max_len)
                # show_tensor(tensor_input)
                # LogInfo.logs('ori_shape: %s, comb_shape: %s', ori_shape, comb_shape)
                comb_tensor_list.append(
                    tf.reshape(tensor_input, shape=comb_shape))
            [preds_embedding, preds_len, pwords_embedding,
             pwords_len] = comb_tensor_list
            # (ds * sc_max_len, xxxxxxx)
            # for tensor in comb_tensor_list:
            #     show_tensor(tensor)

            # Step 2: Compute basic hidden repr.
            # xxx_final_hidden: (ds * sc_max_len, dim_hidden)
            # (Optional) xxx_att_mat: (ds * sc_max_len, q_max_len, xxx_max_len)
            with tf.name_scope('Schema'):
                with tf.variable_scope('preds', reuse=self.reuse):
                    if pred_encoder is None:
                        preds_hidden = preds_embedding
                        # (ds * sc_max_len, path_max_len, dim_hidden=dim_emb)
                    else:
                        preds_hidden = seq_encoding(
                            emb_input=preds_embedding,
                            len_input=preds_len,
                            encoder=pred_encoder,
                            reuse=self.reuse
                        )  # (ds * sc_max_len, path_max_len, dim_hidden)
                    pred_final_hidden, pred_att_mat = self.aggregate_within_path(
                        qwords_hidden=qwords_hidden,
                        qwords_len=qwords_len,
                        pitems_hidden=preds_hidden,
                        pitems_len=preds_len,
                        item_max_len=self.path_max_len,
                        item_agg_mode=self.preds_agg_mode)
                with tf.variable_scope('pwords', reuse=self.reuse):
                    if pword_encoder is None:
                        pwords_hidden = pwords_embedding
                        # (ds * sc_max_len, pword_max_len, dim_hidden=dim_emb)
                    else:
                        pwords_hidden = seq_encoding(
                            emb_input=pwords_embedding,
                            len_input=pwords_len,
                            encoder=pword_encoder,
                            reuse=self.reuse
                        )  # (ds * sc_max_len, pword_max_len, dim_hidden)
                    pword_final_hidden, pword_att_mat = self.aggregate_within_path(
                        qwords_hidden=qwords_hidden,
                        qwords_len=qwords_len,
                        pitems_hidden=pwords_hidden,
                        pitems_len=pwords_len,
                        item_max_len=self.pword_max_len,
                        item_agg_mode=self.pwords_agg_mode)

                # Step 3:   1. merge preds and pwords
                #           2. combine paths into schemas
                #           3. produce the final score
                # path_merge_mode: Max: max pooling
                #                  Sum: simple summation
                with tf.name_scope('PathMerge'):
                    assert not (pword_final_hidden is None
                                and pred_final_hidden is None)
                    if pword_final_hidden is None:  # information comes from pwords only
                        path_final_hidden = pred_final_hidden
                    elif pred_final_hidden is None:  # information comes from preds only
                        path_final_hidden = pword_final_hidden
                    else:  # combine the information from both pwords and preds
                        assert self.path_merge_mode in ('Sum', 'Max')
                        if self.path_merge_mode == 'Sum':
                            path_final_hidden = tf.add(
                                pword_final_hidden,
                                pred_final_hidden,
                                name='path_final_hidden'
                            )  # (ds * sc_max_len, dim_hidden)
                        else:
                            path_final_hidden = tf.reduce_max(
                                tf.stack(
                                    [pword_final_hidden, pred_final_hidden],
                                    axis=0
                                ),  # (2, ds * sc_max_len, dim_hidden)
                                axis=0,
                                name='path_final_hidden'
                            )  # (ds * sc_max_len, dim_hidden)
                    sc_path_hidden = tf.reshape(
                        path_final_hidden,
                        shape=[-1, self.sc_max_len, self.dim_hidden],
                        name='sc_path_hidden')  # (ds, sc_max_len, dim_hidden)
                    # max pooling along all paths
                    sc_hidden = seq_hidden_max_pooling(
                        seq_hidden_input=sc_path_hidden,
                        len_input=sc_len)  # (ds, dim_hidden)
            score = cosine_sim(lf_input=q_hidden, rt_input=sc_hidden)  # (ds, )

        if pred_att_mat is not None:
            pred_att_mat = tf.reshape(
                pred_att_mat,
                [-1, self.sc_max_len, self.q_max_len, self.path_max_len],
                name='pred_att_mat'
            )  # (ds, sc_max_len, q_max_len, path_max_len)
        if pword_att_mat is not None:
            pword_att_mat = tf.reshape(
                pword_att_mat,
                [-1, self.sc_max_len, self.q_max_len, self.pword_max_len],
                name='pword_att_mat'
            )  # (ds, sc_max_len, q_max_len, pword_max_len)
        return pred_att_mat, pword_att_mat, score
Exemplo n.º 7
0
    def compute_attention(self, left_tensor, left_len, right_tensor,
                          right_len):
        """
        Additive cross-attention between two padded sequences.

        Every (left step, right step) pair gets a scalar attention value;
        the values are summed per step, soft-maxed over each sequence, and
        used to build one weighted vector per side.

        :param left_tensor: [B, T1, dim1]
        :param right_tensor: [B, T2, dim2]
        :param left_len: [B, ] real length of left tensor
        :param right_len: [B, ] real length of right tensor
        :return: a 4-tuple
            left_weighted:     [B, dim1] attention-weighted sum of left_tensor
            right_weighted:    [B, dim2] attention-weighted sum of right_tensor
            masked_att_matrix: [B, T1, T2] attention matrix, 0 at padded cells
            score:             cosine_sim(left_weighted, right_weighted),
                               expected to be [B, ]
        """

        # Fully connected layers to transform both left and right tensor
        # into a tensor with `hidden_dim` units
        # [B, T1, dim]
        att_left = tf.contrib.layers.fully_connected(
            inputs=left_tensor,
            num_outputs=self.hidden_dim,
            activation_fn=None,
            scope="att_keys")
        # [B, T2, dim]
        att_right = tf.contrib.layers.fully_connected(
            inputs=right_tensor,
            num_outputs=self.hidden_dim,
            activation_fn=None,
            scope="att_query")
        # [B, T1, 1, dim]
        att_left = tf.expand_dims(att_left, axis=2)
        # [B, T1, T2, dim]
        att_left = tf.tile(att_left, multiples=[1, 1, self.right_max_len, 1])
        # [B, T2, 1, dim]
        att_right = tf.expand_dims(att_right, axis=2)
        # [B, T2, T1, dim]
        att_right = tf.tile(att_right, multiples=[1, 1, self.left_max_len, 1])
        # [B, T1, T2, dim]
        att_right = tf.transpose(att_right, perm=[0, 2, 1, 3])

        v_att = tf.get_variable(name="v_att",
                                shape=[self.hidden_dim],
                                dtype=tf.float32)

        # [B, T1, T2]
        att_matrix = tf.reduce_sum(v_att * tf.tanh(att_left + att_right),
                                   axis=3)

        # 1.0 on real positions, 0.0 on padding
        left_mask = tf.sequence_mask(lengths=tf.to_int32(left_len),
                                     maxlen=tf.to_int32(self.left_max_len),
                                     dtype=tf.float32)  # [B, T1]
        right_mask = tf.sequence_mask(lengths=tf.to_int32(right_len),
                                      maxlen=tf.to_int32(self.right_max_len),
                                      dtype=tf.float32)  # [B, T2]

        # Zero out every cell whose row OR column is padding BEFORE summing.
        # Summing the raw att_matrix let padded steps of the opposite sequence
        # contribute to att_val_left / att_val_right, and the per-side masking
        # further below could not remove that contribution afterwards.
        # [B, T1, 1] * [B, 1, T2] --> [B, T1, T2]
        pair_mask = (tf.expand_dims(left_mask, axis=2) *
                     tf.expand_dims(right_mask, axis=1))
        masked_att_matrix = att_matrix * pair_mask  # [B, T1, T2]

        # [B, T1]
        att_val_left = tf.reduce_sum(masked_att_matrix, axis=2)

        # [B, T2]
        att_val_right = tf.reduce_sum(masked_att_matrix, axis=1)

        # Replace all scores for padded inputs with tf.float32.min,
        # so that they receive (almost) zero weight after softmax
        left_val = att_val_left * left_mask + (
            (1.0 - left_mask) * tf.float32.min)
        right_val = att_val_right * right_mask + (
            (1.0 - right_mask) * tf.float32.min)

        # Normalize the scores
        left_normalized = tf.nn.softmax(left_val, name="left_normalized")
        right_normalized = tf.nn.softmax(right_val, name="right_normalized")

        # Calculate the weighted average of the attention inputs
        # according to the attention values
        # [B, T1, 1] * [B, T1, dim] --> [B, T1, dim] --> [B, dim]
        left_weighted = tf.expand_dims(left_normalized, axis=2) * left_tensor
        left_weighted = tf.reduce_sum(left_weighted, axis=1)

        # [B, dim]
        right_weighted = tf.expand_dims(right_normalized,
                                        axis=2) * right_tensor
        right_weighted = tf.reduce_sum(right_weighted, axis=1)

        # Cosine similarity replaced an earlier fully connected output layer
        # over concat([left_weighted, right_weighted]); the original author
        # reported that cosine works much better.
        # NOTE(review): presumably requires dim1 == dim2 -- confirm against cosine_sim.
        score = cosine_sim(lf_input=left_weighted, rt_input=right_weighted)

        return left_weighted, right_weighted, masked_att_matrix, score
Exemplo n.º 8
0
    def _rm_final_merge(self, path_repr, sent_repr, path_cates, path_size, scope_name):
        """
        Kernel of rm_final_merge: match sentence repr. against path repr.,
        then turn the per-path matching values into the final relation score.

        :param path_repr: (ds, path_max_len, dim_path_hidden)
        :param sent_repr: (ds, path_max_len, dim_hidden)
        :param path_cates: (ds, path_max_len, 5), only consumed in 'bao' mode
        :param path_size: (ds, )
        :param scope_name: variable scope holding all parameters created here
        :return: (rm_final_feats, rm_score)
            rm_final_feats: (ds, 1) in compact / separated mode, (ds, 5) in bao mode
            rm_score:       (ds, )
        """
        with tf.variable_scope(scope_name, reuse=tf.AUTO_REUSE):
            path_dim = path_repr.get_shape().as_list()[-1]
            is_compact = self.scoring_mode == 'compact'

            # Step 1: bring both sides into a 2-D layout
            if is_compact:
                # pool over all paths first --> (ds, dim_xx_hidden)
                sent_repr = seq_hidden_max_pooling(seq_hidden_input=sent_repr, len_input=path_size)
                path_repr = seq_hidden_max_pooling(seq_hidden_input=path_repr, len_input=path_size)
            else:
                assert self.scoring_mode in ('separated', 'bao')
                # one row per path --> (ds*path_max_size, dim_xx_hidden)
                sent_repr = tf.reshape(sent_repr, [-1, self.dim_hidden])
                path_repr = tf.reshape(path_repr, [-1, path_dim])

            # Step 2: apply the final matching function
            final_func = self.final_func
            if final_func == 'dot':
                assert path_dim == self.dim_hidden
                merge_score = tf.reduce_sum(sent_repr*path_repr, axis=-1, name='merge_score')
            elif final_func == 'cos':
                assert path_dim == self.dim_hidden
                merge_score = cosine_sim(lf_input=sent_repr, rt_input=path_repr)
            elif final_func == 'bilinear':
                # project the path side into the sentence space, then dot product
                bilinear_mat = tf.get_variable(
                    name='bilinear_mat',
                    shape=[path_dim, self.dim_hidden],
                    dtype=tf.float32,
                    initializer=tf.contrib.layers.xavier_initializer())
                proj_repr = tf.matmul(path_repr, bilinear_mat, name='proj_repr')
                merge_score = tf.reduce_sum(sent_repr * proj_repr, axis=-1, name='merge_score')
            else:
                # 'fc<N>': two-layer perceptron with N hidden units
                assert final_func.startswith('fc')
                fc_units = int(final_func[2:])
                concat_repr = tf.concat([sent_repr, path_repr], axis=-1, name='concat_repr')
                concat_hidden = tf.contrib.layers.fully_connected(
                    inputs=concat_repr, num_outputs=fc_units,
                    activation_fn=tf.nn.relu, scope='fc1', reuse=tf.AUTO_REUSE
                )   # (ds / ds*path_max_len, fc_units)
                fc_out = tf.contrib.layers.fully_connected(
                    inputs=concat_hidden, num_outputs=1,
                    activation_fn=None, scope='fc2', reuse=tf.AUTO_REUSE
                )   # (ds / ds*path_max_len, 1)
                merge_score = tf.squeeze(fc_out, axis=-1, name='merge_score')

            # Step 3: aggregate per-path scores (separated / bao mode only)
            if is_compact:
                rm_score = merge_score
                rm_final_feats = tf.expand_dims(rm_score, axis=-1, name='rm_final_feats')    # (ds, 1)
            else:
                assert self.scoring_mode in ('separated', 'bao')
                merge_score = tf.reshape(merge_score, [-1, self.path_max_size])     # (ds, path_max_size)
                path_mask = tf.sequence_mask(
                    lengths=path_size,
                    maxlen=self.path_max_size,
                    dtype=tf.float32,
                    name='path_mask')   # (ds, path_max_size), 1.0 on real paths
                valid_score = merge_score * path_mask   # padded paths contribute 0
                if self.scoring_mode == 'separated':
                    # plain summation over the real paths
                    rm_score = tf.reduce_sum(valid_score, axis=-1, name='rm_score')     # (ds, )
                    rm_final_feats = tf.expand_dims(rm_score, axis=-1, name='rm_final_feats')    # (ds, 1)
                else:
                    # Bao-style: accumulate scores per path category, then a linear layer
                    mask_score_3d = tf.expand_dims(
                        valid_score, axis=1, name='mask_score_3d')  # (ds, 1, path_max_size)
                    cate_score = tf.matmul(mask_score_3d, path_cates)   # (ds, 1, 5)
                    rm_final_feats = tf.squeeze(cate_score, axis=1, name='rm_final_feats')  # (ds, 5)
                    rm_score_2d = tf.contrib.layers.fully_connected(
                        inputs=rm_final_feats, num_outputs=1,
                        activation_fn=None, scope='out_fc', reuse=tf.AUTO_REUSE
                    )   # (ds, 1)
                    rm_score = tf.squeeze(rm_score_2d, axis=-1, name='rm_score')
        return rm_final_feats, rm_score