Example #1
class ItemBiRNNModule(ItemBaseModule):

    # Below: parameters in BidirectionalRNNEncoder
    # key_list = ["cell_class", "num_units", "dropout_input_keep_prob",
    #             "dropout_output_keep_prob", "num_layers", "reuse"]
    # Without any attention mechanism
    def __init__(self, item_max_len, dim_wd_emb, dim_item_hidden, rnn_config):
        super(ItemBiRNNModule, self).__init__(item_max_len=item_max_len,
                                              dim_wd_emb=dim_wd_emb,
                                              dim_item_hidden=dim_item_hidden)
        rnn_config['num_units'] = dim_item_hidden // 2  # bidirectional: half the hidden size per direction
        self.rnn_encoder = BidirectionalRNNEncoder(
            rnn_config, mode=tf.contrib.learn.ModeKeys.TRAIN)

    # Input:
    #   item_wd_embedding: (batch, item_max_len, dim_wd_emb)
    #   item_len: (batch, ) as int32
    # Output:
    #   item_wd_hidden: (batch, dim_item_hidden)
    def forward(self, item_wd_embedding, item_len, reuse=None):
        LogInfo.begin_track('ItemBiRNNModule forward: ')

        with tf.variable_scope('ItemBiRNNModule', reuse=reuse):
            # stamps = item_wd_embedding.get_shape().as_list()[1]
            stamps = self.item_max_len
            show_tensor(item_wd_embedding)
            birnn_inputs = tf.unstack(item_wd_embedding,
                                      num=stamps,
                                      axis=1,
                                      name='birnn_inputs')
            # rnn_input: a list of stamps elements: (batch, n_emb)
            encoder_output = self.rnn_encoder.encode(inputs=birnn_inputs,
                                                     sequence_length=item_len,
                                                     reuse=reuse)
            birnn_outputs = tf.stack(
                encoder_output.outputs, axis=1,
                name='birnn_outputs')  # (batch, item_max_len, dim_item_hidden)
            LogInfo.logs('birnn_output = %s',
                         birnn_outputs.get_shape().as_list())

            sum_wd_hidden = tf.reduce_sum(birnn_outputs,
                                          axis=1)  # (data_size, n_hidden_emb)
            item_len_mat = tf.cast(tf.expand_dims(item_len, axis=1),
                                   dtype=tf.float32)  # (data_size, 1) as float
            item_wd_hidden = tf.div(
                sum_wd_hidden,
                tf.maximum(item_len_mat, 1),  # avoid dividing by 0
                name='item_wd_hidden')  # (data_size, n_hidden_emb)
            LogInfo.logs('item_wd_hidden = %s',
                         item_wd_hidden.get_shape().as_list())

        LogInfo.end_track()
        return item_wd_hidden
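The pooling step above relies on the BiRNN zeroing its outputs beyond each sequence's true length, so summing over time and dividing by the clamped length yields a mean over the valid positions only. A minimal standalone sketch of that length-aware average, with illustrative shapes rather than the module above:

import tensorflow as tf

# Length-aware average pooling sketch (illustrative names, not from the repo).
# Assumes outputs beyond each sequence's length are already zero, which holds
# when the RNN is run with sequence_length.
outputs = tf.placeholder(tf.float32, [None, 20, 128])  # (batch, T, dim), like birnn_outputs
lengths = tf.placeholder(tf.int32, [None])             # (batch,), like item_len

summed = tf.reduce_sum(outputs, axis=1)                    # (batch, dim)
len_col = tf.cast(tf.expand_dims(lengths, 1), tf.float32)  # (batch, 1)
mean_pooled = summed / tf.maximum(len_col, 1.0)            # avoid dividing by zero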
Example #2
    def encode_question(self, question, question_len, answer, config):
        """
        Encode question with answer-aware attention
        :param question: [B, T, dim]
        :param question_len: [B, ]
        :param answer: [B, dim]
        :param config: parameter dict
        :return: [B, hidden_dim]
        """

        # bi-LSTM
        with tf.name_scope("rnn_encoder"):
            rnn_config = dict()
            key_list = [
                "cell_class", "num_units", "dropout_input_keep_prob",
                "dropout_output_keep_prob", "num_layers", "reuse"
            ]
            for key in key_list:
                rnn_config[key] = config[key]
            rnn_encoder = BidirectionalRNNEncoder(rnn_config, config["mode"])
            encoder_output = rnn_encoder.encode(question, question_len)

        # attention mechanism
        with tf.name_scope("attention"):
            att_config = dict()
            key_list = ["num_units"]
            for key in key_list:
                att_config[key] = config[key]

            if config["attention"] == "bah":
                att = AttentionLayerBahdanau(att_config)
                question_hidden = att.build(
                    answer, encoder_output.attention_values,
                    encoder_output.attention_values_length)
            elif config["attention"] == "avg":
                att = AttentionLayerAvg()
                question_hidden = att.build(
                    encoder_output.attention_values,
                    encoder_output.attention_values_length)

        return question_hidden
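The AttentionLayerBahdanau and AttentionLayerAvg implementations are not part of this listing. Below is a minimal sketch of additive (Bahdanau-style) attention with length masking, assuming a (B, dim) query and (B, T, dim) values; the function and variable names are hypothetical, not the repo's.

import tensorflow as tf

def bahdanau_attention_sketch(query, values, values_length, num_units):
    """Additive-attention sketch (hypothetical, not the repo's AttentionLayerBahdanau).

    query:         (B, dim_q), e.g. the answer embedding
    values:        (B, T, dim_v), e.g. encoder_output.attention_values
    values_length: (B,), e.g. encoder_output.attention_values_length
    """
    w_q = tf.layers.dense(query, num_units, use_bias=False)       # (B, num_units)
    w_v = tf.layers.dense(values, num_units, use_bias=False)      # (B, T, num_units)
    v = tf.get_variable("att_v", shape=[num_units], dtype=tf.float32)
    scores = tf.reduce_sum(v * tf.tanh(tf.expand_dims(w_q, 1) + w_v), axis=2)  # (B, T)
    mask = tf.sequence_mask(values_length, tf.shape(values)[1], dtype=tf.float32)
    scores += (1.0 - mask) * (-1e9)                               # ignore padded positions
    alpha = tf.nn.softmax(scores)                                 # (B, T)
    return tf.reduce_sum(values * tf.expand_dims(alpha, 2), axis=1)  # (B, dim_v)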
Example #3
class QBiRNNModule(QBaseModule):

    # Below: parameters in BidirectionalRNNEncoder
    # key_list = ["cell_class", "num_units", "dropout_input_keep_prob",
    #             "dropout_output_keep_prob", "num_layers", "reuse"]
    # Without any attention mechanism
    def __init__(self,
                 dim_q_hidden,
                 rnn_config,
                 q_max_len=None,
                 dim_wd_emb=None):
        super(QBiRNNModule, self).__init__(q_max_len=q_max_len,
                                           dim_wd_emb=dim_wd_emb,
                                           dim_q_hidden=dim_q_hidden)

        rnn_config['num_units'] = dim_q_hidden // 2  # bidirectional: half the hidden size per direction
        self.rnn_encoder = BidirectionalRNNEncoder(
            rnn_config, mode=tf.contrib.learn.ModeKeys.TRAIN)

    def forward(self, q_embedding, q_len, reuse=None):
        LogInfo.begin_track('QBiRNNModule forward: ')

        with tf.variable_scope('QBiRNNModule', reuse=reuse):
            # stamps = q_embedding.get_shape().as_list()[1]
            stamps = self.q_max_len
            birnn_inputs = tf.unstack(q_embedding,
                                      num=stamps,
                                      axis=1,
                                      name='birnn_inputs')
            # rnn_input: a list of stamps elements: (batch, n_emb)
            encoder_output = self.rnn_encoder.encode(inputs=birnn_inputs,
                                                     sequence_length=q_len,
                                                     reuse=reuse)
            q_hidden = tf.stack(
                encoder_output.outputs, axis=1,
                name='q_hidden')  # (batch, q_max_len, dim_q_hidden)

        LogInfo.end_track()
        return q_hidden
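A possible way to wire QBiRNNModule up, shown only as a sketch: the rnn_config keys follow the comment above, but the values (and the exact cell_class spelling accepted by BidirectionalRNNEncoder) are assumptions.

import tensorflow as tf

# Hypothetical configuration; keys follow the comment above, values are illustrative.
rnn_config = {
    "cell_class": "LSTMCell",          # assumption about what the encoder accepts
    "dropout_input_keep_prob": 1.0,
    "dropout_output_keep_prob": 1.0,
    "num_layers": 1,
    "reuse": None,
}  # "num_units" is set inside the module to dim_q_hidden // 2 per direction

q_module = QBiRNNModule(dim_q_hidden=256, rnn_config=rnn_config,
                        q_max_len=20, dim_wd_emb=100)
q_embedding = tf.placeholder(tf.float32, [None, 20, 100])  # (batch, q_max_len, dim_wd_emb)
q_len = tf.placeholder(tf.int32, [None])
q_hidden = q_module.forward(q_embedding, q_len)            # (batch, q_max_len, dim_q_hidden)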
Example #4
    def _build_graph(self):
        self.context_idx = tf.placeholder(
            dtype=tf.int32, shape=[None, self.config.get("max_seq_len")])
        self.context_seq = tf.placeholder(dtype=tf.int32, shape=[
            None,
        ])
        self.pinlei_idx = tf.placeholder(dtype=tf.int32, shape=[
            None,
        ])

        with tf.device('/cpu:0'), tf.name_scope("embedding_layer"):
            # LogInfo.logs("Embedding shape: %s (%d*%d).", self.embedding.shape,
            #              self.config.get("vocab_size"), self.config.get("embedding_dim"))
            term_embedding = tf.get_variable(
                name="embedding",
                shape=[
                    self.config.get("vocab_size"),
                    self.config.get("embedding_dim")
                ],
                dtype=tf.float32,
                initializer=tf.constant_initializer(self.embedding))
            self.context_embedding = tf.nn.embedding_lookup(
                term_embedding, self.context_idx)
            self.pinlei_embedding = tf.nn.embedding_lookup(
                term_embedding, self.pinlei_idx)
            # a list of max_seq_len tensors, each [batch_size, embedding_dim], fed to rnn_encoder
            self.context_slice = [
                tf.squeeze(_input, [1])
                for _input in tf.split(self.context_embedding,
                                       self.config.get("max_seq_len"),
                                       axis=1)
            ]

        # bi-LSTM
        with tf.name_scope("rnn_encoder"):
            rnn_config = dict()
            key_list = [
                "cell_class", "num_units", "dropout_input_keep_prob",
                "dropout_output_keep_prob", "num_layers", "reuse"
            ]
            for key in key_list:
                rnn_config[key] = self.config.get(key)
            rnn_encoder = BidirectionalRNNEncoder(rnn_config, self.mode)
            self.encoder_output = rnn_encoder.encode(self.context_slice,
                                                     self.context_seq)

        # attention mechanism
        with tf.name_scope("attention"):
            att_config = dict()
            key_list = ["num_units"]
            for key in key_list:
                att_config[key] = self.config.get(key)

            if self.config.get("attention") == "bah":
                att = AttentionLayerBahdanau_old(att_config)
                self.query_hidden = att.build(
                    self.pinlei_embedding,
                    self.encoder_output.attention_values,
                    self.encoder_output.attention_values_length)
            elif self.config.get("attention") == "avg":
                att = AttentionLayerAvg_old()
                self.query_hidden = att.build(
                    self.encoder_output.attention_values,
                    self.encoder_output.attention_values_length)

        self.hidden_dim = self.query_hidden.get_shape().as_list()[-1]

        # training parameters
        with tf.name_scope("parameters"):
            self.W_p = tf.get_variable(
                name="W_p",
                shape=[self.config.get("embedding_dim"), self.hidden_dim],
                dtype=tf.float32,
                initializer=tf.contrib.layers.xavier_initializer(uniform=True))
            self.b_p = tf.get_variable(
                name="b_p",
                shape=[self.hidden_dim],
                dtype=tf.float32,
                initializer=tf.constant_initializer(0.0))
            self.W_f = tf.get_variable(
                name="W_f",
                shape=[self.hidden_dim * 2, self.hidden_dim],
                dtype=tf.float32,
                initializer=tf.contrib.layers.xavier_initializer(uniform=True))
            self.b_f = tf.get_variable(
                name="b_f",
                shape=[self.hidden_dim],
                dtype=tf.float32,
                initializer=tf.constant_initializer(0.0))
            self.W_o = tf.get_variable(
                name="W_o",
                shape=[self.hidden_dim, 1],
                dtype=tf.float32,
                initializer=tf.contrib.layers.xavier_initializer(uniform=True))
            self.b_o = tf.get_variable(
                name="b_o",
                shape=[1],
                dtype=tf.float32,
                initializer=tf.constant_initializer(0.0))
        # above bi-LSTM + attention
        with tf.name_scope("score"):
            self.pinlei_hidden = self.transfer(
                tf.add(tf.matmul(self.pinlei_embedding, self.W_p), self.b_p))
            self.final = self.transfer(
                tf.add(
                    tf.matmul(
                        tf.concat([self.query_hidden, self.pinlei_hidden], 1),
                        self.W_f), self.b_f))
            # self.score = tf.add(tf.matmul(self.final, self.W_o), self.b_o)  # tensorflow 1.0.0
            self.score = tf.nn.xw_plus_b(self.final, self.W_o, self.b_o)

        # hinge loss
        if self.mode == tf.contrib.learn.ModeKeys.TRAIN:
            self.loss = hinge_loss(
                self.score,
                int(self.config.get("batch_size") / self.config.get("PN")),
                self.config.get("PN"), self.config.get("margin"))
            self.train_op = get_optimizer(self.config.get("optimizer"),
                                          self.config.get("lr")).minimize(
                                              self.loss)
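The hinge_loss helper is called with the flat score vector, the number of groups (batch_size / PN) and the group size PN, but its implementation is not shown here. The sketch below assumes the common layout in which the first score of each PN-sized group is the positive candidate.

import tensorflow as tf

def hinge_loss_sketch(scores, row, col, margin):
    """Pairwise hinge loss sketch over PN-grouped scores.

    Assumes scores has row*col entries and that the first score in every group
    of `col` is the positive candidate -- an assumption, since the repo's
    hinge_loss implementation is not shown in this listing.
    """
    scores = tf.reshape(scores, [row, col])
    pos = scores[:, 0:1]                   # (row, 1) positive score per group
    neg = scores[:, 1:]                    # (row, col-1) negative scores
    return tf.reduce_mean(tf.nn.relu(margin - pos + neg))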
Example #5
    def _build_graph(self):
        self.query_idx = tf.placeholder(dtype=tf.int32,
                                        shape=[None, self.config.get("max_seq_len")])
        self.query_len = tf.placeholder(dtype=tf.int32,
                                        shape=[None, ])
        self.label = tf.placeholder(dtype=tf.int32,
                                    shape=[None, self.config.get("max_seq_len")])

        self.batch_size = self.config.get("batch_size")

        with tf.device('/cpu:0'), tf.name_scope("embedding_layer"):
            term_embedding = tf.get_variable(
                name="embedding",
                shape=[self.config.get("vocab_size"), self.config.get("embedding_dim")],
                dtype=tf.float32,
                initializer=tf.constant_initializer(self.embedding_vocab)
            )
            self.query_embedding = tf.nn.embedding_lookup(term_embedding, self.query_idx)
            # tf.split:    one tensor -> list of tensors
            # tf.stack:    list of tensors -> one tensor
            self.query_slice = [
                tf.squeeze(_input, [1])
                for _input in tf.split(self.query_embedding,
                                       self.config.get("max_seq_len"),
                                       axis=1)
            ]
            # better style: use unstack!  one tensor -> list of tensors
            # equal to the above one
            # self.query_slice = tf.unstack(self.query_embedding, axis=1)

        # bi-LSTM
        with tf.name_scope("rnn_encoder"):
            rnn_config = dict()
            key_list = ["cell_class", "num_units", "dropout_input_keep_prob",
                        "dropout_output_keep_prob", "num_layers"]
            for key in key_list:
                rnn_config[key] = self.config.get(key)
            rnn_encoder = BidirectionalRNNEncoder(rnn_config, self.mode)
            self.biLstm = rnn_encoder.encode(self.query_slice, self.query_len)

        # output dim = 2 * rnn cell dim (fw + bw)
        self.hidden_dim = self.config.get("num_units") * 2
        self.biLstm_clip = tf.clip_by_value(self.biLstm.attention_values,
                                            -self.config.get("grad_clip"),
                                            self.config.get("grad_clip"))
        # training parameters
        with tf.name_scope("parameters"):
            self.W_l = tf.get_variable(name="W_l",
                                       shape=[self.hidden_dim,
                                              self.config.get("label_num")],
                                       dtype=tf.float32,
                                       initializer=tf.contrib.layers.xavier_initializer(uniform=True))
            self.b_l = tf.get_variable(name="b_l",
                                       shape=[self.config.get("label_num")],
                                       dtype=tf.float32,
                                       initializer=tf.constant_initializer(0.0))

        # above bi-LSTM
        self.outputs = tf.reshape(tensor=self.biLstm_clip,
                                  shape=[-1, self.hidden_dim])
        self.label_matrix = tf.nn.xw_plus_b(self.outputs, self.W_l, self.b_l)
        # [B, T, label_num]
        self.logits = tf.reshape(tensor=self.label_matrix,
                                 shape=[-1, self.config.get("max_seq_len"),
                                        self.config.get("label_num")])
        # [label_num, label_num]
        self.transition_mat = tf.get_variable(
            "transitions",
            shape=[self.config.get("label_num")+1, self.config.get("label_num")+1],
            initializer=tf.contrib.layers.xavier_initializer(uniform=True))

        # ===================================== Loss ====================================== #
        if self.mode == tf.contrib.learn.ModeKeys.TRAIN:

            # # softmax sequence loss for sequence nlu
            # self.loss = softmax_sequence_loss(logits=self.logits,
            #                                   targets=self.label,
            #                                   sequence_length=self.query_len)
            # self.loss = tf.reduce_mean(self.loss)

            # padding logits for crf loss, length += 1
            small = -1000.0
            start_logits = tf.concat(
                [small * tf.ones(shape=[self.batch_size, 1, self.config.get("label_num")]),
                 tf.zeros(shape=[self.batch_size, 1, 1])],
                axis=-1
            )
            LogInfo.logs(start_logits.get_shape().as_list())
            pad_logits = tf.cast(small * tf.ones([self.batch_size,
                                                  self.config.get("max_seq_len"), 1]), tf.float32)
            LogInfo.logs(pad_logits.get_shape().as_list())
            self.logits = tf.concat([self.logits, pad_logits], axis=-1)
            self.logits = tf.concat([start_logits, self.logits], axis=1)
            LogInfo.logs(self.logits.get_shape().as_list())
            targets = tf.concat(
                [tf.cast(self.config.get("label_num")*tf.ones([self.batch_size, 1]),
                         tf.int32),
                 self.label], axis=-1
            )
            LogInfo.logs(targets.get_shape().as_list())

            # CRF layer
            self.log_likelihood, self.transition_mat = \
                tf.contrib.crf.crf_log_likelihood(
                    inputs=self.logits,
                    tag_indices=targets,
                    transition_params=self.transition_mat,
                    sequence_lengths=self.query_len+1)
            self.loss = tf.reduce_mean(-self.log_likelihood)

            # train op
            self.global_step = tf.Variable(0, name="global_step",  trainable=False)
            optimizer = get_optimizer(self.config.get("optimizer"), self.config.get("lr"))
            grads_and_vars = optimizer.compute_gradients(self.loss)
            self.train_op = optimizer.apply_gradients(grads_and_vars, global_step=self.global_step)
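The padding above adds an artificial start tag (index label_num) so the CRF sees a fixed begin state. Stripped of that trick, the core tf.contrib.crf call looks like the following sketch with illustrative shapes:

import tensorflow as tf

# Core CRF loss without the start-tag padding used above; shapes are illustrative.
num_tags, max_len = 10, 20
logits = tf.placeholder(tf.float32, [None, max_len, num_tags])   # per-step tag scores
tags = tf.placeholder(tf.int32, [None, max_len])                 # gold tag indices
lengths = tf.placeholder(tf.int32, [None])                       # true sequence lengths

log_likelihood, transition_params = tf.contrib.crf.crf_log_likelihood(
    inputs=logits, tag_indices=tags, sequence_lengths=lengths)
loss = tf.reduce_mean(-log_likelihood)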
Example #6
    def _build_graph(self):
        self.query_idx = tf.placeholder(
            dtype=tf.int32, shape=[None, self.config.get("max_seq_len")])
        self.query_len = tf.placeholder(dtype=tf.int32, shape=[
            None,
        ])
        self.label = tf.placeholder(
            dtype=tf.int32, shape=[None, self.config.get("max_seq_len")])
        self.intent = tf.placeholder(dtype=tf.int32, shape=[
            None,
        ])
        self.link_mask = tf.placeholder(
            dtype=tf.int32, shape=[None, self.config.get("max_seq_len")])
        self.entity_idx = tf.placeholder(dtype=tf.int32,
                                         shape=[None,
                                                self.config.get("PN")])

        with tf.device('/cpu:0'), tf.name_scope("embedding_layer"):
            term_embedding = tf.get_variable(
                name="embedding",
                shape=[
                    self.config.get("vocab_size"),
                    self.config.get("embedding_dim")
                ],
                dtype=tf.float32,
                initializer=tf.constant_initializer(self.embedding_vocab))
            self.query_embedding = tf.nn.embedding_lookup(
                term_embedding, self.query_idx)
            self.entity_embedding = tf.nn.embedding_lookup(
                term_embedding, self.entity_idx)
            # tf.split:    one tensor -> list of tensors
            # tf.stack:    list of tensors -> one tensor
            self.query_slice = [
                tf.squeeze(_input, [1])
                for _input in tf.split(self.query_embedding,
                                       self.config.get("max_seq_len"),
                                       axis=1)
            ]

        # bi-LSTM
        with tf.name_scope("rnn_encoder"):
            rnn_config = dict()
            key_list = [
                "cell_class", "num_units", "dropout_input_keep_prob",
                "dropout_output_keep_prob", "num_layers"
            ]
            for key in key_list:
                rnn_config[key] = self.config.get(key)
            rnn_encoder = BidirectionalRNNEncoder(rnn_config, self.mode)
            self.encoder_output = rnn_encoder.encode(self.query_slice,
                                                     self.query_len)

        # hidden representation for intent detection
        with tf.name_scope("intent_hidden"):
            # average attention
            att_config = dict()
            key_list = ["num_units"]
            for key in key_list:
                att_config[key] = self.config.get(key)

            att = AttentionLayerAvg()
            self.query_hidden_avg = att.build(
                self.encoder_output.attention_values,
                self.encoder_output.attention_values_length)

        self.hidden_dim = self.query_hidden_avg.get_shape().as_list()[-1]

        # training parameters
        with tf.name_scope("parameters"):
            self.W_i = tf.get_variable(
                name="W_i",
                shape=[self.hidden_dim,
                       self.config.get("intent_num")],
                dtype=tf.float32,
                initializer=tf.contrib.layers.xavier_initializer(uniform=True))
            self.b_i = tf.get_variable(
                name="b_i",
                shape=[self.config.get("intent_num")],
                dtype=tf.float32,
                initializer=tf.constant_initializer(0.0))
            self.W_l = tf.get_variable(
                name="W_l",
                shape=[self.hidden_dim,
                       self.config.get("label_num")],
                dtype=tf.float32,
                initializer=tf.contrib.layers.xavier_initializer(uniform=True))
            self.b_l = tf.get_variable(
                name="b_l",
                shape=[self.config.get("label_num")],
                dtype=tf.float32,
                initializer=tf.constant_initializer(0.0))
            self.W_e = tf.get_variable(
                name="W_e",
                shape=[self.hidden_dim * 2,
                       self.config.get("embedding_dim")],
                dtype=tf.float32,
                initializer=tf.contrib.layers.xavier_initializer(uniform=True))
            self.b_e = tf.get_variable(
                name="b_e",
                shape=[self.config.get("embedding_dim")],
                dtype=tf.float32,
                initializer=tf.constant_initializer(0.0))

        # above bi-LSTM

        # ---------------------------------- Intent Detection --------------------------- #
        self.intent_layer = tf.nn.xw_plus_b(self.query_hidden_avg, self.W_i,
                                            self.b_i)

        # ---------------------------------- Sequence Labeling -------------------------- #
        self.outputs = tf.reshape(tensor=self.encoder_output.outputs,
                                  shape=[-1, self.hidden_dim])
        self.label_layer = tf.nn.xw_plus_b(self.outputs, self.W_l, self.b_l)
        # [B, T, class_num]
        self.label_layer = tf.reshape(tensor=self.label_layer,
                                      shape=[
                                          -1,
                                          self.config.get("max_seq_len"),
                                          self.config.get("label_num")
                                      ])

        # ---------------------------------- Entity Linking ----------------------------- #
        """
        Note that entity linking in the evaluation step is based on the result of
        sequence labeling, so we do a two-step evaluation.
        """

        # [B, h_dim]
        self.mention = add_mask_then_avg(self.encoder_output.attention_values,
                                         self.link_mask)
        # [B, h_dim]
        self.context = add_mask_then_avg(self.encoder_output.attention_values,
                                         1 - self.link_mask)
        # [B, w2v_dim]
        self.left = tf.nn.xw_plus_b(
            tf.concat([self.mention, self.context], axis=1), self.W_e,
            self.b_e)
        # [B, 1, w2v_dim]
        self.left = tf.expand_dims(self.left, axis=1)
        # [B, PN, w2v_dim]
        self.left = tf.tile(self.left, multiples=[1, self.config.get("PN"), 1])
        # [B*PN, w2v_dim]
        self.left = tf.reshape(self.left,
                               shape=[-1, self.config.get("embedding_dim")])
        # [B*PN, w2v_dim]
        self.right = tf.reshape(self.entity_embedding,
                                shape=[-1,
                                       self.config.get("embedding_dim")])

        # [B*PN, ]
        self.link_score = cosine_sim(self.left, self.right)

        # ===================================== Loss ====================================== #
        if self.mode == tf.contrib.learn.ModeKeys.TRAIN:
            # loss for intent detection
            self.intent_loss = \
                tf.nn.sparse_softmax_cross_entropy_with_logits(logits=self.intent_layer,
                                                               labels=self.intent,
                                                               name="intent_loss")
            self.intent_loss = tf.reduce_mean(self.intent_loss)

            # loss for sequence labeling
            self.label_loss = softmax_sequence_loss(
                logits=self.label_layer,
                targets=self.label,
                sequence_length=self.query_len)
            self.label_loss = tf.reduce_mean(self.label_loss)

            # loss for entity linking
            self.link_loss = hinge_loss(scores=self.link_score,
                                        row=self.config.get("batch_size"),
                                        col=self.config.get("PN"),
                                        margin=self.config.get("margin"))

            # train op, currently three losses have equal weights
            self.train_op = get_optimizer(
                self.config.get("optimizer"),
                self.config.get("lr")).minimize(self.intent_loss +
                                                self.label_loss +
                                                self.link_loss)
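The cosine_sim helper used for the linking score is not included in this listing; a generic row-wise cosine similarity would look roughly like this sketch:

import tensorflow as tf

def cosine_sim_sketch(left, right, eps=1e-8):
    """Row-wise cosine similarity for (N, dim) tensors.

    A generic sketch; the repo's cosine_sim helper is not shown above and may
    differ in details such as epsilon handling.
    """
    dot = tf.reduce_sum(left * right, axis=1)                      # (N,)
    left_norm = tf.sqrt(tf.reduce_sum(tf.square(left), axis=1))    # (N,)
    right_norm = tf.sqrt(tf.reduce_sum(tf.square(right), axis=1))  # (N,)
    return dot / tf.maximum(left_norm * right_norm, eps)           # (N,)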
Example #7
class SkBiRNNModule(SkBaseModule):
    def __init__(self, path_max_len, dim_item_hidden, dim_kb_emb,
                 dim_sk_hidden, data_source, rnn_config):
        super(SkBiRNNModule, self).__init__(path_max_len=path_max_len,
                                            dim_item_hidden=dim_item_hidden,
                                            dim_kb_emb=dim_kb_emb,
                                            dim_sk_hidden=dim_sk_hidden)
        self.data_source = data_source
        assert self.data_source in ('kb', 'word', 'both')

        rnn_config['num_units'] = dim_sk_hidden // 2  # bidirectional: half the hidden size per direction
        self.rnn_encoder = BidirectionalRNNEncoder(
            rnn_config, mode=tf.contrib.learn.ModeKeys.TRAIN)

    # Input:
    #   path_wd_hidden: (batch, path_max_len, dim_item_hidden)
    #   path_kb_hidden: (batch, path_max_len, dim_kb_emb)
    #   path_len: (batch, ) as int32
    #   focus_wd_hidden: (batch, dim_item_hidden)
    #   focus_kb_hidden: (batch, dim_kb_emb)
    # Output:
    #   sk_hidden: (batch, dim_sk_hidden)
    def forward(self,
                path_wd_hidden,
                path_kb_hidden,
                path_len,
                focus_wd_hidden,
                focus_kb_hidden,
                reuse=None):
        LogInfo.begin_track('SkBiRNNModule forward: ')

        with tf.variable_scope('SkBiRNNModule', reuse=reuse):
            if self.data_source == 'kb':
                use_path_hidden = path_kb_hidden
                use_focus_hidden = focus_kb_hidden
            elif self.data_source == 'word':
                use_path_hidden = path_wd_hidden
                use_focus_hidden = focus_wd_hidden
            else:
                use_path_hidden = tf.concat([path_kb_hidden, path_wd_hidden],
                                            axis=-1,
                                            name='use_path_hidden')
                # (batch, path_max_len, dim_kb_emb + dim_item_hidden)
                use_focus_hidden = tf.concat(
                    [focus_kb_hidden, focus_wd_hidden],
                    axis=-1,
                    name='use_focus_hidden')
                # (batch, dim_kb_emb + dim_item_hidden)

            use_path_emb_input = tf.concat(
                [tf.expand_dims(use_focus_hidden, axis=1), use_path_hidden],
                axis=1,
                name='use_path_emb_input'
            )  # (batch, path_max_len + 1, dim_use)
            show_tensor(use_path_emb_input)
            use_path_len = path_len + 1
            stamps = self.path_max_len + 1
            birnn_inputs = tf.unstack(use_path_emb_input,
                                      num=stamps,
                                      axis=1,
                                      name='birnn_inputs')
            encoder_output = self.rnn_encoder.encode(
                inputs=birnn_inputs, sequence_length=use_path_len, reuse=reuse)
            rnn_outputs = tf.stack(
                encoder_output.outputs, axis=1,
                name='rnn_outputs')  # (batch, path_max_len + 1, dim_sk_hidden)

            # With no attention in this BiRNN module, we simply average the RNN outputs over time.

            sum_sk_hidden = tf.reduce_sum(
                rnn_outputs, axis=1,
                name='sum_sk_hidden')  # (batch, dim_sk_hidden)
            use_path_len_mat = tf.cast(
                tf.expand_dims(use_path_len, axis=1),
                dtype=tf.float32,
                name='use_path_len_mat')  # (batch, 1) as float32
            sk_hidden = tf.div(sum_sk_hidden,
                               use_path_len_mat,
                               name='sk_hidden')  # (batch, dim_sk_hidden)

        LogInfo.end_track()
        return sk_hidden
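A hypothetical instantiation of SkBiRNNModule in 'both' mode, with made-up dimensions, just to make the input shapes from the comments above concrete:

import tensorflow as tf

# Illustrative dimensions and config; the cell_class value is an assumption about
# what BidirectionalRNNEncoder accepts.
rnn_config = {
    "cell_class": "LSTMCell",
    "dropout_input_keep_prob": 1.0,
    "dropout_output_keep_prob": 1.0,
    "num_layers": 1,
    "reuse": None,
}  # "num_units" is filled in by the module (dim_sk_hidden // 2 per direction)

sk_module = SkBiRNNModule(path_max_len=3, dim_item_hidden=128, dim_kb_emb=100,
                          dim_sk_hidden=256, data_source='both',
                          rnn_config=rnn_config)
path_wd_hidden = tf.placeholder(tf.float32, [None, 3, 128])   # (batch, path_max_len, dim_item_hidden)
path_kb_hidden = tf.placeholder(tf.float32, [None, 3, 100])   # (batch, path_max_len, dim_kb_emb)
path_len = tf.placeholder(tf.int32, [None])
focus_wd_hidden = tf.placeholder(tf.float32, [None, 128])     # (batch, dim_item_hidden)
focus_kb_hidden = tf.placeholder(tf.float32, [None, 100])     # (batch, dim_kb_emb)
sk_hidden = sk_module.forward(path_wd_hidden, path_kb_hidden, path_len,
                              focus_wd_hidden, focus_kb_hidden)  # (batch, dim_sk_hidden)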