def build_lstm_cell(inps, num_layers, num_units):
    # Stack num_layers LSTM cells into a single multi-layer cell.
    lstms = [LSTMCell(num_units, dtype=tf.float32) for _ in range(num_layers)]
    multilayer_lstm = tf.contrib.rnn.MultiRNNCell(lstms)
    # Inputs are time-major ([seq_len, batch, dim]), so dimension 1 is the batch size.
    zero_state = multilayer_lstm.zero_state(tf.shape(inps)[1], tf.float32)
    outputs, _ = tf.nn.dynamic_rnn(multilayer_lstm,
                                   inps,
                                   initial_state=zero_state,
                                   parallel_iterations=1024,
                                   time_major=True)
    return outputs
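
A minimal usage sketch for the helper above (TF 1.x assumed; the placeholder shape is illustrative):

# Hypothetical usage: feed time-major inputs of shape [seq_len, batch, input_dim].
inps = tf.placeholder(tf.float32, shape=[None, None, 128], name='inps')
rnn_outputs = build_lstm_cell(inps, num_layers=2, num_units=256)  # [seq_len, batch, 256]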
Example #2
        def cal_loss_logit(embedded, keep_prob, reuse=True, scope="loss"):
            with tf.variable_scope(scope, reuse=tf.AUTO_REUSE) as scope:
                rnn_outputs, _ = bi_rnn(LSTMCell(self.hidden_size),
                                        LSTMCell(self.hidden_size),
                                        inputs=embedded, dtype=tf.float32)

                # Attention
                H = tf.add(rnn_outputs[0], rnn_outputs[1])  # fw + bw
                M = tf.tanh(H)  # M = tanh(H)  (batch_size, seq_len, HIDDEN_SIZE)
                # attention weights: softmax over the sequence dimension -> (batch_size, max_len)
                alpha = tf.nn.softmax(
                    tf.reshape(tf.matmul(tf.reshape(M, [-1, self.hidden_size]),
                                         tf.reshape(W, [-1, 1])),
                               [-1, self.max_len]))
                r = tf.matmul(tf.transpose(H, [0, 2, 1]),
                              tf.reshape(alpha, [-1, self.max_len, 1]))  # (batch_size, HIDDEN_SIZE, 1)
                r = tf.squeeze(r)
                h_star = tf.tanh(r)
                drop = tf.nn.dropout(h_star, keep_prob)

                # Fully connected layer(dense layer)
                y_hat = tf.nn.xw_plus_b(drop, W_fc, b_fc, name='logits')

            return y_hat, tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=y_hat, labels=self.label))
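
The closure above also captures W, W_fc and b_fc from the enclosing scope. A hedged sketch of how they might be declared (the shapes and the n_class attribute are assumptions):

# Hypothetical declarations for the captured variables (shapes and self.n_class are assumed):
W = tf.Variable(tf.random_normal([self.hidden_size]), name='attention_W')
W_fc = tf.Variable(tf.truncated_normal([self.hidden_size, self.n_class], stddev=0.1), name='W_fc')
b_fc = tf.Variable(tf.constant(0.1, shape=[self.n_class]), name='b_fc')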
Example #3
def add_cell_lstm(inps, state, num_units, input_dim, init_parameter):
    stddevs = compute_stddevs(num_units, input_dim, init_parameter)
    lstms = [
        LSTMCell(nu,
                 dtype=tf.float32,
                 state_is_tuple=False,
                 initializer=tf.truncated_normal_initializer(stddev=stddev))
        for nu, stddev in zip(num_units, stddevs)
    ]
    multilayer_lstm = tf.contrib.rnn.MultiRNNCell(lstms, state_is_tuple=False)
    # print("(add_cell_lstm)state:", state)
    # print("(add_cell_lstm)multilayer_lstm.state_size:", multilayer_lstm.state_size)
    state = prepare_init_state(state, inps, multilayer_lstm, 'cell')
    if state is None:
        state = multilayer_lstm.zero_state(tf.shape(inps)[1], tf.float32)

    # print("(add_cell_lstm)multilayer_lstm.state:", multilayer_lstm.state)
    output, state = tf.nn.dynamic_rnn(multilayer_lstm,
                                      inps,
                                      initial_state=state,
                                      parallel_iterations=1024,
                                      time_major=True)
    return output, state
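
A hedged usage sketch for add_cell_lstm (compute_stddevs and prepare_init_state are assumed to be defined elsewhere; shapes and hyperparameters are illustrative):

# Hypothetical usage (TF 1.x; inputs are time-major [seq_len, batch, input_dim]):
inps = tf.placeholder(tf.float32, shape=[None, None, 32], name='inps')
output, state = add_cell_lstm(inps, state=None, num_units=[64, 64],
                              input_dim=32, init_parameter=1.0)

Example #4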
    def _create_model(self, mode, input_ids, input_mask, segment_ids, labels,
                      slot_labels, labels_mask, drop_keep_prob,
                      entity_type_ids, sequence_lengths):
        """Creates a LaserTagger model."""
        is_training = (mode == tf.estimator.ModeKeys.TRAIN)
        model = modeling.BertModel(
            config=self._config,
            is_training=is_training,
            input_ids=input_ids,
            input_mask=input_mask,
            token_type_ids=segment_ids,
            use_one_hot_embeddings=self._use_one_hot_embeddings)

        final_layer = model.get_sequence_output()
        # final_hidden = model.get_pooled_output()

        if is_training:
            # I.e., 0.1 dropout
            # final_hidden = tf.nn.dropout(final_hidden, keep_prob=drop_keep_prob)
            final_layer = tf.nn.dropout(final_layer, keep_prob=drop_keep_prob)

        # incorporate entity-type information
        batch_size, seq_length = modeling.get_shape_list(input_ids)

        self.entity_type_embedding = tf.get_variable(
            name="entity_type_embedding",
            shape=(self.entity_type_num, self._config.hidden_size),
            dtype=tf.float32,
            trainable=True,
            initializer=tf.random_uniform_initializer(
                -self._config.initializer_range * 100,
                self._config.initializer_range * 100,
                seed=20))

        with tf.init_scope():
            impact_weight_init = tf.constant(1.0 / self.entity_type_num,
                                             dtype=tf.float32,
                                             shape=(1, self.entity_type_num))
        self.impact_weight = tf.Variable(impact_weight_init,
                                         dtype=tf.float32,
                                         name="impact_weight")  # 不同类型的影响权重
        impact_weight_matrix = tf.tile(self.impact_weight,
                                       multiples=[batch_size * seq_length, 1])

        entity_type_ids_matrix1 = tf.cast(tf.reshape(
            entity_type_ids, [batch_size * seq_length, self.entity_type_num]),
                                          dtype=tf.float32)
        entity_type_ids_matrix = tf.multiply(entity_type_ids_matrix1,
                                             impact_weight_matrix)
        entity_type_emb = tf.matmul(entity_type_ids_matrix,
                                    self.entity_type_embedding)
        final_layer = final_layer + tf.reshape(entity_type_emb, [
            batch_size, seq_length, self._config.hidden_size
        ])  # TODO: 0.7071067811865476 is sqrt(2)/2
        # final_layer = tf.concat([final_layer, tf.reshape(entity_type_emb, [batch_size, seq_length,self._config.hidden_size])], axis=-1)

        if is_training:
            final_layer = tf.nn.dropout(final_layer, keep_prob=drop_keep_prob)

        (output_fw_seq,
         output_bw_seq), ((c_fw, h_fw),
                          (c_bw, h_bw)) = tf.nn.bidirectional_dynamic_rnn(
                              cell_fw=LSTMCell(self.lstm_hidden_size),
                              cell_bw=LSTMCell(self.lstm_hidden_size),
                              inputs=final_layer,
                              sequence_length=sequence_lengths,
                              dtype=tf.float32)
        layer_matrix = tf.concat([output_fw_seq, output_bw_seq], axis=-1)
        final_hidden = tf.concat([c_fw, c_bw], axis=-1)

        layer_matrix = tf.contrib.layers.layer_norm(inputs=layer_matrix,
                                                    begin_norm_axis=-1,
                                                    begin_params_axis=-1)

        intent_logits = tf.layers.dense(
            final_hidden,
            self._num_tags,
            kernel_initializer=tf.truncated_normal_initializer(stddev=0.02),
            name="output_projection")
        slot_logits = tf.layers.dense(
            layer_matrix,
            self.num_slot_tags,
            kernel_initializer=tf.truncated_normal_initializer(stddev=0.02),
            name="slot_projection")

        with tf.variable_scope("loss"):
            loss = None
            per_example_intent_loss = None
            per_example_slot_loss = None
            if mode != tf.estimator.ModeKeys.PREDICT:
                per_example_intent_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
                    labels=labels, logits=intent_logits)
                slot_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
                    labels=slot_labels, logits=slot_logits)
                per_example_slot_loss = tf.truediv(
                    tf.reduce_sum(slot_loss, axis=1),
                    tf.cast(tf.reduce_sum(labels_mask, axis=1), tf.float32))

                # from tensorflow.contrib.crf import crf_log_likelihood
                # from tensorflow.contrib.crf import viterbi_decode
                # batch_size = tf.shape(slot_logits)[0]
                # print(curLine(), batch_size, tf.constant([self._max_seq_length]))
                # length_batch = tf.tile(tf.constant([self._max_seq_length]), [batch_size])
                # print(curLine(), batch_size, "length_batch:", length_batch)
                # per_example_slot_loss, self.transition_params = crf_log_likelihood(inputs=slot_logits,
                #                 tag_indices=slot_labels,sequence_lengths=length_batch)
                # print(curLine(), "per_example_slot_loss:", per_example_slot_loss) # shape=(batch_size,)
                # print(curLine(), "self.transition_params:", self.transition_params) # shape=(9, 9)

                loss = tf.reduce_mean(self.intent_ratio *
                                      per_example_intent_loss +
                                      self.slot_ratio * per_example_slot_loss)
            pred_intent = tf.cast(tf.argmax(intent_logits, axis=-1), tf.int32)
            pred_slot = tf.cast(tf.argmax(slot_logits, axis=-1), tf.int32)
            return (loss, per_example_slot_loss, pred_intent, pred_slot,
                    batch_size, entity_type_emb, impact_weight_matrix,
                    entity_type_ids_matrix, final_layer, slot_logits)
Example #5
'''
with tf.name_scope('Embedding Layer'):
    embedding = tf.Variable(tf.random_uniform([vocabulary_size, embed_size], -1, 1))
    embeded = tf.nn.embedding_lookup(embedding, batch_ph)
    tf.summary.histogram('embedding', embedding)
'''

##RNN layers
lstm_size = 32
lstm_layers = 3

output = batch_ph
for i in range(lstm_layers):
    with tf.variable_scope('BiLSTM_Layer_{}'.format(i)):
        lstm_fw = LSTMCell(
            lstm_size
        )  #, initializer=tf.truncated_normal_initializer(-0.1, 0.1, seed=2)
        lstm_bw = LSTMCell(lstm_size)
        cell_fw = tf.contrib.rnn.DropoutWrapper(
            lstm_fw, output_keep_prob=keep_prob_ph_rnn)
        cell_bw = tf.contrib.rnn.DropoutWrapper(
            lstm_bw, output_keep_prob=keep_prob_ph_rnn)

        (output_fw, output_bw), final_state = BiRNN(cell_fw,
                                                    cell_bw,
                                                    output,
                                                    dtype=tf.float32)
        output = tf.concat((output_fw, output_bw), 2)

##Attention + Dropout
with tf.variable_scope('BiLSTM_Layer_{}'.format(lstm_layers)):
Example #6
    def build_graph(self):
        """ Build the main architecture of the graph. """
        random.seed(310)
        tf.set_random_seed(902)
        print("building graph")

        with tf.variable_scope('model', reuse=self.reuse):
            ### Lookup ELMo Embedding ###
            self.x_elmo = layers.Lambda(
                lambda inputs: ElmoEmbedding(inputs, elmo_model),
                output_shape=(1024, ))(self.x_elmo_input)

            shape = tf.shape(self.x_elmo)
            self.shape = shape
            #            self.glove = tf.Variable(tf.random_uniform([tf.shape(self.glove)[0], self.embed_dimensions], -1.0, 1.0),trainable=True)

            if self.glove_include:
                ### Lookup Glove Vectors ###
                batch_embedded = tf.nn.embedding_lookup(self.glove, self.x)
                batch_embedded = batch_embedded[:, -shape[1]:, :]

                ### Include POS ###
                if self.pos_include:
                    ### POS-TAG Embedding ###
                    embeddings_var = tf.Variable(tf.random_uniform(
                        [12, self.pos_dimensions], -1.0, 1.0),
                                                 trainable=True)
                    self.pos_embedding = tf.nn.embedding_lookup(
                        embeddings_var, self.pos)

                    self.pos_embedded = self.pos_embedding[:, -shape[1]:, :]
                    batch_embedded = tf.concat(
                        [batch_embedded, self.pos_embedded], axis=2)

                if self.layer_1_include:
                    hid = 2 * self.hidden_size

                    if self.layer_1 == 'lstm':
                        rnn_outputs, _ = bi_rnn(
                            LSTMCell(self.hidden_size,
                                     use_peepholes=self.peephole_1),
                            LSTMCell(self.hidden_size,
                                     use_peepholes=self.peephole_2),
                            inputs=batch_embedded,
                            dtype=tf.float32,
                            scope='rnn_1')

                        fw_outputs, bw_outputs = rnn_outputs
                        layer = tf.concat([fw_outputs, bw_outputs], axis=2)
                    elif self.layer_1 == 'gru':
                        rnn_outputs, _ = bi_rnn(GRUCell(self.hidden_size),
                                                GRUCell(self.hidden_size),
                                                inputs=batch_embedded,
                                                dtype=tf.float32,
                                                scope='rnn_1')

                        fw_outputs, bw_outputs = rnn_outputs
                        layer = tf.concat([fw_outputs, bw_outputs], axis=2)
                    else:
                        conv_layer = tf.layers.conv1d(
                            inputs=batch_embedded,
                            filters=self.hidden_size * 2,
                            kernel_size=self.kernel_size,
                            strides=1,
                            padding="same",
                            activation=tf.nn.relu)
                        layer = conv_layer
                else:
                    layer = batch_embedded
                    hid = self.hidden_size
                    if self.pos_include:
                        hid += self.pos_dimensions

            print(self.hidden_size)

            # FLAGS Including ELMO and Glove
            if self.glove_include and self.elmo:
                H_1 = tf.concat([layer, self.x_elmo], axis=2)
                hid += 1024
            elif self.glove_include:
                H_1 = layer
            elif self.elmo:
                H_1 = self.x_elmo
                hid = 1024

            if self.layer_2 == 'lstm':
                rnn_outputs_2, _ = bi_rnn(
                    LSTMCell(hid, use_peepholes=self.peephole_3),
                    LSTMCell(hid, use_peepholes=self.peephole_4),
                    inputs=H_1,
                    dtype=tf.float32,
                    scope='rnn_2')

                fw_outputs_2, bw_outputs_2 = rnn_outputs_2
                H = tf.concat([fw_outputs_2, bw_outputs_2], axis=2)
            elif self.layer_2 == 'gru':
                rnn_outputs_2, _ = bi_rnn(GRUCell(hid),
                                          GRUCell(hid),
                                          inputs=H_1,
                                          dtype=tf.float32,
                                          scope='rnn_2')

                fw_outputs_2, bw_outputs_2 = rnn_outputs_2
                H = tf.concat([fw_outputs_2, bw_outputs_2], axis=2)
            elif self.layer_2 == 'conv':
                conv_layer = tf.layers.conv1d(inputs=H_1,
                                              filters=hid,
                                              kernel_size=self.kernel_size,
                                              strides=1,
                                              padding="same",
                                              activation=tf.nn.relu)
                H = conv_layer
                hid = tf.cast(hid / 2, tf.int32)
            else:
                H = H_1
                hid = tf.cast(hid / 2, tf.int32)

            hid *= 2

            ### Check whether any sequence in the batch has length 0 ###
            condition = tf.equal(tf.reduce_min(self.seq_len), 0)

            ### FLAG Including attention ###
            if self.attention:
                with tf.variable_scope('attention', reuse=self.reuse):
                    M = tf.tanh(
                        H)  # M = tanh(H)  (batch_size, seq_len, HIDDEN_SIZE)

                    dropout_layer_attention = tf.layers.dropout(
                        inputs=tf.reshape(M, [-1, hid]),
                        rate=self.attention_prob,
                        training=self.is_training,
                        seed=847)
                    self.dense = tf.layers.dense(
                        inputs=dropout_layer_attention,
                        units=self.num_attention,
                        use_bias=False)
                    ### Pool - Max or Mean ###
                    if self.pool_mean:
                        self.pool = tf.reduce_mean(self.dense, axis=1)
                    else:
                        self.pool = tf.reduce_max(self.dense, axis=1)

                    ### Setting for stride 2 ###
                    #self.alpha = tf.exp(tf.reshape(self.pool,
                    #         [-1, tf.cast(tf.round(tf.add(tf.div(tf.cast(shape[1], dtype = tf.float32), 2.0), 0.1)),
                    #                      dtype = tf.int32)]))
                    self.alpha = tf.exp(tf.reshape(self.pool, [-1, shape[1]]))

                    ### Masking the sequences ###
                    if self.mask:
                        with tf.variable_scope('mask', reuse=self.reuse):
                            self.alpha = tf.reverse(self.alpha, axis=[1])
                            mask = tf.sequence_mask(self.seq_len)
                            mask = tf.to_float(mask)

                            self.alpha = tf.cond(condition, lambda: self.alpha,
                                                 lambda: self.alpha * mask)
                            self.alpha = tf.reverse(self.alpha, axis=[1])

                    #### Softmax ####
                    self.alpha = self.alpha / tf.expand_dims(
                        tf.reduce_sum(self.alpha, axis=1), 1)

                    ### Derive the word with the highest attention ###
                    pos = tf.argmax(self.alpha, axis=1)
                    sparse_tensor = tf.string_split(self.x_elmo_input)
                    dense_tensor = tf.sparse_tensor_to_dense(sparse_tensor, '')
                    rg = tf.range(0, shape[0])
                    indices = tf.transpose([rg, tf.cast(pos, tf.int32)],
                                           [1, 0])
                    self.best_example = tf.gather_nd(dense_tensor, indices)

                    ### Computing weighted average ###
                    # r = tf.matmul(tf.transpose(H, [0, 2, 1]), tf.reshape(self.alpha,
                    #                                                      [-1, tf.cast(tf.round(tf.add(
                    #                                                          tf.div(tf.cast(shape[1], dtype=tf.float32),
                    #                                                                 2.0), 0.1)),
                    #                                                                   dtype=tf.int32), 1]))
                    r = tf.matmul(tf.transpose(H, [0, 2, 1]),
                                  tf.reshape(self.alpha, [-1, shape[1], 1]))
                    r = tf.squeeze(r, axis=2)
            else:
                with tf.variable_scope('rnn_average', reuse=self.reuse):
                    ### Take a simple mean of all the words (INCLUDING padding) ###
                    ### Masking the sequences ###
                    if self.mask:
                        with tf.variable_scope('mask', reuse=self.reuse):
                            self.alpha = tf.cond(
                                condition,
                                lambda: tf.tile(tf.expand_dims(shape[1], 0),
                                                tf.expand_dims(shape[0], 0)),
                                lambda: self.seq_len)
                            self.alpha = tf.reciprocal(tf.to_float(self.alpha))
                            self.alpha = tf.tile(tf.expand_dims(self.alpha, 1),
                                                 [1, shape[1]])

                            self.alpha = tf.reverse(self.alpha, axis=[1])
                            mask = tf.sequence_mask(self.seq_len)
                            mask = tf.to_float(mask)

                            self.alpha = tf.cond(condition, lambda: self.alpha,
                                                 lambda: self.alpha * mask)
                            self.alpha = tf.reverse(self.alpha, axis=[1])
                    else:
                        self.alpha = tf.tile(tf.expand_dims(shape[1], 0),
                                             tf.expand_dims(shape[0], 0))
                        self.alpha = tf.reciprocal(tf.to_float(self.alpha))
                        self.alpha = tf.tile(tf.expand_dims(self.alpha, 1),
                                             [1, shape[1]])

                    ### Kept only to mirror the attention branch (has no effect here) - Derive the word with the highest weight ###
                    pos = tf.argmax(self.alpha, axis=1)
                    sparse_tensor = tf.string_split(self.x_elmo_input)
                    dense_tensor = tf.sparse_tensor_to_dense(sparse_tensor, '')
                    rg = tf.range(0, shape[0])
                    indices = tf.transpose([rg, tf.cast(pos, tf.int32)],
                                           [1, 0])
                    self.best_example = tf.gather_nd(dense_tensor, indices)

                    ### Computing average ###
                    r = tf.matmul(tf.transpose(H, [0, 2, 1]),
                                  tf.reshape(self.alpha, [-1, shape[1], 1]))
                    r = tf.squeeze(r, axis=2)

            self.h_star = tf.tanh(r)  # (batch , HIDDEN_SIZE)
Example #7
    def _build_forward(self):
        config = self.config
        N, M, JX, JQ, VW, VC, d, W = \
            config.batch_size, config.max_num_sents, config.max_sent_size, \
            config.max_ques_size, config.word_vocab_size, config.char_vocab_size, config.hidden_size, \
            config.max_word_size
        JX = tf.shape(self.x)[2]
        JQ = tf.shape(self.q)[1]
        M = tf.shape(self.x)[1]
        dc, dw, dco = config.char_emb_size, config.word_emb_size, config.char_out_size

        with tf.variable_scope("emb"):
            if config.use_char_emb:
                with tf.variable_scope("emb_var"), tf.device("/cpu:0"):
                    char_emb_mat = tf.get_variable("char_emb_mat",
                                                   shape=[VC, dc],
                                                   dtype='float')

                with tf.variable_scope("char"):
                    Acx = tf.nn.embedding_lookup(char_emb_mat,
                                                 self.cx)  # [N, M, JX, W, dc]
                    Acq = tf.nn.embedding_lookup(char_emb_mat,
                                                 self.cq)  # [N, JQ, W, dc]
                    Acx = tf.reshape(Acx, [-1, JX, W, dc])
                    Acq = tf.reshape(Acq, [-1, JQ, W, dc])

                    filter_sizes = list(
                        map(int, config.out_channel_dims.split(',')))
                    heights = list(map(int, config.filter_heights.split(',')))
                    assert sum(filter_sizes) == dco, (filter_sizes, dco)
                    with tf.variable_scope("conv"):
                        xx = multi_conv1d(Acx,
                                          filter_sizes,
                                          heights,
                                          "VALID",
                                          self.is_train,
                                          config.keep_prob,
                                          scope="xx")
                        if config.share_cnn_weights:
                            tf.get_variable_scope().reuse_variables()
                            qq = multi_conv1d(Acq,
                                              filter_sizes,
                                              heights,
                                              "VALID",
                                              self.is_train,
                                              config.keep_prob,
                                              scope="xx")
                        else:
                            qq = multi_conv1d(Acq,
                                              filter_sizes,
                                              heights,
                                              "VALID",
                                              self.is_train,
                                              config.keep_prob,
                                              scope="qq")
                        xx = tf.reshape(xx, [-1, M, JX, dco])
                        qq = tf.reshape(qq, [-1, JQ, dco])

            if config.use_word_emb:
                with tf.variable_scope("emb_var"), tf.device("/cpu:0"):
                    if config.mode == 'train':
                        word_emb_mat = tf.get_variable(
                            "word_emb_mat",
                            dtype='float',
                            shape=[VW, dw],
                            initializer=tf.random_normal_initializer)
                    else:
                        word_emb_mat = tf.get_variable("word_emb_mat",
                                                       shape=[VW, dw],
                                                       dtype='float')
                    if config.use_glove_for_unk:
                        word_emb_mat = tf.concat(
                            axis=0, values=[word_emb_mat, self.new_emb_mat])

                with tf.name_scope("word"):
                    Ax = tf.nn.embedding_lookup(word_emb_mat,
                                                self.x)  # [N, M, JX, d]
                    Aq = tf.nn.embedding_lookup(word_emb_mat,
                                                self.q)  # [N, JQ, d]
                    self.tensor_dict['x'] = Ax
                    self.tensor_dict['q'] = Aq
                if config.use_char_emb:
                    xx = tf.concat(axis=3, values=[xx, Ax])  # [N, M, JX, di]
                    qq = tf.concat(axis=2, values=[qq, Aq])  # [N, JQ, di]
                else:
                    xx = Ax
                    qq = Aq

        # highway network
        if config.highway:
            with tf.variable_scope("highway"):
                xx = highway_network(xx,
                                     config.highway_num_layers,
                                     True,
                                     wd=config.wd,
                                     is_train=self.is_train)
                tf.get_variable_scope().reuse_variables()
                qq = highway_network(qq,
                                     config.highway_num_layers,
                                     True,
                                     wd=config.wd,
                                     is_train=self.is_train)

        self.tensor_dict['xx'] = xx
        self.tensor_dict['qq'] = qq

        cell_fw = LSTMCell(d, state_is_tuple=True, name="basic_lstm_cell")
        cell_bw = LSTMCell(d, state_is_tuple=True, name="basic_lstm_cell")
        d_cell_fw = SwitchableDropoutWrapper(
            cell_fw, self.is_train, input_keep_prob=config.input_keep_prob)
        d_cell_bw = SwitchableDropoutWrapper(
            cell_bw, self.is_train, input_keep_prob=config.input_keep_prob)
        cell2_fw = LSTMCell(d, state_is_tuple=True, name="basic_lstm_cell")
        cell2_bw = LSTMCell(d, state_is_tuple=True, name="basic_lstm_cell")
        d_cell2_fw = SwitchableDropoutWrapper(
            cell2_fw, self.is_train, input_keep_prob=config.input_keep_prob)
        d_cell2_bw = SwitchableDropoutWrapper(
            cell2_bw, self.is_train, input_keep_prob=config.input_keep_prob)
        cell3_fw = LSTMCell(d, state_is_tuple=True, name="basic_lstm_cell")
        cell3_bw = LSTMCell(d, state_is_tuple=True, name="basic_lstm_cell")
        d_cell3_fw = SwitchableDropoutWrapper(
            cell3_fw, self.is_train, input_keep_prob=config.input_keep_prob)
        d_cell3_bw = SwitchableDropoutWrapper(
            cell3_bw, self.is_train, input_keep_prob=config.input_keep_prob)
        cell4_fw = LSTMCell(d, state_is_tuple=True, name="basic_lstm_cell")
        cell4_bw = LSTMCell(d, state_is_tuple=True, name="basic_lstm_cell")
        d_cell4_fw = SwitchableDropoutWrapper(
            cell4_fw, self.is_train, input_keep_prob=config.input_keep_prob)
        d_cell4_bw = SwitchableDropoutWrapper(
            cell4_bw, self.is_train, input_keep_prob=config.input_keep_prob)
        x_len = tf.reduce_sum(tf.cast(self.x_mask, 'int32'), 2)  # [N, M]
        q_len = tf.reduce_sum(tf.cast(self.q_mask, 'int32'), 1)  # [N]

        with tf.variable_scope("prepro"):
            (fw_u, bw_u), ((_, fw_u_f), (_,
                                         bw_u_f)) = bidirectional_dynamic_rnn(
                                             d_cell_fw,
                                             d_cell_bw,
                                             qq,
                                             q_len,
                                             dtype='float',
                                             scope='u1')  # [N, J, d], [N, d]
            u = tf.concat(axis=2, values=[fw_u, bw_u])
            if config.share_lstm_weights:
                tf.get_variable_scope().reuse_variables()
                (fw_h, bw_h), _ = bidirectional_dynamic_rnn(
                    cell_fw, cell_bw, xx, x_len, dtype='float',
                    scope='u1')  # [N, M, JX, 2d]
                h = tf.concat(axis=3, values=[fw_h, bw_h])  # [N, M, JX, 2d]
            else:
                (fw_h, bw_h), _ = bidirectional_dynamic_rnn(
                    cell_fw, cell_bw, xx, x_len, dtype='float',
                    scope='h1')  # [N, M, JX, 2d]
                h = tf.concat(axis=3, values=[fw_h, bw_h])  # [N, M, JX, 2d]
            self.tensor_dict['u'] = u
            self.tensor_dict['h'] = h

        with tf.variable_scope("main"):
            if config.dynamic_att:
                p0 = h
                u = tf.reshape(tf.tile(tf.expand_dims(u, 1), [1, M, 1, 1]),
                               [N * M, JQ, 2 * d])
                q_mask = tf.reshape(
                    tf.tile(tf.expand_dims(self.q_mask, 1), [1, M, 1]),
                    [N * M, JQ])
                first_cell_fw = AttentionCell(
                    cell2_fw,
                    u,
                    mask=q_mask,
                    mapper='sim',
                    input_keep_prob=self.config.input_keep_prob,
                    is_train=self.is_train)
                first_cell_bw = AttentionCell(
                    cell2_bw,
                    u,
                    mask=q_mask,
                    mapper='sim',
                    input_keep_prob=self.config.input_keep_prob,
                    is_train=self.is_train)
                second_cell_fw = AttentionCell(
                    cell3_fw,
                    u,
                    mask=q_mask,
                    mapper='sim',
                    input_keep_prob=self.config.input_keep_prob,
                    is_train=self.is_train)
                second_cell_bw = AttentionCell(
                    cell3_bw,
                    u,
                    mask=q_mask,
                    mapper='sim',
                    input_keep_prob=self.config.input_keep_prob,
                    is_train=self.is_train)
            else:
                p0 = attention_layer(config,
                                     self.is_train,
                                     h,
                                     u,
                                     h_mask=self.x_mask,
                                     u_mask=self.q_mask,
                                     scope="p0",
                                     tensor_dict=self.tensor_dict)
                first_cell_fw = d_cell2_fw
                second_cell_fw = d_cell3_fw
                first_cell_bw = d_cell2_bw
                second_cell_bw = d_cell3_bw

            (fw_g0, bw_g0), _ = bidirectional_dynamic_rnn(
                first_cell_fw,
                first_cell_bw,
                p0,
                x_len,
                dtype='float',
                scope='g0')  # [N, M, JX, 2d]
            g0 = tf.concat(axis=3, values=[fw_g0, bw_g0])
            (fw_g1, bw_g1), _ = bidirectional_dynamic_rnn(
                second_cell_fw,
                second_cell_bw,
                g0,
                x_len,
                dtype='float',
                scope='g1')  # [N, M, JX, 2d]
            g1 = tf.concat(axis=3, values=[fw_g1, bw_g1])

            logits = get_logits([g1, p0],
                                d,
                                True,
                                wd=config.wd,
                                input_keep_prob=config.input_keep_prob,
                                mask=self.x_mask,
                                is_train=self.is_train,
                                func=config.answer_func,
                                scope='logits1')
            a1i = softsel(tf.reshape(g1, [N, M * JX, 2 * d]),
                          tf.reshape(logits, [N, M * JX]))
            a1i = tf.tile(tf.expand_dims(tf.expand_dims(a1i, 1), 1),
                          [1, M, JX, 1])

            (fw_g2, bw_g2), _ = bidirectional_dynamic_rnn(
                d_cell4_fw,
                d_cell4_bw,
                tf.concat(axis=3, values=[p0, g1, a1i, g1 * a1i]),
                x_len,
                dtype='float',
                scope='g2')  # [N, M, JX, 2d]
            g2 = tf.concat(axis=3, values=[fw_g2, bw_g2])
            logits2 = get_logits([g2, p0],
                                 d,
                                 True,
                                 wd=config.wd,
                                 input_keep_prob=config.input_keep_prob,
                                 mask=self.x_mask,
                                 is_train=self.is_train,
                                 func=config.answer_func,
                                 scope='logits2')

            flat_logits = tf.reshape(logits, [-1, M * JX])
            flat_yp = tf.nn.softmax(flat_logits)  # [-1, M*JX]
            flat_logits2 = tf.reshape(logits2, [-1, M * JX])
            flat_yp2 = tf.nn.softmax(flat_logits2)

            if config.na:
                na_bias = tf.get_variable("na_bias", shape=[], dtype='float')
                na_bias_tiled = tf.tile(tf.reshape(na_bias, [1, 1]),
                                        [N, 1])  # [N, 1]
                concat_flat_logits = tf.concat(
                    axis=1, values=[na_bias_tiled, flat_logits])
                concat_flat_yp = tf.nn.softmax(concat_flat_logits)
                na_prob = tf.squeeze(tf.slice(concat_flat_yp, [0, 0], [-1, 1]),
                                     [1])
                flat_yp = tf.slice(concat_flat_yp, [0, 1], [-1, -1])

                concat_flat_logits2 = tf.concat(
                    axis=1, values=[na_bias_tiled, flat_logits2])
                concat_flat_yp2 = tf.nn.softmax(concat_flat_logits2)
                na_prob2 = tf.squeeze(
                    tf.slice(concat_flat_yp2, [0, 0], [-1, 1]), [1])  # [N]
                flat_yp2 = tf.slice(concat_flat_yp2, [0, 1], [-1, -1])

                self.concat_logits = concat_flat_logits
                self.concat_logits2 = concat_flat_logits2
                self.na_prob = na_prob * na_prob2

            yp = tf.reshape(flat_yp, [-1, M, JX], name="yp")
            yp2 = tf.reshape(flat_yp2, [-1, M, JX], name="yp2")
            wyp = tf.nn.sigmoid(logits2, name="wyp")

            self.tensor_dict['g1'] = g1
            self.tensor_dict['g2'] = g2

            self.logits = flat_logits
            self.logits2 = flat_logits2
            self.yp = yp
            self.yp2 = yp2
            self.wyp = wyp
Example #8
# reconstructed placeholder (the opening of this statement was truncated; shape assumed)
encoder_inputs_length = tf.placeholder(shape=(None,),
                                       dtype=tf.int32,
                                       name='encoder_inputs_length')

decoder_targets = tf.placeholder(shape=(None, None),
                                 dtype=tf.int32,
                                 name='decoder_targets')

embeddings = tf.get_variable('embedding',
                             shape=(vocab_size, input_embedding_size),
                             dtype=tf.float32,
                             initializer=tf.initializers.random_uniform(
                                 -1.0, 1.0))
encoder_inputs_embedded = tf.nn.embedding_lookup(embeddings, encoder_inputs)

# encoder
encoder_cell = LSTMCell(encoder_hidden_units)
(encoder_fw_outputs, encoder_bw_outputs), (
    encoder_fw_final_state,
    encoder_bw_final_state) = tf.nn.bidirectional_dynamic_rnn(
        cell_fw=encoder_cell,
        cell_bw=encoder_cell,
        inputs=encoder_inputs_embedded,
        sequence_length=encoder_inputs_length,
        dtype=tf.float32,
        time_major=True)

# merge the forward and backward LSTM outputs and states
encoder_outputs = tf.concat((encoder_fw_outputs, encoder_bw_outputs), axis=2)
encoder_final_state_c = tf.concat(
    (encoder_fw_final_state.c, encoder_bw_final_state.c), axis=1)
encoder_final_state_h = tf.concat(
    (encoder_fw_final_state.h, encoder_bw_final_state.h), axis=1)
Example #9
# y=tf.unstack(y)
# tf.reset_default_graph()
encode_input = tf.placeholder(shape=[None, None],
                              dtype=tf.int32,
                              name='encode_input')
decode_target = tf.placeholder(shape=[None, None],
                               dtype=tf.int32,
                               name='decode_target')
decode_input = tf.placeholder(shape=[None, None],
                              dtype=tf.int32,
                              name='decode_input')
embedding = tf.Variable(tf.random_uniform([4, 10], -1.0, 1.0),
                        dtype=tf.float32)  # embedding table: first dim is the vocab (character) count, second is the embedding size
encode_embedding = tf.nn.embedding_lookup(embedding, encode_input)
decode_embedding = tf.nn.embedding_lookup(embedding, decode_input)
lstm_cell = LSTMCell(4)
outputs, states = dynamic_rnn(lstm_cell, encode_embedding, dtype=tf.float32)
print('states is ', states)
# y=tf.unstack(y,4,1)/
lstm_cell2 = LSTMCell(num_units=4)
logit, states2 = dynamic_rnn(lstm_cell2,
                             decode_embedding,
                             dtype=tf.float32,
                             initial_state=states,
                             scope='decode_output')

print('2')
la = tf.one_hot(y_target, depth=4, dtype=tf.float32)
print(la)
pre = tf.nn.softmax(logit)
print('logit is ', logit)
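
A hedged continuation for this toy encoder-decoder: a cross-entropy loss against the one-hot targets (the loss and optimizer choices are assumptions, not part of the original snippet):

# Hypothetical training step (loss/optimizer are assumptions):
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(labels=la, logits=logit))
train_op = tf.train.AdamOptimizer(learning_rate=1e-2).minimize(loss)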
Example #10
    def _build_lm_graph(hparams, inputs, mode, freeze_bdlm=False, scope=None):

        ids, lens, seq_in, phyche, seq_out = inputs

        seq_dense = tf.layers.dense(inputs=seq_in,
                                    units=25,
                                    use_bias=False,
                                    trainable=not freeze_bdlm,
                                    name="bdlm_seq_dense")

        x = tf.concat([seq_dense, phyche], axis=-1)

        _outputs = []

        with tf.variable_scope(scope or "bdlm_cnn_embed",
                               dtype=tf.float32) as cnn_scope:
            cnn_embed = tf.layers.Conv1D(filters=hparams.num_filters,
                                         kernel_size=hparams.filter_size,
                                         activation=tf.nn.relu,
                                         kernel_regularizer=lambda inp: hparams
                                         .l2_lambda * tf.nn.l2_loss(inp),
                                         trainable=not freeze_bdlm)

            embed_proj = tf.layers.Dense(units=hparams.num_units,
                                         kernel_regularizer=lambda inp: hparams
                                         .l2_lambda * tf.nn.l2_loss(inp),
                                         trainable=not freeze_bdlm)

            z_0 = tf.layers.dropout(
                inputs=cnn_embed(x),
                rate=hparams.dropout,
                training=mode == tf.contrib.learn.ModeKeys.TRAIN)
            z_0 = embed_proj(z_0)

            _outputs.append([z_0, z_0])

        with tf.variable_scope(scope or "bdlm_rnn",
                               dtype=tf.float32) as bdlm_scope:

            _get_cell = lambda name: LSTMCell(name=name,
                                              num_units=hparams.num_lm_units,
                                              num_proj=hparams.num_units,
                                              trainable=not freeze_bdlm)
            _drop_wrap = lambda cell: tf.nn.rnn_cell.DropoutWrapper(
                cell=cell,
                state_keep_prob=1.0 - hparams.recurrent_state_dropout
                if mode == tf.contrib.learn.ModeKeys.TRAIN else 1.0,
                input_keep_prob=1.0 - hparams.recurrent_input_dropout
                if mode == tf.contrib.learn.ModeKeys.TRAIN else 1.0,
                variational_recurrent=True,
                input_size=tf.TensorShape([1]),
                dtype=tf.float32)
            fw_cells = []
            bw_cells = []
            # keep track of unwrapped cells so we can get their weights later
            unwrapped_fw_cells = []
            unwrapped_bw_cells = []
            for i in range(hparams.num_lm_layers):
                fw_cell = _get_cell("lstm_fw_%d" % (i))
                bw_cell = _get_cell("lstm_bw_%d" % (i))
                unwrapped_fw_cells.append(fw_cell)
                unwrapped_bw_cells.append(bw_cell)

                fw_cell = _drop_wrap(fw_cell)
                bw_cell = _drop_wrap(bw_cell)

                # create a residual connection around 1st layer
                if i == 0:
                    fw_cell = tf.nn.rnn_cell.ResidualWrapper(fw_cell)
                    bw_cell = tf.nn.rnn_cell.ResidualWrapper(bw_cell)
                # split fw and bw between GPUs
                if hparams.num_gpus == 2:
                    fw_dev = "/device:GPU:0"
                    bw_dev = "/device:GPU:1"
                    fw_cell = tf.nn.rnn_cell.DeviceWrapper(fw_cell, fw_dev)
                    bw_cell = tf.nn.rnn_cell.DeviceWrapper(bw_cell, bw_dev)
                else:
                    fw_dev = "/device:GPU:0"
                    bw_dev = "/device:GPU:0"
                fw_cells.append(fw_cell)
                bw_cells.append(bw_cell)

            # reverse the bw inputs, then reverse all _outputs after dynamic_rnn
            _outputs[0][1] = tf.reverse_sequence(
                _outputs[0][1],
                seq_lengths=lens +
                tf.constant(hparams.filter_size + 1, dtype=tf.int32),
                seq_axis=1)

            for i in range(hparams.num_lm_layers):
                with tf.name_scope("bdlm_layer_%d" % (i)):
                    # get fw / bw _outputs for each layer
                    input_fw = _outputs[-1][0]
                    input_bw = _outputs[-1][1]

                with tf.device(fw_dev):
                    output_fw, _ = tf.nn.dynamic_rnn(
                        cell=fw_cells[i],
                        inputs=input_fw,
                        sequence_length=lens +
                        tf.constant(hparams.filter_size + 1, dtype=tf.int32),
                        dtype=tf.float32)
                    # add weight reg
                    unwrapped_fw_cells[i].add_loss(
                        tf.multiply(hparams.l2_lambda,
                                    tf.nn.l2_loss(
                                        unwrapped_fw_cells[i].weights[0]),
                                    name="fw_%d_l2w" % (i)))
                with tf.device(bw_dev):
                    output_bw, _ = tf.nn.dynamic_rnn(
                        cell=bw_cells[i],
                        inputs=input_bw,
                        sequence_length=lens +
                        tf.constant(hparams.filter_size + 1, dtype=tf.int32),
                        dtype=tf.float32)
                    unwrapped_bw_cells[i].add_loss(
                        tf.multiply(hparams.l2_lambda,
                                    tf.nn.l2_loss(
                                        unwrapped_bw_cells[i].weights[0]),
                                    name="bw_%d_l2w" % (i)))

                _outputs.append([output_fw, output_bw])

            outputs = []
            for i in range(len(_outputs)):
                # reverse the backward outputs; trim the extra steps from fw/bw and concat
                _outputs[i][1] = tf.reverse_sequence(
                    _outputs[i][1],
                    seq_lengths=lens +
                    tf.constant(hparams.filter_size + 1, dtype=tf.int32),
                    seq_axis=1)
                outputs.append(
                    tf.concat([
                        _outputs[i][0][:, :-(hparams.filter_size + 1), :],
                        _outputs[i][1][:, (hparams.filter_size + 1):, :]
                    ],
                              axis=-1))
            # trim the extra context steps so these activations line up with the target-length mask below
            output_fw = _outputs[-1][0][:, :-(hparams.filter_size + 1), :]
            output_bw = _outputs[-1][1][:, (hparams.filter_size + 1):, :]

        with tf.variable_scope("bdlm_out", dtype=tf.float32):
            rnn_out = outputs[-1]

            rnn_out = tf.layers.dropout(
                inputs=rnn_out,
                rate=hparams.dropout,
                training=mode == tf.contrib.learn.ModeKeys.TRAIN)
            logits = tf.layers.dense(inputs=rnn_out,
                                     units=hparams.num_labels,
                                     kernel_regularizer=lambda inp: hparams.
                                     l2_lambda * tf.nn.l2_loss(inp),
                                     trainable=not freeze_bdlm)

        # mask out entries longer than target sequence length
        mask = tf.sequence_mask(lens, dtype=tf.float32)

        # add activity reg to last layer
        with tf.name_scope("l2_act_reg"):
            l2_act_loss = lambda act: tf.reduce_sum(
                tf.reduce_sum(hparams.l2_alpha * tf.square(act) * tf.
                              expand_dims(mask, axis=-1),
                              axis=[1, 2]) / tf.cast(lens, tf.float32))
            # ignore the loss contributed by time steps longer than sequence length
            fw_act_loss = l2_act_loss(output_fw)
            bw_act_loss = l2_act_loss(output_bw)
            unwrapped_fw_cells[-1].add_loss(fw_act_loss, inputs=input_fw)
            unwrapped_bw_cells[-1].add_loss(bw_act_loss, inputs=input_bw)

        crossent = tf.nn.softmax_cross_entropy_with_logits_v2(logits=logits,
                                                              labels=seq_out,
                                                              name="crossent")

        seq_loss = tf.reduce_sum(
            tf.reduce_sum(crossent * mask, axis=1) / tf.cast(
                lens, tf.float32)) / tf.cast(hparams.batch_size, tf.float32)
        reg_loss = tf.add_n(tf.losses.get_regularization_losses(),
                            name="reg_loss")

        if hparams.l2_alpha == 0. and hparams.l2_lambda == 0. and hparams.l2_beta == 0.:
            loss = seq_loss
        else:
            loss = seq_loss + reg_loss

        metrics = []
        update_ops = []
        if mode == tf.contrib.learn.ModeKeys.EVAL:
            # mean eval loss
            loss, loss_update = tf.metrics.mean(values=loss)
            seq_loss, seq_loss_update = tf.metrics.mean(values=seq_loss)
            tf.summary.scalar("eval_seq_loss", seq_loss, collections=["eval"])
            reg_loss, reg_loss_update = tf.metrics.mean(values=reg_loss)
            tf.summary.scalar("eval_reg_loss", reg_loss, collections=["eval"])

            predictions = tf.argmax(input=logits, axis=-1)
            tgt_labels = tf.argmax(input=seq_out, axis=-1)
            acc, acc_update = tf.metrics.accuracy(predictions=predictions,
                                                  labels=tgt_labels,
                                                  weights=mask)
            # final layer activations
            #mean_act_fw, mean_act_fw_update = add_seq_activation_histogram(output_fw, lens, "fw_2")
            #mean_act_bw, mean_act_bw_update = add_seq_activation_histogram(output_bw, lens, "bw_2")

            # confusion matrix
            targets_flat = tf.reshape(tgt_labels, [-1])
            predictions_flat = tf.reshape(predictions, [-1])
            mask_flat = tf.reshape(mask, [-1])
            cm, cm_update = streaming_confusion_matrix(
                labels=targets_flat,
                predictions=predictions_flat,
                num_classes=hparams.num_labels,
                weights=mask_flat)
            tf.add_to_collection("eval", cm_summary(cm, hparams.num_labels))
            metrics = [acc, cm]
            update_ops = [
                loss_update, seq_loss_update, reg_loss_update, acc_update,
                cm_update
            ]  #, mean_act_fw_update, mean_act_bw_update]

        return outputs, logits, loss, metrics, update_ops
Example #11
    def __init__(
        self,
        batch_size,
        inputs,
        outputs,
        num_units,
        cell_type
    ):
        """
    Args:
      num_hidden : number of hidden elements of each LSTM unit.
      inputs : a list (tensor array) of input tensors with size hp.num_time_steps*(batch_size,dim)
      cell : an rnn cell object (the default option is tf.python.ops.rnn_cell.LSTMCell)
      reverse : Option to decode in reverse order
      decode_without_input : Option to decode without input - there are zeros coming to the cell instead of input
    """

        self.batch_size = batch_size
        self.num_inputs = inputs[0].get_shape().as_list()[1]
        self.num_outputs = self.num_inputs
        num_time_steps = len(inputs)

        num_hidden = num_units[-1]
        self.last = inputs[-1]

        if len(num_units) > 1:
            cells = [LSTMCell(num_units=n) for n in num_units]
            self._lstm_cell = MultiRNNCell(cells)
        else:
            self._lstm_cell = LSTMCell(num_hidden)

        with tf.compat.v1.variable_scope('encoder') as ec:
            Wy = tf.Variable(tf.random.truncated_normal([num_hidden,
                                                         self.num_outputs], dtype=tf.float32), name='enc_weight'
                             )
            by = tf.Variable(tf.random.truncated_normal([self.num_outputs],
                                                        dtype=tf.float32), name='enc_bias')

            init_states = []
            for i in range(len(num_units)):
                init_c = tf.zeros((batch_size, num_units[i]))
                init_h = init_c
                layer = tf.contrib.rnn.LSTMStateTuple(init_c, init_h)
                init_states.append(layer)
            init_states = tuple(init_states)

            if len(num_units) > 1:
                lstm_state = init_states
            else:
                lstm_state = init_states[0]

            lstm_outputs = []
            for step in range(len(inputs)):
                if step > 0:
                    ec.reuse_variables()
                lstm_input = inputs[step]
                (lstm_output, lstm_state) = self._lstm_cell(
                    lstm_input, lstm_state)
            for step in range(len(outputs)):
                lstm_input = tf.matmul(lstm_output, Wy) + by
                lstm_outputs.append(lstm_input)
                (lstm_output, lstm_state) = self._lstm_cell(
                    lstm_input, lstm_state)

            self.prediction = tf.transpose(
                tf.stack(lstm_outputs), [1, 0, 2], name='prediction')
            self.target = tf.transpose(
                tf.stack(outputs), [1, 0, 2], name='target')
            self.input_ = tf.transpose(tf.stack(inputs), [1, 0, 2])
            self.prediction = self.prediction[:, :, 0]
            self.target = self.target[:, :, 0]
            self.enc_W = Wy
            self.enc_b = by
Example #12
    def __init__(
        self,
        batch_size,
        inputs,
        outputs,
        num_units,
        cell_type
    ):
        """
    Args:
      inputs : a list (tensor array) of input tensors with size hp.num_time_steps*(batch_size,dim)
      cell : an rnn cell object (the default option is tf.python.ops.rnn_cell.LSTMCell)
      reverse : Option to decode in reverse order
      decode_without_input : Option to decode without input - there are zeros coming to the cell instead of input
    """

        self.batch_size = batch_size
        self.num_inputs = inputs[0].get_shape().as_list()[1]
        self.num_outputs = self.num_inputs

        num_hidden = num_units[-1]

        if len(num_units) > 1:
            if cell_type == 'GRU':
                cells = [GRUCell(num_units=n) for n in num_units]
            else:
                cells = [LSTMCell(num_units=n) for n in num_units]
            self._enc_cell = MultiRNNCell(cells)
            self._dec_cell = MultiRNNCell(cells)
        else:
            if cell_type == 'GRU':
                self._enc_cell = GRUCell(num_hidden)
                self._dec_cell = GRUCell(num_hidden)
            else:
                self._enc_cell = LSTMCell(num_hidden)
                self._dec_cell = LSTMCell(num_hidden)

        # , initializer=tf.contrib.layers.xavier_initializer()
        with tf.compat.v1.variable_scope('encoder') as es:
            enc_W = tf.Variable(tf.random.truncated_normal([num_hidden,
                                                            self.num_outputs], dtype=tf.float32), name='enc_weight'
                                )
            enc_b = tf.Variable(tf.random.truncated_normal([self.num_outputs],
                                                           dtype=tf.float32), name='enc_bias')

            init_states = []
            if cell_type == 'GRU':
                for i in range(len(num_units)):
                    layer = tf.zeros((batch_size, num_units[i]))
                    init_states.append(layer)
            else:
                # make the zero initial cell and hidden state as a tuple - in the shape LSTM cell expects it to be
                for i in range(len(num_units)):
                    init_c = tf.zeros((batch_size, num_units[i]))
                    init_h = init_c
                    layer = tf.contrib.rnn.LSTMStateTuple(init_c, init_h)
                    init_states.append(layer)
                init_states = tuple(init_states)

            if len(num_units) > 1:
                enc_state = init_states
            else:
                enc_state = init_states[0]

            enc_predictions = []
            for step in range(len(inputs)):
                if step > 0:
                    es.reuse_variables()
                enc_input = inputs[step]
                (enc_output, enc_state) = self._enc_cell(
                    enc_input, enc_state)  # lstm_output = hidden state, lstm_state = tuple(cell state, hidden state)
                #y_hat = Wy*h + by
                enc_prediction = tf.matmul(enc_output, enc_W) + enc_b
                enc_predictions.append(enc_prediction)

        with tf.compat.v1.variable_scope('decoder') as vs:
            dec_W = tf.Variable(tf.random.truncated_normal([num_hidden,
                                                            self.num_outputs], dtype=tf.float32), name='dec_weight'
                                )

            dec_b = tf.Variable(tf.random.truncated_normal([self.num_outputs],
                                                           dtype=tf.float32), name='dec_bias')

            dec_input = enc_prediction
            dec_state = enc_state
            dec_outputs = []
            for step in range(len(outputs)):
                if step > 0:
                    vs.reuse_variables()
                (dec_input, dec_state) = self._dec_cell(
                    dec_input, dec_state)
                dec_input = tf.matmul(dec_input, dec_W) + dec_b
                dec_outputs.append(dec_input)
            self.prediction = tf.transpose(
                tf.stack(dec_outputs), [1, 0, 2], name='prediction')

        self.input_ = tf.transpose(tf.stack(inputs), [1, 0, 2])
        self.target = tf.transpose(tf.stack(outputs), [1, 0, 2], name='target')
        self.prediction = self.prediction[:, :, 0]
        self.target = self.target[:, :, 0]
        self.enc_W = enc_W
        self.enc_b = enc_b
        self.dec_W = dec_W
        self.dec_b = dec_b
Example #13
def _LSTMCells(unit_list, act_fn_list):
    return MultiRNNCell([
        LSTMCell(unit, activation=act_fn)
        for unit, act_fn in zip(unit_list, act_fn_list)
    ])
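
A minimal usage sketch (the layer sizes and activations are illustrative):

# Hypothetical usage: a two-layer stacked cell with a different activation per layer.
stacked_cell = _LSTMCells([128, 64], [tf.nn.relu, tf.nn.tanh])
# outputs, state = tf.nn.dynamic_rnn(stacked_cell, inputs, dtype=tf.float32)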
Example #14
output = embeded
'''
for i in range(lstm_layers):
    with tf.variable_scope('BiLSTM_Layer_{}'.format(i)):
        lstm_fw = LSTMCell(lstm_size) #, initializer=tf.truncated_normal_initializer(-0.1, 0.1, seed=2)
        lstm_bw = LSTMCell(lstm_size)
        cell_fw = tf.contrib.rnn.DropoutWrapper(lstm_fw, output_keep_prob=keep_prob_ph_rnn)
        cell_bw = tf.contrib.rnn.DropoutWrapper(lstm_bw, output_keep_prob=keep_prob_ph_rnn)

        (output_fw, output_bw), final_state = BiRNN(cell_fw, cell_bw, output, dtype=tf.float32)
        output = tf.concat((output_fw, output_bw), 2)
'''

##Attention + Dropout
with tf.variable_scope('BiLSTM_Layer_{}'.format(lstm_layers)):
    lstm_fw = LSTMCell(lstm_size)
    lstm_bw = LSTMCell(lstm_size)
    (output_fw, output_bw), final_state = BiRNN(lstm_fw,
                                                lstm_bw,
                                                output,
                                                dtype=tf.float32)
    output = tf.concat((output_fw, output_bw), 2)
    attention = Attention(output)
    drop = tf.nn.dropout(attention, keep_prob_ph)
    tf.summary.histogram('RNN_output', output)

##FC layers
with tf.name_scope('Fully_connected_Layers_0'):
    fc_output = tf.contrib.layers.fully_connected(drop,
                                                  64,
                                                  activation_fn=tf.nn.relu)