Example #1
    def rnn(self):
        """rnn模型"""

        # Map word indices to their embedding vectors
        with tf.device('/cpu:0'):
            embedding = tf.get_variable('embedding',
                                        initializer=self.word_embedding)
            embedding_inputs = tf.nn.embedding_lookup(embedding, self.input_x)
            embedding_inputs = tf.cast(embedding_inputs, tf.float32)
        with tf.name_scope("rnn"):
            # Multi-layer RNN (alternative, commented out)
            #cells = [dropout() for _ in range(self.config.num_layers)]
            #rnn_cell = tf.contrib.rnn.MultiRNNCell(cells, state_is_tuple=True)
            #_outputs, _ = tf.nn.dynamic_rnn(cell=rnn_cell, inputs=embedding_inputs, dtype=tf.float32)
            #last = _outputs[:, -1, :]  # take the output at the last time step as the result

            # (Bi-)RNN layer(-s)
            rnn_outputs, _ = bi_rnn(GRUCell(self.config.hidden_dim),
                                    GRUCell(self.config.hidden_dim),
                                    inputs=embedding_inputs,
                                    dtype=tf.float32)
            #tf.summary.histogram('RNN_outputs', rnn_outputs)
        # Attention layer
        with tf.name_scope('Attention_layer'):
            attention_output, alphas = attention(rnn_outputs,
                                                 self.config.attention_size,
                                                 return_alphas=True)
            tf.summary.histogram('alphas', alphas)
            # Dropout
            drop = tf.nn.dropout(attention_output, self.keep_prob)

        with tf.name_scope("score"):
            # Fully connected layer, followed by dropout and a ReLU activation
            W = tf.Variable(
                tf.truncated_normal(
                    [self.config.hidden_dim * 2, self.config.num_classes],
                    stddev=0.1))  # Hidden size is multiplied by 2 for Bi-RNN
            b = tf.Variable(tf.constant(0., shape=[self.config.num_classes]))
            self.logits = tf.nn.xw_plus_b(drop, W, b)
            #fc = tf.contrib.layers.dropout(fc, self.keep_prob)
            #fc = tf.nn.relu(fc)

            # Classifier
            #self.logits = tf.layers.dense(fc, self.config.num_classes, name='fc2')
            self.y_pred_cls = tf.argmax(tf.nn.softmax(self.logits), 1)  # predicted class

        with tf.name_scope("optimize"):
            # Loss: cross-entropy
            cross_entropy = tf.nn.softmax_cross_entropy_with_logits(
                logits=self.logits, labels=self.input_y)
            self.loss = tf.reduce_mean(cross_entropy)
            # Optimizer
            self.optim = tf.train.AdamOptimizer(
                learning_rate=self.config.learning_rate).minimize(self.loss)

        with tf.name_scope("accuracy"):
            # Accuracy
            correct_pred = tf.equal(tf.argmax(self.input_y, 1),
                                    self.y_pred_cls)
            self.acc = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
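These snippets are TensorFlow 1.x graph-mode code and rely on a few aliases that the excerpts never show. A hedged reconstruction of the imports they appear to assume (the exact module paths may differ from project to project):

# Imports the examples on this page appear to assume (TensorFlow 1.x, contrib era).
# `attention` is an external helper that each project defines or imports elsewhere.
import tensorflow as tf
from tensorflow.contrib.rnn import GRUCell, BasicLSTMCell
from tensorflow.python.ops.rnn import bidirectional_dynamic_rnn as bi_rnn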
Example #2
 def cal_loss_logit(self,
                    batch_embedded,
                    keep_prob,
                    W,
                    W_fc,
                    b_fc,
                    batch_y,
                    reuse=True,
                    scope="loss"):
     with tf.variable_scope(scope, reuse=reuse) as scope:
         rnn_outputs, _ = bi_rnn(BasicLSTMCell(self.config.hidden_dim),
                                 BasicLSTMCell(self.config.hidden_dim),
                                 inputs=batch_embedded,
                                 dtype=tf.float32)
         # Attention
         ATTENTION_SIZE = 50
         attention_output, alphas = attention(rnn_outputs,
                                              ATTENTION_SIZE,
                                              return_alphas=True)
         drop = tf.nn.dropout(attention_output, keep_prob)
         # Fully connected layer
         y_hat = tf.nn.xw_plus_b(drop, W_fc, b_fc)
         y_hat = tf.squeeze(y_hat)
     return y_hat, tf.reduce_mean(
         tf.nn.softmax_cross_entropy_with_logits(logits=y_hat,
                                                 labels=batch_y))
Example #3
        def cal_loss_logit(embedded, keep_prob, reuse=True, scope="loss"):
            with tf.variable_scope(scope, reuse=reuse) as scope:
                rnn_outputs, _ = bi_rnn(BasicLSTMCell(self.hidden_size),
                                        BasicLSTMCell(self.hidden_size),
                                        inputs=embedded,
                                        dtype=tf.float32)

                # Attention
                H = tf.add(rnn_outputs[0], rnn_outputs[1])  # fw + bw
                M = tf.tanh(
                    H)  # M = tanh(H)  (batch_size, seq_len, HIDDEN_SIZE)
                # alpha (bs * sl, 1)
                alpha = tf.nn.softmax(
                    tf.matmul(tf.reshape(M, [-1, self.hidden_size]),
                              tf.reshape(W, [-1, 1])))
                r = tf.matmul(
                    tf.transpose(H, [0, 2, 1]),
                    tf.reshape(alpha, [-1, self.max_len, 1]))  # (batch_size, hidden_size, 1)
                r = tf.squeeze(r)
                h_star = tf.tanh(r)
                drop = tf.nn.dropout(h_star, keep_prob)

                # Fully connected layer(dense layer)
                y_hat = tf.nn.xw_plus_b(drop, W_fc, b_fc)

            return y_hat, tf.reduce_mean(
                tf.nn.sparse_softmax_cross_entropy_with_logits(
                    logits=y_hat, labels=self.label))
Example #4
    def bi_gru_att(self):
        sen_inputs_glove = self.embedding_layer()
        fw_cell = tf.contrib.rnn.DropoutWrapper(tf.contrib.rnn.GRUCell(
            self.lstm_units),
                                                output_keep_prob=0.75)
        bw_cell = tf.contrib.rnn.DropoutWrapper(tf.contrib.rnn.GRUCell(
            self.lstm_units),
                                                output_keep_prob=0.75)

        rnn_outputs, _ = bi_rnn(fw_cell,
                                bw_cell,
                                inputs=sen_inputs_glove,
                                dtype=tf.float32)

        fw_outputs, bw_outputs = rnn_outputs
        value = fw_outputs + bw_outputs
        self.comput_att(value)

        r = tf.matmul(tf.transpose(value, [0, 2, 1]),
                      tf.reshape(self.alpha, [-1, self.max_len, 1]))

        r = tf.squeeze(r)
        h_star = tf.tanh(r)
        h_drop = tf.nn.dropout(h_star, self.dropout_keep_prob)

        return h_drop
Example #5
    def build_graph(self):
        print("building graph")
        # Word embedding
        embeddings_var = tf.Variable(tf.random_uniform(
            [self.vocab_size, self.embedding_size], -1.0, 1.0),
                                     trainable=True)
        batch_embedded = tf.nn.embedding_lookup(embeddings_var, self.x)

        rnn_outputs, _ = bi_rnn(BasicLSTMCell(self.hidden_size),
                                BasicLSTMCell(self.hidden_size),
                                inputs=batch_embedded,
                                dtype=tf.float32)

        fw_outputs, bw_outputs = rnn_outputs

        W = tf.Variable(tf.random_normal([self.hidden_size], stddev=0.1))
        H = fw_outputs + bw_outputs  # (batch_size, seq_len, HIDDEN_SIZE)
        M = tf.tanh(H)  # M = tanh(H)  (batch_size, seq_len, HIDDEN_SIZE)

        self.alpha = tf.nn.softmax(
            tf.reshape(
                tf.matmul(tf.reshape(M, [-1, self.hidden_size]),
                          tf.reshape(W, [-1, 1])),
                (-1, self.max_len)))  # batch_size x seq_len
        r = tf.matmul(tf.transpose(H, [0, 2, 1]),
                      tf.reshape(self.alpha, [-1, self.max_len, 1]))
        r = tf.squeeze(r)
        h_star = tf.tanh(r)  # (batch_size, HIDDEN_SIZE)

        h_drop = tf.nn.dropout(h_star, self.keep_prob)

        # Fully connected layer(dense layer)
        FC_W = tf.Variable(
            tf.truncated_normal([self.hidden_size, self.n_class], stddev=0.1))
        FC_b = tf.Variable(tf.constant(0., shape=[self.n_class]))
        y_hat = tf.nn.xw_plus_b(h_drop, FC_W, FC_b)

        self.loss = tf.reduce_mean(
            tf.nn.sparse_softmax_cross_entropy_with_logits(logits=y_hat,
                                                           labels=self.label))

        # prediction
        self.prediction = tf.argmax(tf.nn.softmax(y_hat), 1)

        # optimization
        loss_to_minimize = self.loss
        tvars = tf.trainable_variables()
        gradients = tf.gradients(
            loss_to_minimize,
            tvars,
            aggregation_method=tf.AggregationMethod.EXPERIMENTAL_TREE)
        grads, global_norm = tf.clip_by_global_norm(gradients, 1.0)

        self.global_step = tf.Variable(0, name="global_step", trainable=False)
        self.optimizer = tf.train.MomentumOptimizer(self.learning_rate,
                                                    0.9,
                                                    use_nesterov=True)
        self.train_op = self.optimizer.apply_gradients(
            zip(grads, tvars), global_step=self.global_step, name='train_step')
        print("graph built successfully!")
Example #6
 def build_model(self):
     with tf.name_scope("ner_layer"):
         lstm_cell_fw = tf.nn.rnn_cell.DropoutWrapper(
             tf.nn.rnn_cell.LSTMCell(self.hidden_size),
             output_keep_prob=self.keep_prob)
         lstm_cell_bw = tf.nn.rnn_cell.DropoutWrapper(
             tf.nn.rnn_cell.LSTMCell(self.hidden_size),
             output_keep_prob=self.keep_prob)
         with tf.variable_scope("ner_layer", reuse=tf.AUTO_REUSE):
             # Build several stacked networks directly, then output the result
             for i in range(self.num_layers):
                 (output_fw, output_bw), _ = bi_rnn(
                     lstm_cell_fw,
                     lstm_cell_bw,
                     self.embedded_layer,
                     sequence_length=self.sequence_lengths,
                     dtype=tf.float32)
                 # [batch_size, sequence_length, hidden_size * 2]
                 # Keep the hidden outputs at every time step, as is usual for sequence labeling; for classification, taking only the last hidden state would be enough
                 self.outputs = tf.concat((output_fw, output_bw), 2)
             self.outputs = tf.nn.dropout(self.outputs, self.keep_prob)
             self.outputs = tf.reshape(self.outputs,
                                       [-1, 2 * self.hidden_size])
             self.logits = tf.matmul(
                 self.outputs, self.weight_variable) + self.bias_variable
             self.logits = tf.reshape(
                 self.logits,
                 [-1, self.io_sequence_size, self.output_class_size])
Example #7
    def cal_loss_logit(batch_embedded, keep_prob, reuse=True, scope="loss"):
        with tf.variable_scope(scope, reuse=reuse) as scope:
            rnn_outputs, _ = bi_rnn(BasicLSTMCell(HIDDEN_SIZE),
                                    BasicLSTMCell(HIDDEN_SIZE),
                                    inputs=batch_embedded,
                                    dtype=tf.float32)

            # Attention

            H = tf.add(rnn_outputs[0], rnn_outputs[1])  # fw + bw
            M = tf.tanh(H)  # M = tanh(H)  (batch_size, seq_len, HIDDEN_SIZE)
            print(M.shape)
            # alpha (bs * sl, 1)
            alpha = tf.nn.softmax(
                tf.matmul(tf.reshape(M, [-1, HIDDEN_SIZE]),
                          tf.reshape(W, [-1, 1])))
            r = tf.matmul(
                tf.transpose(H, [0, 2, 1]),
                tf.reshape(alpha, [-1, MAX_DOCUMENT_LENGTH, 1]))  # (batch_size, HIDDEN_SIZE, 1)
            print(r.shape)
            r = tf.squeeze(r)
            h_star = tf.tanh(r)  # (batch_size, HIDDEN_SIZE)
            # attention_output, alphas = attention(rnn_outputs, ATTENTION_SIZE, return_alphas=True)
            drop = tf.nn.dropout(h_star, keep_prob)

            # Fully connected layer(dense layer)

            y_hat = tf.nn.xw_plus_b(drop, W_fc, b_fc)

        return y_hat, tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(logits=y_hat,
                                                    labels=batch_y))
Example #8
    def model(self):
        voc_size = len(self.word2idx) + 1
        pretrained_embed = load_pretrained_embed("CBOW_iter15_2017-2018.bin",
                                                 self.embed_size,
                                                 self.word2idx)
        embed_matrix = tf.get_variable(
            name='embedding_matrix',
            shape=[voc_size, self.embed_size],
            initializer=tf.constant_initializer(pretrained_embed),
            dtype=tf.float32)
        embed = tf.nn.embedding_lookup(embed_matrix, self.text)

        # RNN layer
        (fw_outputs, bw_outputs), _ = bi_rnn(GRUCell(self.hidden_size),
                                             GRUCell(self.hidden_size),
                                             inputs=embed,
                                             dtype=tf.float32)
        rnn_outputs = tf.concat((fw_outputs, bw_outputs), axis=2)
        # Attention layer
        attention_output, self.alpha = attention(rnn_outputs,
                                                 self.attention_size)
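        # Note: tf.layers.dropout takes a drop *rate* (fraction dropped), unlike tf.nn.dropout's keep_prob.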
        sentence_vector = tf.layers.dropout(attention_output,
                                            self.dropout_rate)

        self.logits = tf.layers.dense(inputs=sentence_vector,
                                      units=self.num_classes,
                                      name='logits')
Example #9
    def call(self, data, keep_prob=0.8):
        # data : [batch_size,data_length]
        max_sentence_length = data.shape[1]
        print('data.shape[1]:', data.shape[1])

        with tf.variable_scope('embedding_layer'), tf.device("/cpu:0"):
            embedding = tf.get_variable(
                'embedding',
                shape=[self.vocab_size, self.embed_size],
                initializer=tf.initializers.random_uniform(-1.0, 1.0))

            tf.summary.histogram('embeddings_var', embedding)
            # w2v : [batch_size,max_sentence_length,embed_size]
            data = tf.cast(data, dtype=tf.int32)
            w2v = tf.nn.embedding_lookup(embedding, data)

        with tf.variable_scope('bilstm_layer'):
            # final_outputs is tuple
            final_outputs, final_state = bi_rnn(GRUCell(self.hidden_size),
                                                GRUCell(self.hidden_size),
                                                inputs=w2v,
                                                dtype=tf.float32)

            tf.summary.histogram('RNN_outputs', final_outputs)

            if self.sentence_mode == SentenceMode.ATTENTION:
                attention_ = Attention(final_outputs,
                                       self.attention_size,
                                       time_major=False,
                                       return_alphas=True)
                outputs, alphas = attention_.attentionModel()
                # outputs: [batch_size, hidden_size * 2]
                tf.summary.histogram('alphas', alphas)

            elif self.sentence_mode == SentenceMode.FINAL_STATE:
                final_state_fw, final_state_bw = final_state
                # outputs = tf.concat([final_state_fw, final_state_bw], axis=-1)
                outputs = tf.concat(final_state, axis=-1)

            else:
                raise ValueError("sentence mode `{0}` is not "
                                 "supported by the GRU model.".format(
                                     self.sentence_mode))

        with tf.variable_scope('fully_connected_layer'):
            # rnn_output = [batch_size,sentence_length]
            rnn_output = tf.nn.dropout(outputs, keep_prob=keep_prob)
            # h : [batch_size,sentence_length]
            print('rnn_output.shape:', rnn_output.shape)
            h = tf.layers.Dense(rnn_output.shape.as_list()[-1],
                                activation=tf.nn.relu)(rnn_output)
            # h = tf.layers.Dense(64,activation=tf.nn.relu)(rnn_output)
            print('h.shape:', h.shape)
            # logits:[batch_size,num_targets]
            logits = tf.layers.Dense(self.num_targets)(h)
            print('logits.shape:', logits.shape)

        return logits
Example #10
    def bi_gru_embedding(self, batch_embedding):

        rnn_outputs, _ = bi_rnn(tf.contrib.rnn.GRUCell(self.lstm_units),
                                tf.contrib.rnn.GRUCell(self.lstm_units),
                                inputs=batch_embedding,
                                dtype=tf.float32)
        fw_outputs, bw_outputs = rnn_outputs

        return tf.multiply(fw_outputs, bw_outputs)
Example #11
    def build_lstm(self, input_tensor):

        rnn_outputs, _ = bi_rnn(
            BasicLSTMCell(self.config["lstm_para"]["hidden_size"]),
            BasicLSTMCell(self.config["lstm_para"]["hidden_size"]),
            inputs=input_tensor,
            dtype=tf.float32)
        fw_outputs, bw_outputs = rnn_outputs

        W = tf.Variable(
            tf.random_normal([self.config["lstm_para"]["hidden_size"]],
                             stddev=0.1))
        H = fw_outputs + bw_outputs  # (batch_size, seq_len, HIDDEN_SIZE)
        M = tf.tanh(H)  # M = tanh(H)  (batch_size, seq_len, HIDDEN_SIZE)

        alpha = tf.nn.softmax(
            tf.reshape(
                tf.matmul(
                    tf.reshape(M,
                               [-1, self.config["lstm_para"]["hidden_size"]]),
                    tf.reshape(W, [-1, 1])),
                (-1,
                 self.config["lstm_para"]["max_len"])))  # batch_size x seq_len
        r = tf.matmul(
            tf.transpose(H, [0, 2, 1]),
            tf.reshape(alpha, [-1, self.config["lstm_para"]["max_len"], 1]))
        r = tf.squeeze(r)
        h_star = tf.tanh(r)  # (batch_size, HIDDEN_SIZE)

        h_drop = tf.nn.dropout(h_star, self.config["lstm_para"]["keep_prob"])

        # Fully connected layer(dense layer)
        FC_W = tf.Variable(
            tf.truncated_normal([
                self.config["lstm_para"]["hidden_size"],
                self.config['label_size']
            ],
                                stddev=0.1))
        FC_b = tf.Variable(tf.constant(0., shape=[self.config['label_size']]))
        logits = tf.nn.xw_plus_b(h_drop, FC_W, FC_b)

        # prediction
        probabilities = tf.nn.softmax(logits)
        predict_label_ids = tf.argmax(logits, axis=1,
                                      name="predict_label_id")  # predicted label IDs

        pooled_outputs = []
        l2_loss = tf.constant(0.0)  # not used yet; start from 0
        l2_loss += tf.nn.l2_loss(FC_W) + tf.nn.l2_loss(FC_b)
        # with tf.variable_scope("output"):
        #     output_w = tf.get_variable("output_w", shape=[hidden_size, self.config['label_size']])
        #     output_b =  self.initialize_bias("output_b", shape=self.config['label_size'])
        #     logits = tf.nn.xw_plus_b(output_layer, output_w, output_b)
        #
        # probabilities = tf.nn.softmax(logits, axis=-1)
        # predict_label_ids = tf.argmax(logits, axis=1, name="predict_label_id")  # predicted label IDs
        return logits, predict_label_ids, l2_loss, probabilities
Example #12
def build_attention_model():
    # Different placeholders
    with tf.name_scope('Inputs'):
        batch_ph = tf.placeholder(tf.int32, [None, SEQUENCE_LENGTH], name='batch_ph')
        target_ph = tf.placeholder(tf.float32, [None], name='target_ph')
        seq_len_ph = tf.placeholder(tf.int32, [None], name='seq_len_ph')
        keep_prob_ph = tf.placeholder(tf.float32, name='keep_prob_ph')

    # Embedding layer
    with tf.name_scope('Embedding_layer'):
        embeddings_var = tf.Variable(tf.random_uniform([vocabulary_size, EMBEDDING_DIM], -1.0, 1.0), trainable=True)
        tf.summary.histogram('embeddings_var', embeddings_var)
        batch_embedded = tf.nn.embedding_lookup(embeddings_var, batch_ph)

    # (Bi-)RNN layer(-s)
    rnn_outputs, _ = bi_rnn(GRUCell(HIDDEN_UNITS), GRUCell(HIDDEN_UNITS),
                            inputs=batch_embedded, sequence_length=seq_len_ph, dtype=tf.float32)
    tf.summary.histogram('RNN_outputs', rnn_outputs)

    # Attention layer
    with tf.name_scope('Attention_layer'):
        attention_output, alphas = attention(rnn_outputs, ATTENTION_UNITS, return_alphas=True)
        tf.summary.histogram('alphas', alphas)

    # Dropout
    drop = tf.nn.dropout(attention_output, keep_prob_ph)

    # Fully connected layer
    with tf.name_scope('Fully_connected_layer'):
        W = tf.Variable(
            tf.truncated_normal([HIDDEN_UNITS * 2, 1], stddev=0.1))  # Hidden size is multiplied by 2 for Bi-RNN
        b = tf.Variable(tf.constant(0., shape=[1]))
        y_hat = tf.nn.xw_plus_b(drop, W, b)
        y_hat = tf.squeeze(y_hat)
        tf.summary.histogram('W', W)

    with tf.name_scope('Metrics'):
        # Cross-entropy loss and optimizer initialization
        loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=y_hat, labels=target_ph))
        tf.summary.scalar('loss', loss)
        optimizer = tf.train.AdamOptimizer(learning_rate=1e-3).minimize(loss)

        # Accuracy metric
        accuracy = tf.reduce_mean(tf.cast(tf.equal(tf.round(tf.sigmoid(y_hat)), target_ph), tf.float32))
        tf.summary.scalar('accuracy', accuracy)

    merged = tf.summary.merge_all()

    # Batch generators
    train_batch_generator = batch_generator(X_train, y_train, BATCH_SIZE)
    test_batch_generator = batch_generator(X_test, y_test, BATCH_SIZE)
    session_conf = tf.ConfigProto(gpu_options=tf.GPUOptions(allow_growth=True))
    saver = tf.train.Saver()
    return batch_ph, target_ph, seq_len_ph, keep_prob_ph, alphas, loss, accuracy, optimizer, merged, \
           train_batch_generator, test_batch_generator, session_conf, saver
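Many of these examples (e.g. #1, #2, #8, #12) call an `attention(inputs, attention_size, return_alphas=...)` helper that this page never shows. The sketch below is a minimal additive-attention helper written as an assumption about what such projects rely on, not a copy of any of them:

def attention(inputs, attention_size, return_alphas=False):
    # A (fw, bw) tuple from bi_rnn is concatenated along the feature axis.
    if isinstance(inputs, tuple):
        inputs = tf.concat(inputs, 2)                               # [batch, time, 2*hidden]
    hidden_size = inputs.shape[2].value

    # Trainable attention parameters.
    w_omega = tf.Variable(tf.random_normal([hidden_size, attention_size], stddev=0.1))
    b_omega = tf.Variable(tf.random_normal([attention_size], stddev=0.1))
    u_omega = tf.Variable(tf.random_normal([attention_size], stddev=0.1))

    v = tf.tanh(tf.tensordot(inputs, w_omega, axes=1) + b_omega)    # [batch, time, attention_size]
    vu = tf.tensordot(v, u_omega, axes=1)                           # [batch, time]
    alphas = tf.nn.softmax(vu)                                      # attention weights
    output = tf.reduce_sum(inputs * tf.expand_dims(alphas, -1), 1)  # [batch, 2*hidden]
    return (output, alphas) if return_alphas else output

The real helpers clearly differ between projects: Example #8 unpacks two return values without passing return_alphas, and Example #24 expects a third value (the pre-softmax scores), so treat this only as an illustration of the general shape.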
Example #13
    def __init__(self, num_classes, embedding_size, init_embed, hidden_size, \
                 attention_size, max_sent_len, keep_prob):
        # word index
        self.input_x = tf.placeholder(tf.int32, [None, max_sent_len],
                                      name="input_x")
        # output probability
        self.input_y = tf.placeholder(tf.float32, [None, num_classes],
                                      name="input_y")
        self.sequence_length = tf.placeholder(tf.int32, [None],
                                              name="input_len")
        self.dropout_keep_prob = tf.placeholder(tf.float32,
                                                name="dropout_keep_prob")

        with tf.variable_scope('style_discriminator'):
            # embedding layer with initialization
            with tf.name_scope("embedding"):
                # trainable embedding
                W = tf.Variable(init_embed, name="W", dtype=tf.float32)
                self.embedded_chars = tf.nn.embedding_lookup(W, self.input_x)

            # RNN layer + attention
            with tf.name_scope("bi-rnn"):
                rnn_outputs, _ = bi_rnn(GRUCell(hidden_size), GRUCell(hidden_size),\
                                        inputs=self.embedded_chars, sequence_length=self.sequence_length, \
                                        dtype=tf.float32)
                attention_outputs, self.alphas = attention(rnn_outputs,
                                                           attention_size,
                                                           return_alphas=True)
                drop_outputs = tf.nn.dropout(attention_outputs, keep_prob)

            # Fully connected layer
            with tf.name_scope("fc-layer"):
                W = tf.Variable(tf.truncated_normal(
                    [drop_outputs.get_shape()[1].value, num_classes],
                    stddev=0.1),
                                name="W")
                b = tf.Variable(tf.constant(0.1, shape=[num_classes]),
                                name="b")
                self.scores = tf.sigmoid(tf.nn.xw_plus_b(drop_outputs, W, b),
                                         name="scores")

            # mean square error
            with tf.name_scope("mse"):
                self.loss = tf.reduce_mean(
                    tf.square(tf.subtract(self.scores, self.input_y)))

        self.params = [
            param for param in tf.trainable_variables()
            if 'style_discriminator' in param.name
        ]
        sd_optimizer = tf.train.AdamOptimizer(1e-4)
        grads_and_vars = sd_optimizer.compute_gradients(self.loss,
                                                        self.params,
                                                        aggregation_method=2)
        self.train_op = sd_optimizer.apply_gradients(grads_and_vars)
Example #14
    def __init__(self, sequence_length, num_classes, channel_num,
                 rnn_hidden_size, attention_size):
        self.input_x = tf.placeholder(tf.float32,
                                      [None, sequence_length, channel_num],
                                      name="input_x")
        self.input_y = tf.placeholder(tf.float32, [None, num_classes],
                                      name="input_y")
        self.dropout_keep_prob = tf.placeholder(tf.float32,
                                                name="dropout_keep_prob")

        # Bidirectional RNN
        self.rnn_outputs, _ = bi_rnn(GRUCell(rnn_hidden_size),
                                     GRUCell(rnn_hidden_size),
                                     inputs=self.input_x,
                                     dtype=tf.float32)

        # Attention layer and a dropout layer
        with tf.name_scope('Attention_layer'):
            self.att_output, alphas = self.attention(
                inputs=self.rnn_outputs, attention_size=attention_size)
            tf.summary.histogram('alphas', alphas)
        with tf.name_scope("dropout"):
            self.att_drop = tf.nn.dropout(self.att_output,
                                          self.dropout_keep_prob,
                                          name="dropout")

        # FC layer
        with tf.name_scope("output"):
            #            FC_W = tf.get_variable("FC_W", shape=[rnn_hidden_size * 2, num_classes],
            #                                   initializer=tf.contrib.layers.xavier_initializer())
            FC_W = tf.get_variable(
                "FC_W",
                shape=[sequence_length * rnn_hidden_size * 2, num_classes],
                initializer=tf.contrib.layers.xavier_initializer())
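            # FC_W spans sequence_length * rnn_hidden_size * 2 inputs, which suggests
            # self.attention returns per-timestep outputs flattened over time rather than a pooled vector.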
            FC_b = tf.Variable(tf.constant(0.1, shape=[num_classes]),
                               name="FC_b")
            self.fc_out = tf.nn.xw_plus_b(self.att_drop,
                                          FC_W,
                                          FC_b,
                                          name="FC_out")
            self.scores = tf.nn.softmax(self.fc_out, name='scores')
            self.predictions = tf.argmax(self.scores, 1, name="predictions")

        with tf.name_scope("loss"):
            losses = tf.nn.softmax_cross_entropy_with_logits(
                logits=self.fc_out, labels=self.input_y)
            self.loss = tf.reduce_mean(losses, name='loss')

        with tf.name_scope("accuracy"):
            correct_predictions = tf.equal(self.predictions,
                                           tf.argmax(self.input_y, 1))
            self.accuracy = tf.reduce_mean(tf.cast(correct_predictions,
                                                   "float"),
                                           name="accuracy")
Example #15
 def __call__(self, batch_embedding):
     if self.basic_cell in self.cell_dic:
         rnn_outputs, _ = bi_rnn(
             self.cell_dic[self.basic_cell](self.hidden_size),
             self.cell_dic[self.basic_cell](self.hidden_size),
             inputs=batch_embedding,
             dtype=tf.float32)
         print("Rnn encoder with " + self.basic_cell)
     else:
         rnn_outputs, _ = bi_rnn(GRUCell(self.hidden_size),
                                 GRUCell(self.hidden_size),
                                 inputs=batch_embedding,
                                 dtype=tf.float32)
         print("Rnn encoder with default GRU cell")
     if self.keep_ori:
         return tf.concat([rnn_outputs[0], rnn_outputs[1]], axis=-1)
     if self.with_attention_layer:
         print("Build a Self-Attention Layer")
         return attention(rnn_outputs, self.keep_prob), self.hidden_size
     return tf.reduce_mean(rnn_outputs[0] + rnn_outputs[1],
                           1), self.hidden_size
Example #16
    def _build_graph(self):
        config = self.config
        # Define the bidirectional RNN
        rnn_outputs, _ = bi_rnn(
            self.rnn_cell(config),
            self.rnn_cell(config),
            inputs=self.batch_embedded,
            dtype=tf.float32,
        )

        fw_outputs, bw_outputs = rnn_outputs

        W = tf.Variable(tf.random_normal([config.hidden_size], stddev=0.1))
        H = fw_outputs + bw_outputs  # (batch_size, seq_len, HIDDEN_SIZE)
        M = tf.tanh(H)  # M = tanh(H)  (batch_size, seq_len, HIDDEN_SIZE)

        alpha = tf.nn.softmax(
            tf.matmul(tf.reshape(M, [-1, config.hidden_size]),
                      tf.reshape(W, [-1, 1])))
        r = tf.matmul(tf.transpose(H, [0, 2, 1]),
                      tf.reshape(alpha, [-1, config.max_len, 1]))
        r = tf.squeeze(r)
        h_star = tf.tanh(r)  # (batch_size, HIDDEN_SIZE)

        h_drop = tf.nn.dropout(h_star, self.keep_prob)

        # Fully connected layer(dense layer)
        FC_W = tf.Variable(
            tf.truncated_normal([config.hidden_size, config.n_class],
                                stddev=0.1))
        FC_b = tf.Variable(tf.constant(0., shape=[config.n_class]))
        self.y_hat = tf.nn.xw_plus_b(h_drop, FC_W, FC_b)

        # Define the loss and train_op
        self.loss = tf.reduce_mean(
            tf.nn.sparse_softmax_cross_entropy_with_logits(logits=self.y_hat,
                                                           labels=self.label))

        # optimization
        loss_to_minimize = self.loss
        tvars = tf.trainable_variables()
        gradients = tf.gradients(
            loss_to_minimize,
            tvars,
            aggregation_method=tf.AggregationMethod.EXPERIMENTAL_TREE,
        )
        grads, global_norm = tf.clip_by_global_norm(gradients, 1.0)

        optimizer = tf.train.AdamOptimizer(learning_rate=config.learning_rate)
        self.train_op = optimizer.apply_gradients(zip(grads, tvars),
                                                  global_step=self.global_step,
                                                  name="train_step")
Example #17
    def model(self):
        # (Bi-GRU) layers
        rnn_outputs, _ = bi_rnn(GRUCell(self.hidden_size),
                                GRUCell(self.hidden_size),
                                inputs=self.batch_embedded,
                                dtype=tf.float32)
        tf.summary.histogram('RNN_outputs', rnn_outputs)

        if isinstance(rnn_outputs, tuple):
            rnn_outputs = tf.concat(rnn_outputs, 2)
            print('rnn_outputs.shape:', rnn_outputs.shape)
            rnn_outputs = tf.reduce_mean(rnn_outputs, axis=2)
            print('rnn_outputs.shape:', rnn_outputs.shape)
            self.output = tf.reduce_sum(rnn_outputs, axis=1)
Example #18
def RNN_layer(HIDDEN_SIZE, batch_embedded, seq_len_ph):

    rnn_outputs, _ = bi_rnn(GRUCell(HIDDEN_SIZE),
                            GRUCell(HIDDEN_SIZE),
                            inputs=batch_embedded,
                            sequence_length=seq_len_ph,
                            dtype=tf.float32)
    tf.summary.histogram('RNN_outputs', rnn_outputs)

    if isinstance(rnn_outputs, tuple):
        rnn_outputs = tf.concat(rnn_outputs, 2)
    #rnn_outputs = tf.layers.batch_normalization(rnn_outputs)

    return rnn_outputs
Example #19
    def create_model(self):
        with tf.name_scope("classification_rnn"):
            outputs, _ = bi_rnn(
                tf.nn.rnn_cell.DropoutWrapper(GRUCell(self.hidden_size), self.keep_prob),
                tf.nn.rnn_cell.DropoutWrapper(GRUCell(self.hidden_size), self.keep_prob),
                inputs=self.embedded_layer,
                dtype=tf.float32)

        outputs = tf.concat(outputs, axis=-1)
        outputs = tf.reduce_mean(outputs, axis=1)
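        # outputs: [batch_size, hidden_size * 2] after concatenating both directions and mean-pooling over time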
        self.drop = tf.nn.dropout(outputs, self.keep_prob)

        self.logits = tf.matmul(self.drop, self.weight_variable) + self.bias_variable  # [batch_size,num_classes]
        self.prediction = tf.nn.softmax(self.logits)
        self.y_pred_cls = tf.argmax(tf.nn.softmax(self.logits), 1)
Example #20
 def biLSTM_layer(self, lstm_inputs, lstm_dim, lengths, name=None):
     """
     :param lstm_inputs: [batch_size, num_steps, emb_size]
     :return: [batch_size, num_steps, 2*lstm_dim]
     """
     with tf.variable_scope("char_BiLSTM" if not name else name):
         outputs, _ = bi_rnn(
             tf.nn.rnn_cell.DropoutWrapper(GRUCell(lstm_dim),
                                           self.dropout_keep),
             tf.nn.rnn_cell.DropoutWrapper(GRUCell(lstm_dim),
                                           self.dropout_keep),
             inputs=lstm_inputs,
             dtype=tf.float32,
             sequence_length=lengths)
     return tf.concat(outputs, axis=2)
Example #21
    def __init__(self):
        learning_rate = 0.01
        num_hidden  = 5
        num_classes = 1
        num_input   = 6
        keep_rate_DROPOUT = 1
        self.X = tf.placeholder("float", [1, None, num_input])
        self.Y = tf.placeholder("float", [None, num_classes])
        # Define weights
        self.weights = {
            'out': tf.Variable(tf.random_normal([num_hidden, num_classes]))
        }
        self.biases = {
            'out': tf.Variable(tf.random_normal([num_classes]))
        }
        self.lstm_fw_cell = rnn.BasicLSTMCell(num_hidden, forget_bias=1.0,reuse=False)
        self.lstm_fw_cell = rnn.DropoutWrapper(self.lstm_fw_cell,input_keep_prob=keep_rate_DROPOUT, output_keep_prob=keep_rate_DROPOUT, state_keep_prob=keep_rate_DROPOUT)
        # Backward direction cell
        self.lstm_bw_cell = rnn.BasicLSTMCell(num_hidden, forget_bias=1.0,reuse=False)
        self.lstm_bw_cell = rnn.DropoutWrapper(self.lstm_bw_cell,input_keep_prob=keep_rate_DROPOUT, output_keep_prob=keep_rate_DROPOUT, state_keep_prob=keep_rate_DROPOUT)
        # Get lstm cell output
        self.LSTM_outputs, _ = bi_rnn(self.lstm_fw_cell, self.lstm_bw_cell, self.X,
                            dtype=tf.float32,scope="bidirectional_rnn") 
        #Attention
        fw_outputs, bw_outputs = self.LSTM_outputs #fw_o and bw_o :(batch_size,windowSize,num_hidden)(76x412x5)
        Hidden_fw_bw = fw_outputs+bw_outputs # (batch_size,windowSize,num_hidden)
        Hidden_fw_bw_t = tf.transpose(Hidden_fw_bw,[0,2,1]) # swap axes 1 and 2: (batch_size,num_hidden,windowSize)
        Hidden_fw_bw_2D = tf.reshape(Hidden_fw_bw,[-1,num_hidden]) # (batch_size*windowSize,num_hidden)
        M=tf.tanh(Hidden_fw_bw_2D) #(batch_size*windowSize,num_hidden)
        W= tf.Variable(tf.random_normal([num_hidden])) #(1,num_hidden)
        W_t=tf.reshape(W,[-1,1]) #(num_hidden,1)
        MxW = tf.matmul(M,W_t) #(batch_size*windowSize,num_hidden)*(num_hidden,1) =  (batch_size*windowSize,1)
        MxW_3D = tf.reshape(MxW,[1,-1,1]) #(batch_size,windowSize,1)
        self.alpha_3D = tf.nn.softmax(MxW_3D,axis=1) # (batch_size,windowSize,1)
        r=tf.matmul(Hidden_fw_bw_t,self.alpha_3D) #(batch_size,num_hidden,windowSize)* (batch_size,windowSize,1) = (batch_size,num_hidden,1)
        r= tf.reshape(r,[-1,num_hidden]) # (batch_size,num_hidden)
        self.h_star = tf.tanh(r) # (batch_size,num_hidden)
        prediction = tf.matmul(self.h_star, self.weights['out']) + self.biases['out']

        self.Prediction_MOS = tf.add(tf.multiply(prediction, 4), 1) 
        self.Label_MOS = tf.add(tf.multiply(self.Y, 4), 1)
        LOSS = tf.sqrt(tf.reduce_mean(tf.square(tf.subtract(self.Prediction_MOS, self.Label_MOS))))
        PCC = tf.contrib.metrics.streaming_pearson_correlation(labels=self.Prediction_MOS, predictions=self.Label_MOS, name='pearson_r')
        optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
        train_op = optimizer.minimize(LOSS)
        init = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer())
        self.saverModel = tf.train.Saver()
Example #22
    def model(self):
        # (Bi-GRU) layers
        rnn_outputs, _ = bi_rnn(GRUCell(self.hidden_size),
                                GRUCell(self.hidden_size),
                                inputs=self.batch_embedded,
                                dtype=tf.float32)
        tf.summary.histogram('RNN_outputs', rnn_outputs)

        # Attention layers
        with tf.name_scope('Attention_layer'):
            attention_ = Attention(rnn_outputs,
                                   self.attention_size,
                                   time_major=False,
                                   return_alphas=True)
            self.attention_output, alphas = attention_.attentionModel()
            tf.summary.histogram('alphas', alphas)
            print('attention_output.shape:', self.attention_output.shape)
Example #23
    def bi_lstm_att(self):
        sen_inputs_glove = self.embedding_layer()
        rnn_outputs, _ = bi_rnn(
            tf.contrib.rnn.BasicLSTMCell(self.lstm_units),
            tf.contrib.rnn.BasicLSTMCell(self.lstm_units),
            inputs=sen_inputs_glove,
            dtype=tf.float32
        )
        fw_outputs, bw_outputs = rnn_outputs
        W = tf.Variable(tf.random_normal([self.lstm_units], stddev=0.1))
        H = fw_outputs + bw_outputs  # [b_s, max_len, lstm_units]
        M = tf.tanh(H)  # M = tanh(H)  (batch_size, seq_len, lstm_units)
        self.comput_att(H)

        r = tf.matmul(
            tf.transpose(H, [0, 2, 1]),
            tf.reshape(self.alpha, [-1, self.max_len, 1])
        )

        r = tf.squeeze(r)
        h_star = tf.tanh(r)
        h_drop = tf.nn.dropout(h_star, self.dropout_keep_prob)

        return h_drop
Example #24
    def __init__(self,
                 sequence_length,
                 num_classes,
                 text_vocab_size,
                 text_embedding_size,
                 hidden_size=800,
                 attention_size=100,
                 l2_reg_lambda=0.0):
        # Placeholders for input, output and dropout
        self.input_text = tf.placeholder(tf.int32,
                                         shape=[None, sequence_length],
                                         name='input_text')
        self.input_y = tf.placeholder(tf.float32,
                                      shape=[None, num_classes],
                                      name='input_y')
        self.dropout_keep_prob = tf.placeholder(tf.float32,
                                                name='dropout_keep_prob')
        self.dropout_keep_prob_lstm = tf.placeholder(tf.float32,
                                                     name='dropout_keep_prob_lstm')
        # Keeping track of l2 regularization loss (optional)
        l2_loss = tf.constant(0.0)

        # Embedding layer
        with tf.device('/cpu:0'), tf.name_scope("text-embedding"):
            self.W_text = tf.Variable(tf.random_uniform(
                [text_vocab_size, text_embedding_size], -1.0, 1.0),
                                      name="W_text")
            self.text_embedded_chars = tf.nn.embedding_lookup(
                self.W_text, self.input_text)

        # embedding_size = text_embedding_size + 2 * dist_embedding_size

        # (Bi-)RNN layer(-s)
        self.rnn_outputs, _ = bi_rnn(
            tf.nn.rnn_cell.DropoutWrapper(GRUCell(hidden_size),
                                          self.dropout_keep_prob_lstm),
            tf.nn.rnn_cell.DropoutWrapper(GRUCell(hidden_size),
                                          self.dropout_keep_prob_lstm),
            inputs=self.text_embedded_chars,
            dtype=tf.float32)
        print(self.rnn_outputs)
        tf.summary.histogram('RNN_outputs', self.rnn_outputs)

        # Attention layer
        with tf.name_scope('Attention_layer'):
            attention_output, alphas, self.vu = attention(self.rnn_outputs,
                                                          attention_size,
                                                          return_alphas=True)
            tf.summary.histogram('alphas', alphas)

        print(attention_output)
        # Dropout
        self.drop = tf.nn.dropout(attention_output, self.dropout_keep_prob)

        # Fully connected layer
        with tf.name_scope('Fully_connected_layer'):
            W = tf.Variable(
                tf.truncated_normal(
                    [hidden_size * 2, num_classes],
                    stddev=0.1))  # Hidden size is multiplied by 2 for Bi-RNN
            b = tf.Variable(tf.constant(0., shape=[num_classes]))
            l2_loss += tf.nn.l2_loss(W)
            l2_loss += tf.nn.l2_loss(b)
            self.scores = tf.nn.xw_plus_b(self.drop, W, b, name="scores")
            self.predictions = tf.argmax(self.scores, 1, name="predictions")

        # Calculate mean cross-entropy loss
        with tf.name_scope("loss"):
            losses = tf.nn.softmax_cross_entropy_with_logits(
                logits=self.scores, labels=self.input_y)
            self.loss = tf.reduce_mean(losses) + l2_reg_lambda * l2_loss

        # Accuracy
        with tf.name_scope("accuracy"):
            correct_predictions = tf.equal(self.predictions,
                                           tf.argmax(self.input_y, 1))
            self.accuracy = tf.reduce_mean(tf.cast(correct_predictions,
                                                   "float"),
                                           name="accuracy")
Example #25
Train_Set, Valid_Set, Test_Set = PMATM.getSplitSets()
Pos_Txt_Index_List = list(np.load(Pos_Txt_Index_List_Path))
Neg_Txt_Index_List = list(np.load(Neg_Txt_Index_List_Path))

tf.reset_default_graph()
labels = tf.placeholder(tf.float32, [batchSize, numClasses])
input_text = tf.placeholder(tf.float32, [batchSize, maxSeqLength, wordDim])
input_emoji = tf.placeholder(tf.float32, [batchSize, wordDim])

# (Bi-)RNN layer(-s)
seq_len_ph = [maxSeqLength] * batchSize
rnn_outputs, _ = bi_rnn(GRUCell(hiddenSize),
                        GRUCell(hiddenSize),
                        inputs=input_text,
                        sequence_length=seq_len_ph,
                        dtype=tf.float32)

memory = tf.concat(rnn_outputs, 2)

attention_input_1 = tf.reduce_mean(input_text, axis=1)


def attention(memory, input):
    input = tf.reshape(input, [batchSize, 1, wordDim])

    inputs = input
    for i in range(memory.shape[1] - 1):
        inputs = tf.concat((inputs, input), 1)
Example #26
    def _build_graph(self):
        now = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        print(now)
        print("Build Graph...")
        print()

        self.xavier_init = tf.contrib.layers.xavier_initializer()

        self.embed_dim = 100
        self.state_dim = 100
        self.bi_state_dim = self.state_dim * 2
        self.feat_dim = self.bi_state_dim
        self.attend_dim = self.feat_dim
        self.context_dim = self.bi_state_dim * 4
        self.fc_dim = 250

        print("embed_dim : %d" % self.embed_dim)
        print("state_dim : %d" % self.state_dim)
        print("bi_state_dim : %d" % self.bi_state_dim)
        print("feat_dim : %d" % self.feat_dim)
        print("attend_dim : %d" % self.attend_dim)
        print("context_dim : %d" % self.context_dim)
        print("fc_dim : %d" % self.fc_dim)
        print()

        with tf.device(self.dev):
            with tf.variable_scope("input_placeholders"):
                self.enc_input1 = tf.placeholder(tf.int32,
                                                 shape=[None, None],
                                                 name="enc_input1")
                self.enc_seq_len1 = tf.placeholder(tf.int32,
                                                   shape=[
                                                       None,
                                                   ],
                                                   name="enc_seq_len1")
                self.enc_input2 = tf.placeholder(tf.int32,
                                                 shape=[None, None],
                                                 name="enc_input2")
                self.enc_seq_len2 = tf.placeholder(tf.int32,
                                                   shape=[
                                                       None,
                                                   ],
                                                   name="enc_seq_len2")
                self.targets = tf.placeholder(tf.int32,
                                              shape=[
                                                  None,
                                              ],
                                              name="targets")
                self.batch_size = tf.placeholder(tf.int32,
                                                 shape=[],
                                                 name="batch_size")
                self.keep_prob = tf.placeholder(tf.float32, name="keep_prob")

            with tf.variable_scope("words_embedding"):
                self.embeddings = tf.get_variable(
                    "embeddings", [self.voc_size, self.embed_dim],
                    initializer=self.xavier_init)
                self.embed_in1 = tf.nn.embedding_lookup(self.embeddings,
                                                        self.enc_input1,
                                                        name="embed_in1")
                self.embed_in2 = tf.nn.embedding_lookup(self.embeddings,
                                                        self.enc_input2,
                                                        name="embed_in2")

                self.pad_mask1 = tf.sequence_mask(self.enc_seq_len1,
                                                  self.input_len_max,
                                                  dtype=tf.float32,
                                                  name="pad_mask1")
                self.pad_mask2 = tf.sequence_mask(self.enc_seq_len2,
                                                  self.input_len_max,
                                                  dtype=tf.float32,
                                                  name="pad_mask2")

            with tf.variable_scope("rnn_encoder_layer") as scope_rnn:
                self.output_enc1, self.state_enc1 = bi_rnn(
                    GRUCell(self.state_dim),
                    GRUCell(self.state_dim),
                    inputs=self.embed_in1,
                    sequence_length=self.enc_seq_len1,
                    dtype=tf.float32)

                self.state_enc1 = tf.concat(
                    [self.state_enc1[0], self.state_enc1[1]],
                    axis=1,
                    name="state_enc1")
                assert self.state_enc1.get_shape()[1] == self.bi_state_dim

                self.output_enc1 = tf.concat(
                    self.output_enc1, axis=2)  # [batch, max_eng, state*2]
                self.output_enc1 = tf.nn.dropout(self.output_enc1,
                                                 keep_prob=self.keep_prob,
                                                 name="output_enc1")
                print("output_enc1.get_shape() : %s" %
                      (self.output_enc1.get_shape()))
                assert self.output_enc1.get_shape()[2] == self.bi_state_dim

                scope_rnn.reuse_variables()

                self.output_enc2, self.state_enc2 = bi_rnn(
                    GRUCell(self.state_dim),
                    GRUCell(self.state_dim),
                    inputs=self.embed_in2,
                    sequence_length=self.enc_seq_len2,
                    dtype=tf.float32)

                self.state_enc2 = tf.concat(
                    [self.state_enc2[0], self.state_enc2[1]],
                    axis=1,
                    name="state_enc2")
                assert self.state_enc2.get_shape()[1] == self.bi_state_dim

                self.output_enc2 = tf.concat(
                    self.output_enc2, axis=2)  # [batch, max_eng, state*2]
                self.output_enc2 = tf.nn.dropout(self.output_enc2,
                                                 keep_prob=self.keep_prob,
                                                 name="output_enc2")
                print("output_enc2.get_shape() : %s" %
                      (self.output_enc2.get_shape()))
                assert self.output_enc2.get_shape()[2] == self.bi_state_dim

            with tf.variable_scope("attention_layer") as scope_attention:
                self.W_y = tf.get_variable(
                    "W_y", [1, 1, self.feat_dim, self.attend_dim],
                    initializer=self.xavier_init)
                self.W_h = tf.get_variable("W_h",
                                           [self.feat_dim, self.attend_dim],
                                           initializer=self.xavier_init)
                self.W_a = tf.get_variable("W_a", [self.attend_dim, 1],
                                           initializer=self.xavier_init)

                # question 1..
                # average vector
                self.R_ave_1 = tf.reduce_mean(self.output_enc1,
                                              axis=1,
                                              name="R_ave_1")
                print("R_ave_1.get_shape() : %s" % (self.R_ave_1.get_shape()))

                # Wy * Y
                self.output_enc1_ex = tf.reshape(
                    self.output_enc1,
                    [-1, self.input_len_max, 1, self.feat_dim])
                self.M_1_left = tf.nn.conv2d(self.output_enc1_ex,
                                             self.W_y,
                                             strides=[1, 1, 1, 1],
                                             padding="SAME")
                self.M_1_left = tf.reshape(
                    self.M_1_left, [-1, self.input_len_max, self.attend_dim])
                print("M_1_left.get_shape() : %s" %
                      (self.M_1_left.get_shape()))

                # Wh * Rave
                self.M_1_right = tf.matmul(self.R_ave_1, self.W_h)
                self.M_1_right = tf.ones([self.input_len_max, 1, 1
                                          ]) * self.M_1_right
                self.M_1_right = tf.transpose(self.M_1_right, [1, 0, 2])
                print("M_1_right.get_shape() : %s" %
                      (self.M_1_right.get_shape()))

                # attention
                self.M_1 = tf.tanh(self.M_1_left + self.M_1_right)
                print("M_1.get_shape() : %s" % (self.M_1.get_shape()))

                self.w_M_1 = tf.matmul(
                    tf.reshape(self.M_1, [-1, self.attend_dim]), self.W_a)
                self.w_M_1 = tf.reshape(self.w_M_1, [-1, self.input_len_max])
                print("w_M_1.get_shape() : %s" % (self.w_M_1.get_shape()))

                self.attention1 = tf.nn.softmax(self.w_M_1) * self.pad_mask1
                self.attention1 = self.attention1 / tf.reshape(
                    tf.reduce_sum(self.attention1, axis=1), [-1, 1])
                print("attention1.get_shape() : %s" %
                      (self.attention1.get_shape()))

                self.context1 = tf.reduce_sum(
                    self.output_enc1 *
                    tf.reshape(self.attention1, [-1, self.input_len_max, 1]),
                    axis=1,
                    name="context1")
                print("context1.get_shape() : %s" %
                      (self.context1.get_shape()))

                # question 2..
                # average vector
                self.R_ave_2 = tf.reduce_mean(self.output_enc2,
                                              axis=1,
                                              name="R_ave_2")
                print("R_ave_2.get_shape() : %s" % (self.R_ave_2.get_shape()))

                # Wy * Y
                self.output_enc2_ex = tf.reshape(
                    self.output_enc2,
                    [-1, self.input_len_max, 1, self.feat_dim])
                self.M_2_left = tf.nn.conv2d(self.output_enc2_ex,
                                             self.W_y,
                                             strides=[1, 1, 1, 1],
                                             padding="SAME")
                self.M_2_left = tf.reshape(
                    self.M_2_left, [-1, self.input_len_max, self.attend_dim])
                print("M_2_left.get_shape() : %s" %
                      (self.M_2_left.get_shape()))

                # Wh * Rave
                self.M_2_right = tf.matmul(self.R_ave_2, self.W_h)
                self.M_2_right = tf.ones([self.input_len_max, 1, 1
                                          ]) * self.M_2_right
                self.M_2_right = tf.transpose(self.M_2_right, [1, 0, 2])
                print("M_2_right.get_shape() : %s" %
                      (self.M_2_right.get_shape()))

                # attention
                self.M_2 = tf.tanh(self.M_2_left + self.M_2_right)
                print("M_2.get_shape() : %s" % (self.M_2.get_shape()))

                self.w_M_2 = tf.matmul(
                    tf.reshape(self.M_2, [-1, self.attend_dim]), self.W_a)
                self.w_M_2 = tf.reshape(self.w_M_2, [-1, self.input_len_max])
                print("w_M_2.get_shape() : %s" % (self.w_M_2.get_shape()))

                self.attention2 = tf.nn.softmax(self.w_M_2) * self.pad_mask2
                self.attention2 = self.attention2 / tf.reshape(
                    tf.reduce_sum(self.attention2, axis=1), [-1, 1])
                print("attention2.get_shape() : %s" %
                      (self.attention2.get_shape()))

                self.context2 = tf.reduce_sum(
                    self.output_enc2 *
                    tf.reshape(self.attention2, [-1, self.input_len_max, 1]),
                    axis=1,
                    name="context2")
                print("context2.get_shape() : %s" %
                      (self.context2.get_shape()))

                assert self.context1.get_shape()[1] == self.feat_dim
                assert self.context2.get_shape()[1] == self.feat_dim

            with tf.variable_scope("final_context_layer"):
                self.features = [
                    self.context1, self.context2,
                    tf.abs(self.context1 - self.context2),
                    (self.context1 * self.context2)
                ]
                self.merged_feature = tf.concat(self.features,
                                                axis=1,
                                                name="merged_feature")
                print("merged_feature.get_shape() : %s" %
                      (self.merged_feature.get_shape()))
                assert self.merged_feature.get_shape()[1] == self.context_dim

            with tf.variable_scope("dense_layer"):
                self.W_out1 = tf.get_variable("W_out1",
                                              [self.context_dim, self.fc_dim],
                                              initializer=self.xavier_init)
                self.bias_out1 = tf.get_variable("bias_out1", [self.fc_dim])
                self.W_out2 = tf.get_variable("W_out2",
                                              [self.fc_dim, self.target_size],
                                              initializer=self.xavier_init)
                self.bias_out2 = tf.get_variable("bias_out2",
                                                 [self.target_size])

                self.fc = tf.nn.xw_plus_b(self.merged_feature, self.W_out1,
                                          self.bias_out1)
                self.fc = tf.tanh(self.fc)
                print("fc.get_shape() : %s" % (self.fc.get_shape()))

                self.y_hat = tf.nn.xw_plus_b(self.fc,
                                             self.W_out2,
                                             self.bias_out2,
                                             name="y_hat")
                print("y_hat.get_shape() : %s" % (self.y_hat.get_shape()))

            with tf.variable_scope("train_optimization"):
                self.train_vars = tf.trainable_variables()

                print()
                print("trainable_variables")
                for varvar in self.train_vars:
                    print(varvar)
                print()

                self.loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
                    logits=self.y_hat, labels=self.targets)
                self.loss = tf.reduce_mean(self.loss, name="loss")
                self.loss_l2 = tf.add_n([
                    tf.nn.l2_loss(v)
                    for v in self.train_vars if "bias" not in v.name
                ]) * 0.0001
                self.loss = self.loss + self.loss_l2

                self.predict = tf.argmax(tf.nn.softmax(self.y_hat), 1)
                self.predict = tf.cast(tf.reshape(self.predict,
                                                  [self.batch_size, 1]),
                                       tf.int32,
                                       name="predict")

                self.target_label = tf.cast(
                    tf.reshape(self.targets, [self.batch_size, 1]), tf.int32)
                self.correct = tf.equal(self.predict, self.target_label)
                self.accuracy = tf.reduce_mean(
                    tf.cast(self.correct, tf.float32))

                self.global_step = tf.Variable(0,
                                               name="global_step",
                                               trainable=False)
                self.decay_rate = tf.maximum(0.00007,
                                             tf.train.exponential_decay(
                                                 self.lr,
                                                 self.global_step,
                                                 1500,
                                                 0.95,
                                                 staircase=True),
                                             name="decay_rate")
                self.opt = tf.train.AdamOptimizer(
                    learning_rate=self.decay_rate)
                self.grads_and_vars = self.opt.compute_gradients(
                    self.loss, self.train_vars)
                self.grads_and_vars = [(tf.clip_by_norm(g, 30.0), v)
                                       for g, v in self.grads_and_vars]
                self.grads_and_vars = [
                    (tf.add(g, tf.random_normal(tf.shape(g), stddev=0.001)), v)
                    for g, v in self.grads_and_vars
                ]
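                # Each gradient tensor is clipped to norm 30.0 and then perturbed
                # with Gaussian noise (stddev 1e-3) before being applied.
                # Caveat (editor's note): if any trainable variable receives no
                # gradient, g is None here and tf.clip_by_norm / tf.shape will
                # raise; a guard such as `if g is not None` would be needed then.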

                self.train_op = self.opt.apply_gradients(
                    self.grads_and_vars,
                    global_step=self.global_step,
                    name="train_op")

            if self.makedir:
                # Summaries for loss and lr
                self.loss_summary = tf.summary.scalar("loss", self.loss)
                self.accuracy_summary = tf.summary.scalar(
                    "accuracy", self.accuracy)
                self.lr_summary = tf.summary.scalar("lr", self.decay_rate)

                # Output directory for models and summaries
                timestamp = datetime.datetime.now().strftime("%Y-%m-%d %H:%M")
                self.out_dir = os.path.abspath(
                    os.path.join("./model", timestamp))
                print("LOGDIR = %s" % self.out_dir)
                print()

                # Train Summaries
                self.train_summary_op = tf.summary.merge([
                    self.loss_summary, self.accuracy_summary, self.lr_summary
                ])
                self.train_summary_dir = os.path.join(self.out_dir, "summary",
                                                      "train")
                self.train_summary_writer = tf.summary.FileWriter(
                    self.train_summary_dir, self.sess.graph)

                # Test summaries
                self.test_summary_op = tf.summary.merge([
                    self.loss_summary, self.accuracy_summary, self.lr_summary
                ])
                print(self.test_summary_op)
                self.test_summary_dir = os.path.join(self.out_dir, "summary",
                                                     "test")
                self.test_summary_writer = tf.summary.FileWriter(
                    self.test_summary_dir, self.sess.graph)

                # Checkpoint directory. Tensorflow assumes this directory already exists so we need to create it
                self.checkpoint_dir = os.path.abspath(
                    os.path.join(self.out_dir, "checkpoints"))
                self.checkpoint_prefix = os.path.join(self.checkpoint_dir,
                                                      "model-step")
                if not os.path.exists(self.checkpoint_dir):
                    os.makedirs(self.checkpoint_dir)

            self.saver = tf.train.Saver(tf.global_variables(),
                                        max_to_keep=None)
        def __graph__():
            with tf.name_scope('input'):
                x_input = tf.placeholder(
                    dtype=tf.float32,
                    shape=[None, sequence_width, sequence_height],
                    name='x_input')
                y_input = tf.placeholder(dtype=tf.float32,
                                         shape=[None, num_classes],
                                         name='y_input')

            # state = tf.placeholder(dtype=tf.float32, shape=[None, self.cell_size * self.num_layers],
            #                        name='initial_state')
            p_keep = tf.placeholder(dtype=tf.float32, name='p_keep')

            learning_rate = tf.placeholder(dtype=tf.float32,
                                           name='learning_rate')

            hidden_size = int(sequence_width)
            # seq_len = tf.Variable(tf.constant(hidden_size),name='seq_len')

            rnn_outputs, _ = bi_rnn(GRUCell(hidden_size),
                                    GRUCell(hidden_size),
                                    inputs=x_input,
                                    sequence_length=None,
                                    dtype=tf.float32)
            tf.summary.histogram('RNN_outputs', rnn_outputs)

            # Attention layer
            with tf.name_scope('Attention_layer'):
                attention_output, alphas = attention(
                    input=rnn_outputs,
                    hidden_size=self.sequence_width,
                    attention_size=ATTENTION_SIZE,
                    return_alpha=True)
                tf.summary.histogram('alphas', alphas)

            # dropout
            drop = tf.nn.dropout(attention_output, keep_prob=p_keep)

            # fully connected layer
            with tf.name_scope('Fully_connected_layer'):
                W = tf.Variable(tf.truncated_normal(
                    [hidden_size * 2, self.num_classes], stddev=0.1),
                                name='W')
                b = tf.Variable(tf.constant(0.0, shape=[self.num_classes]),
                                name='b')
                y_hat = tf.nn.xw_plus_b(drop, W, b)
                # y_hat=tf.squeeze(y_hat)
                tf.summary.histogram('W', W)

            with tf.name_scope('loss'):
                loss = svm_loss(labels=y_input,
                                logits=y_hat,
                                num_classes=self.num_classes,
                                penalty_parameter=self.svm_c,
                                weight=W)
            tf.summary.scalar('loss', loss)
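            # Editor's note: svm_loss is assumed to implement a multi-class
            # (squared-hinge) SVM objective of the form
            #     penalty * ||W||^2 + hinge(labels, logits),
            # which is why the weight matrix W is passed in explicitly; its exact
            # definition lives outside this excerpt.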

            optimizer = tf.train.AdamOptimizer(
                learning_rate=learning_rate).minimize(loss=loss)

            with tf.name_scope('accuracy'):
                predicted_class = tf.sign(y_hat)
                predicted_class = tf.identity(predicted_class,
                                              name='predicted_class')
                with tf.name_scope('correct_prediction'):
                    correct = tf.equal(tf.argmax(predicted_class, 1),
                                       tf.argmax(y_input, 1))
                with tf.name_scope('accuracy'):
                    accuracy = tf.reduce_mean(tf.cast(correct, 'float'))
            tf.summary.scalar('accuracy', accuracy)

            merged = tf.summary.merge_all()

            # set class properties
            self.x_input = x_input
            self.y_input = y_input
            self.p_keep = p_keep
            self.loss = loss
            self.optimizer = optimizer
            # self.state=state
            # self.states=states
            self.learning_rate = learning_rate
            self.predicted_class = predicted_class
            self.accuracy = accuracy
            self.merged = merged
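            # Editor's sketch (hypothetical names, not part of the original):
            # once the graph is built, a training step would feed the saved
            # placeholders, e.g.
            #   feed = {model.x_input: x_batch, model.y_input: y_batch,
            #           model.p_keep: 0.5, model.learning_rate: 1e-3}
            #   _, batch_loss = sess.run([model.optimizer, model.loss],
            #                            feed_dict=feed)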
Example #28
with tf.name_scope('Input_layer'):
    input_x = tf.placeholder(tf.int32, [None, maxlen], name='input_x')
    output_y = tf.placeholder(tf.float32, [None], name='output_y')
    keep_prob = tf.placeholder(tf.float32, name='keep_prob')

# Embedding layer
with tf.name_scope('Embedding_layer'):
    embeddings_var = tf.Variable(tf.random_uniform(
        [len(word_index) + 1, embedding_dim], -1.0, 1.0),
                                 trainable=True)
    tf.summary.histogram('embeddings_var', embeddings_var)
    batch_embedded = tf.nn.embedding_lookup(embeddings_var, input_x)

# BiDirectional RNN Layer
rnn_outputs, _ = bi_rnn(GRUCell(hidden_size),
                        GRUCell(hidden_size),
                        inputs=batch_embedded,
                        dtype=tf.float32)
tf.summary.histogram('RNN_outputs', rnn_outputs)

# Attention layer
with tf.name_scope('Attention_layer'):
    attention_output, alphas = attention(rnn_outputs,
                                         attention_size,
                                         return_alphas=True)
    tf.summary.histogram('alphas', alphas)

# Dropout for attention layer
drop = tf.nn.dropout(attention_output, keep_prob)

# Fully connected layer
with tf.name_scope('Fully_connected_layer'):
Example #29
    def _build_graph(self):
        now = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        print(now)
        print("Build Graph...")
        print()

        self.xavier_init = tf.contrib.layers.xavier_initializer()

        self.embed_dim = 100
        self.state_dim = 100
        self.bi_state_dim = self.state_dim * 2
        self.attend_dim = 250
        self.feat_dim = self.bi_state_dim
        self.fc_dim = 150

        print("embed_dim : %d" % self.embed_dim)
        print("state_dim : %d" % self.state_dim)
        print("bi_state_dim : %d" % self.bi_state_dim)
        print("attend_dim : %d" % self.attend_dim)
        print("feat_dim : %d" % self.feat_dim)
        print("fc_dim : %d" % self.fc_dim)
        print()

        with tf.device(self.dev):
            with tf.variable_scope("input_placeholders"):
                self.enc_input = tf.placeholder(tf.int32,
                                                shape=[None, None],
                                                name="enc_input")
                self.enc_seq_len = tf.placeholder(tf.int32,
                                                  shape=[
                                                      None,
                                                  ],
                                                  name="enc_seq_len")
                self.targets = tf.placeholder(tf.int32,
                                              shape=[
                                                  None,
                                              ],
                                              name="targets")
                self.batch_size = tf.placeholder(tf.int32,
                                                 shape=[],
                                                 name="batch_size")
                self.keep_prob = tf.placeholder(tf.float32, name="keep_prob")

            with tf.variable_scope("words_embedding"):
                self.embeddings = tf.get_variable(
                    "embeddings", [self.voc_size, self.embed_dim],
                    initializer=self.xavier_init)
                self.embed_in = tf.nn.embedding_lookup(self.embeddings,
                                                       self.enc_input,
                                                       name="embed_in")

                self.pad_mask = tf.sequence_mask(self.enc_seq_len,
                                                 self.input_len_max,
                                                 dtype=tf.float32,
                                                 name="pad_mask1")

            with tf.variable_scope("rnn_encoder_layer"):
                self.output_enc, self.state_enc = bi_rnn(
                    GRUCell(self.state_dim),
                    GRUCell(self.state_dim),
                    inputs=self.embed_in,
                    sequence_length=self.enc_seq_len,
                    dtype=tf.float32)

                self.state_enc = tf.concat(
                    [self.state_enc[0], self.state_enc[1]],
                    axis=1,
                    name="state_enc1")
                assert self.state_enc.get_shape()[1] == self.bi_state_dim

                self.output_enc = tf.concat(
                    self.output_enc, axis=2)  # [batch, max_eng, state*2]
                self.output_enc = tf.nn.dropout(self.output_enc,
                                                keep_prob=self.keep_prob,
                                                name="output_enc1")
                print("output_enc.get_shape() : %s" %
                      (self.output_enc.get_shape()))
                assert self.output_enc.get_shape()[2] == self.bi_state_dim

            with tf.variable_scope("attention_layer"):
                self.rows = 30
                self.W_s1 = tf.get_variable(
                    "W_s1", [1, 1, self.feat_dim, self.attend_dim],
                    initializer=self.xavier_init)
                self.bias_s1 = tf.get_variable("bias_s1", [self.attend_dim])
                self.W_s2 = tf.get_variable("W_s2",
                                            [self.attend_dim, self.rows],
                                            initializer=self.xavier_init)

                self.identity = tf.reshape(
                    tf.tile(tf.diag(tf.ones(self.rows)), [self.batch_size, 1]),
                    [self.batch_size, self.rows, self.rows],
                    name="identity")

                self.output_enc_ex = tf.reshape(
                    self.output_enc,
                    [-1, self.input_len_max, 1, self.feat_dim])
                self.context_att = tf.nn.conv2d(self.output_enc_ex,
                                                self.W_s1,
                                                strides=[1, 1, 1, 1],
                                                padding="SAME")

                self.context_att = tf.tanh(tf.nn.bias_add(
                    self.context_att, self.bias_s1),
                                           name="context_att")
                print("context_att.get_shape() : %s" %
                      (self.context_att.get_shape()))
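                # Editor's note: the 1x1 conv2d above is equivalent to applying
                # the same dense projection W_s1 (feat_dim -> attend_dim) at every
                # time step; reshaping to [batch, max_len, 1, feat_dim] simply
                # lets conv2d act as a position-wise linear layer.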

                # attention
                self.attention_tot = tf.matmul(
                    tf.reshape(self.context_att, [-1, self.attend_dim]),
                    self.W_s2)
                self.attention_tot = tf.reshape(
                    self.attention_tot, [-1, self.input_len_max, self.rows])
                self.attention_tot = tf.nn.softmax(
                    self.attention_tot, dim=1) * tf.reshape(
                        self.pad_mask, [-1, self.input_len_max, 1])
                self.attention_tot = tf.nn.softmax(self.attention_tot, dim=1)
                print("attention_tot.get_shape() : %s" %
                      (self.attention_tot.get_shape()))
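                # Editor's note: the mask zeroes padded positions after the first
                # softmax, and the second softmax renormalizes along the time axis
                # for each of the `rows` attention hops. Because softmax maps the
                # masked zeros to exp(0) = 1, padded positions still receive some
                # weight here; the pooled attention below applies the mask again
                # before its own softmax.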

                self.attention = tf.reduce_sum(self.attention_tot, axis=2)
                self.attention = tf.reshape(
                    self.attention,
                    [self.batch_size, self.input_len_max]) * self.pad_mask
                self.attention = tf.nn.softmax(self.attention)
                print("attention.get_shape() : %s" %
                      (self.attention.get_shape()))

                self.attention_tot_T = tf.transpose(self.attention_tot,
                                                    [0, 2, 1],
                                                    name="attention_tot_T")
                self.AA_t = tf.matmul(self.attention_tot_T,
                                      self.attention_tot) - self.identity
                print("AA_t.get_shape() : %s" % (self.AA_t.get_shape()))

                # penalty
                self.P = tf.square(tf.norm(self.AA_t, axis=[-2, -1],
                                           ord="fro"))
                self.P = tf.reduce_mean(self.P, name="P")
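                # Editor's note: P is the orthogonality penalty
                # mean_b || A_b A_b^T - I ||_F^2 over the batch, which encourages
                # the attention hops to focus on different parts of the sequence;
                # it is added to the loss in the optimization block below.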

                # context..
                self.context = tf.reduce_sum(
                    self.output_enc *
                    tf.reshape(self.attention, [-1, self.input_len_max, 1]),
                    axis=1,
                    name="context")
                print("context.get_shape() : %s" % (self.context.get_shape()))
                assert self.context.get_shape()[1] == self.feat_dim

            with tf.variable_scope("dense_layer"):
                self.W_out1 = tf.get_variable("W_out1",
                                              [self.feat_dim, self.fc_dim],
                                              initializer=self.xavier_init)
                self.bias_out1 = tf.get_variable("bias_out1", [self.fc_dim])
                self.W_out2 = tf.get_variable("W_out2",
                                              [self.fc_dim, self.target_size],
                                              initializer=self.xavier_init)
                self.bias_out2 = tf.get_variable("bias_out2",
                                                 [self.target_size])

                self.fc = tf.nn.xw_plus_b(self.context, self.W_out1,
                                          self.bias_out1)
                self.fc = tf.tanh(self.fc)
                print("fc.get_shape() : %s" % (self.fc.get_shape()))

                self.y_hat = tf.nn.xw_plus_b(self.fc,
                                             self.W_out2,
                                             self.bias_out2,
                                             name="y_hat")
                print("y_hat.get_shape() : %s" % (self.y_hat.get_shape()))

            with tf.variable_scope("train_optimization"):
                self.train_vars = tf.trainable_variables()

                print()
                print("trainable_variables")
                for varvar in self.train_vars:
                    print(varvar)
                print()

                self.loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
                    logits=self.y_hat, labels=self.targets)
                self.loss = tf.reduce_mean(self.loss, name="loss")
                self.loss_l2 = tf.add_n([
                    tf.nn.l2_loss(v)
                    for v in self.train_vars if "bias" not in v.name
                ]) * 0.0001
                self.loss = self.loss + self.loss_l2 + self.P

                self.predict = tf.argmax(tf.nn.softmax(self.y_hat), 1)
                self.predict = tf.cast(tf.reshape(self.predict,
                                                  [self.batch_size, 1]),
                                       tf.int32,
                                       name="predict")

                self.target_label = tf.cast(
                    tf.reshape(self.targets, [self.batch_size, 1]), tf.int32)
                self.correct = tf.equal(self.predict, self.target_label)
                self.accuracy = tf.reduce_mean(tf.cast(self.correct,
                                                       tf.float32),
                                               name="accuracy")

                self.global_step = tf.Variable(0,
                                               name="global_step",
                                               trainable=False)
                self.decay_rate = tf.maximum(0.00007,
                                             tf.train.exponential_decay(
                                                 self.lr,
                                                 self.global_step,
                                                 1000,
                                                 0.9,
                                                 staircase=True),
                                             name="decay_rate")
                self.opt = tf.train.AdamOptimizer(
                    learning_rate=self.decay_rate)
                self.grads_and_vars = self.opt.compute_gradients(
                    self.loss, self.train_vars)
                self.grads_and_vars = [(tf.clip_by_norm(g, 0.5), v)
                                       for g, v in self.grads_and_vars]
                self.grads_and_vars = [
                    (tf.add(g, tf.random_normal(tf.shape(g), stddev=0.001)), v)
                    for g, v in self.grads_and_vars
                ]

                self.train_op = self.opt.apply_gradients(
                    self.grads_and_vars,
                    global_step=self.global_step,
                    name="train_op")

            # Summaries for loss and lr
            self.loss_summary = tf.summary.scalar("loss", self.loss)
            self.accuracy_summary = tf.summary.scalar("accuracy",
                                                      self.accuracy)
            self.lr_summary = tf.summary.scalar("lr", self.decay_rate)

            # Output directory for models and summaries
            timestamp = datetime.datetime.now().strftime("%Y-%m-%d %H:%M")
            self.out_dir = os.path.abspath(
                os.path.join("./model/rnn_self_att", timestamp))
            print("LOGDIR = %s" % self.out_dir)
            print()

            # Train Summaries
            self.train_summary_op = tf.summary.merge(
                [self.loss_summary, self.accuracy_summary, self.lr_summary])
            self.train_summary_dir = os.path.join(self.out_dir, "summary",
                                                  "train")
            self.train_summary_writer = tf.summary.FileWriter(
                self.train_summary_dir, self.sess.graph)

            # Test summaries
            self.test_summary_op = tf.summary.merge(
                [self.loss_summary, self.accuracy_summary, self.lr_summary])
            self.test_summary_dir = os.path.join(self.out_dir, "summary",
                                                 "test")
            self.test_summary_writer = tf.summary.FileWriter(
                self.test_summary_dir, self.sess.graph)

            # Checkpoint directory. Tensorflow assumes this directory already exists so we need to create it
            self.checkpoint_dir = os.path.abspath(
                os.path.join(self.out_dir, "checkpoints"))
            self.checkpoint_prefix = os.path.join(self.checkpoint_dir,
                                                  "model-step")
            if self.makedir:
                if not os.path.exists(self.checkpoint_dir):
                    os.makedirs(self.checkpoint_dir)

            self.saver = tf.train.Saver(tf.global_variables(),
                                        max_to_keep=None)
Example #30
    target_ph = tf.placeholder(tf.float32, [None], name='target_ph')
    seq_len_ph = tf.placeholder(tf.int32, [None], name='seq_len_ph')
    keep_prob_ph = tf.placeholder(tf.float32, name='keep_prob_ph')

# Embedding layer
with tf.name_scope('Embedding_layer'):
    embeddings_var = tf.Variable(tf.random_uniform(
        [vocabulary_size, EMBEDDING_DIM], -1.0, 1.0),
                                 trainable=True)
    tf.summary.histogram('embeddings_var', embeddings_var)
    batch_embedded = tf.nn.embedding_lookup(embeddings_var, batch_ph)

# (Bi-)RNN layer(-s)
rnn_outputs, _ = bi_rnn(GRUCell(HIDDEN_SIZE),
                        GRUCell(HIDDEN_SIZE),
                        inputs=batch_embedded,
                        sequence_length=seq_len_ph,
                        dtype=tf.float32)
tf.summary.histogram('RNN_outputs', rnn_outputs)

# Attention layer
with tf.name_scope('Attention_layer'):
    attention_output, alphas = attention(rnn_outputs,
                                         ATTENTION_SIZE,
                                         return_alphas=True)
    tf.summary.histogram('alphas', alphas)

# Dropout
drop = tf.nn.dropout(attention_output, keep_prob_ph)

# Fully connected layer
Example #31
X_test = fit_in_vocabulary(X_test, vocabulary_size)
X_train = zero_pad(X_train, SEQUENCE_LENGTH)
X_test = zero_pad(X_test, SEQUENCE_LENGTH)

# Different placeholders
batch_ph = tf.placeholder(tf.int32, [None, SEQUENCE_LENGTH])
target_ph = tf.placeholder(tf.float32, [None])
seq_len_ph = tf.placeholder(tf.int32, [None])
keep_prob_ph = tf.placeholder(tf.float32)

# Embedding layer
embeddings_var = tf.Variable(tf.random_uniform([vocabulary_size, EMBEDDING_DIM], -1.0, 1.0), trainable=True)
batch_embedded = tf.nn.embedding_lookup(embeddings_var, batch_ph)

# (Bi-)RNN layer(-s)
rnn_outputs, _ = bi_rnn(GRUCell(HIDDEN_SIZE), GRUCell(HIDDEN_SIZE),
                        inputs=batch_embedded, sequence_length=seq_len_ph, dtype=tf.float32)
# rnn_outputs, _ = rnn(GRUCell(hidden_size), inputs=batch_embedded, sequence_length=seq_len_ph, dtype=tf.float32)

# Attention layer
attention_output, alphas = attention(rnn_outputs, ATTENTION_SIZE, return_alphas=True)

# Dropout
drop = tf.nn.dropout(attention_output, keep_prob_ph)

# Fully connected layer
W = tf.Variable(tf.truncated_normal([drop.get_shape()[1].value, 1], stddev=0.1))
b = tf.Variable(tf.constant(0., shape=[1]))
y_hat = tf.nn.xw_plus_b(drop, W, b)
y_hat = tf.squeeze(y_hat)
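# Editor's note: the fully connected layer has a single output unit, so y_hat is
# squeezed to shape [batch_size] to line up with target_ph; the (truncated) loss
# below is presumably a sigmoid cross-entropy over these logits.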

# Cross-entropy loss and optimizer initialization