Exemplo n.º 1
0
    def build_model(self):
        """Assemble the hypothesis/premise attention classifier and training ops.

        ``self.e_hypo`` and ``self.e_prem`` are each fed through ``lstm_layer``
        and then ``attention_layer``.  The first return value of each attention
        layer is concatenated along axis 1 and passed to ``dense_layer`` to
        produce 3 logits.

        Attributes set on ``self``: ``alphas_hypo``, ``alphas_prem``,
        ``logits``, ``y`` (softmax of the logits), ``cost`` (mean softmax
        cross-entropy against ``self.y_holder`` one-hot encoded with depth 3),
        ``accuracy``, ``optimizer`` (plain gradient descent) and ``train_op``.
        """
        # Recurrent encoders; the returned final states are not used here.
        hypo_seq, _ = lstm_layer(self.e_hypo, self.lstm_size, self.batch_size,
                                 self.seq_len_hypo, "hypo")
        prem_seq, _ = lstm_layer(self.e_prem, self.lstm_size, self.batch_size,
                                 self.seq_len_prem, "prem")

        # Attention over each encoder's outputs; the second return value
        # (the alphas) is exposed on self.
        hypo_vec, self.alphas_hypo = attention_layer(
            self.attention_size, hypo_seq, "encoder_hypo", sparse=self.sparse)
        prem_vec, self.alphas_prem = attention_layer(
            self.attention_size, prem_seq, "encoder_prem", sparse=self.sparse)

        # 3-way classification head on the concatenated attention outputs.
        pair_vec = tf.concat([hypo_vec, prem_vec], axis=1)
        self.logits = dense_layer(pair_vec, 3, activation=None,
                                  name="pred_out")
        self.y = tf.nn.softmax(self.logits)

        # The cross-entropy op is fed the raw logits, not self.y.
        targets = tf.one_hot(self.y_holder, depth=3)
        per_example = tf.nn.softmax_cross_entropy_with_logits(
            labels=targets, logits=self.logits)
        self.cost = tf.reduce_mean(per_example)

        # Fraction of examples whose arg-max class equals the label.
        predicted = tf.argmax(self.y, 1)
        correct = tf.cast(tf.equal(self.y_holder, predicted), tf.float32)
        self.accuracy = tf.reduce_mean(correct)

        sgd = tf.train.GradientDescentOptimizer(self.learning_rate)
        self.optimizer = sgd
        self.train_op = sgd.minimize(self.cost)
Exemplo n.º 2
0
    def build_model(self):
        """Build the two-encoder classifier with an auxiliary adversary term.

        Main path: ``self.e_hypo`` / ``self.e_prem`` go through ``lstm_layer``
        and ``attention_layer``; the attention outputs are concatenated along
        axis 1 and projected to 3 logits by ``dense_layer`` ("pred_out").

        Adversary path: the raw embeddings are flattened to 2-D, concatenated
        and fed to a second ``dense_layer`` ("adv_encoder") that also emits
        3 logits.  The reciprocal of its mean cross-entropy, scaled by 0.01,
        is added to the main cross-entropy to form ``self.cost``.

        Attributes set on ``self``: ``alphas_hypo``, ``alphas_prem``,
        ``logits``, ``y``, ``cost``, ``accuracy``, ``optimizer``, ``train_op``.
        """
        rnn_outputs_hypo, _ = lstm_layer(
            self.e_hypo, self.lstm_size, self.batch_size, self.seq_len_hypo,
            "hypo")
        rnn_outputs_prem, _ = lstm_layer(
            self.e_prem, self.lstm_size, self.batch_size, self.seq_len_prem,
            "prem")

        last_output_hypo, alphas_hypo = attention_layer(self.attention_size,
                                                        rnn_outputs_hypo,
                                                        "encoder_hypo",
                                                        sparse=self.sparse)
        last_output_prem, alphas_prem = attention_layer(self.attention_size,
                                                        rnn_outputs_prem,
                                                        "encoder_prem",
                                                        sparse=self.sparse)
        self.alphas_hypo = alphas_hypo
        self.alphas_prem = alphas_prem

        joint_output = tf.concat([last_output_hypo, last_output_prem], axis=1)
        self.logits = dense_layer(joint_output,
                                  3,
                                  activation=None,
                                  name="pred_out")
        self.y = tf.nn.softmax(self.logits)

        # Flatten each embedding tensor to (-1, dim1 * dim2) using its OWN
        # static shape.  The premise must not borrow the hypothesis' last
        # dimension: if the two ever differ, the -1 axis would silently absorb
        # the mismatch and no longer line up with the hypothesis rows.
        adv_in_hypo = tf.reshape(
            self.e_hypo, [-1, self.e_hypo.shape[1] * self.e_hypo.shape[2]])
        adv_in_prem = tf.reshape(
            self.e_prem, [-1, self.e_prem.shape[1] * self.e_prem.shape[2]])

        adv_logits = dense_layer(tf.concat([adv_in_hypo, adv_in_prem], axis=1),
                                 3,
                                 activation=None,
                                 name="adv_encoder")
        # NOTE(review): 1 / mean-loss grows without bound as the adversary's
        # cross-entropy approaches 0; no epsilon guards the division.
        adv_cost = 1 / tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(
                labels=tf.one_hot(self.y_holder, depth=3), logits=adv_logits))

        # The cross-entropy op is fed the raw logits, not self.y.
        task_cost = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(
                labels=tf.one_hot(self.y_holder, depth=3),
                logits=self.logits))
        self.cost = task_cost + 0.01 * adv_cost

        self.accuracy = tf.reduce_mean(
            tf.cast(tf.equal(self.y_holder, tf.argmax(self.y, 1)), tf.float32))

        self.optimizer = tf.train.GradientDescentOptimizer(self.learning_rate)
        self.train_op = self.optimizer.minimize(self.cost)
Exemplo n.º 3
0
    def build_model(self):
        """Wire up the single-sentence attention classifier (2 classes).

        ``self.e`` is run through ``lstm_layer`` and ``attention_layer``
        ("pred_encoder"); dropout with ``self.keep_probs`` is applied to the
        attention output before ``dense_layer`` ("pred_out") maps it to
        2 logits.

        Attributes set on ``self``: ``alphas``, ``logits``, ``y``, ``cost``
        (mean softmax cross-entropy), ``accuracy``, ``optimizer`` and
        ``train_op``.
        """
        # The LSTM's final state is returned but not needed by this model.
        encoded, _ = lstm_layer(self.e, self.lstm_size, self.batch_size,
                                self.seq_len)

        attended, self.alphas = attention_layer(
            self.attention_size, encoded, "pred_encoder", sparse=self.sparse)
        attended = tf.nn.dropout(attended, self.keep_probs)

        self.logits = dense_layer(attended, 2, activation=None,
                                  name="pred_out")
        self.y = tf.nn.softmax(self.logits)

        # Loss is computed from the raw logits against one-hot labels.
        one_hot_labels = tf.one_hot(self.y_holder, depth=2)
        losses = tf.nn.softmax_cross_entropy_with_logits(
            labels=one_hot_labels, logits=self.logits)
        self.cost = tf.reduce_mean(losses)

        # Accuracy: mean of the 0/1 indicator "arg-max class == label".
        is_correct = tf.equal(self.y_holder, tf.argmax(self.y, 1))
        self.accuracy = tf.reduce_mean(tf.cast(is_correct, tf.float32))

        optimizer = tf.train.GradientDescentOptimizer(self.learning_rate)
        self.optimizer = optimizer
        self.train_op = optimizer.minimize(self.cost)
Exemplo n.º 4
0
    def build_model(self):
        """Create the attention classifier with a penalty on the alphas.

        ``self.e`` (commented in the original as having shape
        ``(batch_size, sentence_length, emb_dim)``) goes through
        ``lstm_layer`` and ``attention_layer`` ("encoder"), then
        ``dense_layer`` ("pred_out") yields 2 logits.  ``self.cost`` is the
        mean softmax cross-entropy plus ``get_reg(self.alphas, ...)``.

        Attributes set on ``self``: ``alphas``, ``logits``, ``y``, ``cost``,
        ``accuracy``, ``optimizer`` and ``train_op``.
        """
        # Only the per-step outputs are consumed; the final state is dropped.
        sequence_out, _ = lstm_layer(self.e, self.lstm_size, self.batch_size,
                                     self.seq_len)

        summary, self.alphas = attention_layer(
            self.attention_size, sequence_out, "encoder", sparse=self.sparse)

        self.logits = dense_layer(summary, 2, activation=None, name="pred_out")
        self.y = tf.nn.softmax(self.logits)

        # softmax_cross_entropy_with_logits applies softmax internally, so it
        # must receive the unscaled logits rather than self.y.
        label_dist = tf.one_hot(self.y_holder, depth=2)
        xent = tf.nn.softmax_cross_entropy_with_logits(labels=label_dist,
                                                       logits=self.logits)
        data_loss = tf.reduce_mean(xent)

        # Penalty computed by get_reg from the alphas, configured through
        # self.lam and self.reg.
        penalty = get_reg(self.alphas, lam=self.lam, type=self.reg)
        self.cost = data_loss + penalty

        guesses = tf.argmax(self.y, 1)
        matches = tf.cast(tf.equal(self.y_holder, guesses), tf.float32)
        self.accuracy = tf.reduce_mean(matches)

        self.optimizer = tf.train.GradientDescentOptimizer(self.learning_rate)
        self.train_op = self.optimizer.minimize(self.cost)
Exemplo n.º 5
0
    def build_model(self):
        """Build the two-encoder classifier with penalties on both alphas.

        Inputs ``self.e_hypo`` and ``self.e_prem`` (commented in the original
        as ``(batch_size, sentence_length, emb_dim)``) are encoded separately
        with ``lstm_layer`` + ``attention_layer``.  The two attention outputs
        are concatenated along axis 1 and mapped to 3 logits.  ``self.cost``
        is the mean softmax cross-entropy plus the sum of ``get_reg`` applied
        to each branch's alphas.

        Attributes set on ``self``: ``alphas_hypo``, ``alphas_prem``,
        ``logits``, ``y``, ``cost``, ``accuracy``, ``optimizer``, ``train_op``.
        """
        # Recurrent encoders (final states unused).
        hypo_steps, _ = lstm_layer(self.e_hypo, self.lstm_size,
                                   self.batch_size, self.seq_len_hypo, "hypo")
        prem_steps, _ = lstm_layer(self.e_prem, self.lstm_size,
                                   self.batch_size, self.seq_len_prem, "prem")

        # Attention per branch.
        hypo_summary, hypo_alphas = attention_layer(
            self.attention_size, hypo_steps, "encoder_hypo",
            sparse=self.sparse)
        prem_summary, prem_alphas = attention_layer(
            self.attention_size, prem_steps, "encoder_prem",
            sparse=self.sparse)
        self.alphas_hypo, self.alphas_prem = hypo_alphas, prem_alphas

        # Joint 3-way prediction head.
        merged = tf.concat([hypo_summary, prem_summary], axis=1)
        self.logits = dense_layer(merged, 3, activation=None, name="pred_out")
        self.y = tf.nn.softmax(self.logits)

        # softmax_cross_entropy_with_logits applies softmax internally, so it
        # must receive the unscaled logits rather than self.y.
        gold = tf.one_hot(self.y_holder, depth=3)
        xent = tf.nn.softmax_cross_entropy_with_logits(labels=gold,
                                                       logits=self.logits)
        data_loss = tf.reduce_mean(xent)

        # One get_reg penalty per branch, summed before joining the loss.
        hypo_penalty = get_reg(hypo_alphas, lam=self.lam, type=self.reg)
        prem_penalty = get_reg(prem_alphas, lam=self.lam, type=self.reg)
        self.cost = data_loss + (hypo_penalty + prem_penalty)

        top_class = tf.argmax(self.y, 1)
        hit_mask = tf.cast(tf.equal(self.y_holder, top_class), tf.float32)
        self.accuracy = tf.reduce_mean(hit_mask)

        self.optimizer = tf.train.GradientDescentOptimizer(self.learning_rate)
        self.train_op = self.optimizer.minimize(self.cost)
Exemplo n.º 6
0
    def build_model(self):
        """Build the single-encoder classifier plus a linear adversary term.

        Main path: ``self.e`` -> ``lstm_layer`` -> ``attention_layer``
        ("pred_encoder") -> dropout (``self.keep_probs``) -> ``dense_layer``
        ("pred_out") producing 2 logits.

        Adversary path: ``self.e`` is flattened to 2-D and multiplied by the
        variable ``self.w_adv`` ("w") with bias ``self.b_adv`` ("b") to give
        2 logits.  The reciprocal of that branch's mean cross-entropy, scaled
        by 0.01, is added to the main cross-entropy to form ``self.cost``.

        Attributes set on ``self``: ``alphas``, ``logits``, ``y``, ``w_adv``,
        ``b_adv``, ``cost``, ``accuracy``, ``optimizer`` and ``train_op``.
        """
        # Main classifier; the LSTM's final state is not used.
        hidden_seq, _ = lstm_layer(self.e, self.lstm_size, self.batch_size,
                                   self.seq_len)
        context, self.alphas = attention_layer(
            self.attention_size, hidden_seq, "pred_encoder",
            sparse=self.sparse)
        context = tf.nn.dropout(context, self.keep_probs)

        self.logits = dense_layer(context, 2, activation=None, name="pred_out")
        self.y = tf.nn.softmax(self.logits)

        # Hand-rolled linear layer on the flattened raw embeddings (marked as
        # debug code in the original, in place of a dense_layer call).
        flat_width = self.e.shape[1] * self.e.shape[2]
        adv_in = tf.reshape(self.e, [-1, flat_width])
        self.w_adv = tf.get_variable(
            "w",
            shape=[adv_in.shape[-1], 2],
            initializer=tf.truncated_normal_initializer())
        self.b_adv = tf.get_variable("b", shape=[2], dtype=tf.float32)
        adv_logits = tf.matmul(adv_in, self.w_adv) + self.b_adv

        # Adversary term: reciprocal of the adversary's mean cross-entropy.
        adv_targets = tf.one_hot(self.y_holder, depth=2)
        adv_xent = tf.nn.softmax_cross_entropy_with_logits(
            labels=adv_targets, logits=adv_logits)
        adv_cost = 1 / tf.reduce_mean(adv_xent)

        # Main loss on the raw logits, then add the down-weighted adversary
        # term.
        targets = tf.one_hot(self.y_holder, depth=2)
        xent = tf.nn.softmax_cross_entropy_with_logits(labels=targets,
                                                       logits=self.logits)
        task_loss = tf.reduce_mean(xent)
        self.cost = task_loss + 0.01 * adv_cost

        chosen = tf.argmax(self.y, 1)
        right = tf.cast(tf.equal(self.y_holder, chosen), tf.float32)
        self.accuracy = tf.reduce_mean(right)

        self.optimizer = tf.train.GradientDescentOptimizer(self.learning_rate)
        self.train_op = self.optimizer.minimize(self.cost)
Exemplo n.º 7
0
    def build_model(self):
        """Build a two-branch model whose logits are projected against a
        second ("kwm") branch before the loss is computed.

        Two independent encoder stacks are built over the same inputs
        (``self.e_hypo`` / ``self.e_prem``): the prediction branch (scopes
        "hypo"/"prem", "encoder_hypo"/"encoder_prem") and the key-word-model
        branch (scopes prefixed with "kwm_").  Each branch's hypothesis and
        premise attention outputs are concatenated along axis 1.

        Three ``dense_layer`` heads with 3 outputs are then created:
        "conv_pred" on [h_fc1, zeros], "conv_H" on [zeros, h_fc2] and
        "conv_loss" on [h_fc1, h_fc2].  The final logits are the "conv_loss"
        output minus its projection onto the column space of the "conv_H"
        output.

        Attributes set on ``self``: ``alphas_hypo``, ``alphas_prem``,
        ``temp``, ``logits``, ``y``, ``cost``, ``optimizer``, ``train_op``
        and ``accuracy``.
        """
        # Define prediction rnn
        rnn_outputs_hypo, final_state_hypo = lstm_layer(
            self.e_hypo, self.lstm_size, self.batch_size, self.seq_len_hypo,
            "hypo")
        rnn_outputs_prem, final_state_prem = lstm_layer(
            self.e_prem, self.lstm_size, self.batch_size, self.seq_len_prem,
            "prem")

        last_output_hypo, alphas_hypo = attention_layer(self.attention_size,
                                                        rnn_outputs_hypo,
                                                        "encoder_hypo",
                                                        sparse=self.sparse)
        last_output_prem, alphas_prem = attention_layer(self.attention_size,
                                                        rnn_outputs_prem,
                                                        "encoder_prem",
                                                        sparse=self.sparse)
        # Only the prediction branch's alphas are exposed on self; the kwm
        # alphas below stay local and are used solely for regularization.
        self.alphas_hypo = alphas_hypo
        self.alphas_prem = alphas_prem
        # last_output = tf.nn.dropout(last_output, self.keep_probs)

        # Define key-word model rnn
        # Same inputs as the prediction branch, but under separate "kwm_*"
        # scope names.
        kwm_rnn_outputs_hypo, kwm_final_state_hypo = lstm_layer(
            self.e_hypo,
            self.lstm_size,
            self.batch_size,
            self.seq_len_hypo,
            scope="kwm_hypo")
        kwm_rnn_outputs_prem, kwm_final_state_prem = lstm_layer(
            self.e_prem,
            self.lstm_size,
            self.batch_size,
            self.seq_len_prem,
            scope="kwm_prem")
        kwm_last_output_hypo, kwm_alphas_hypo = attention_layer(
            self.attention_size,
            kwm_rnn_outputs_hypo,
            "kwm_encoder_hypo",
            sparse=self.sparse)
        kwm_last_output_prem, kwm_alphas_prem = attention_layer(
            self.attention_size,
            kwm_rnn_outputs_prem,
            "kwm_encoder_prem",
            sparse=self.sparse)

        # Per-branch feature vector: hypothesis and premise outputs side by
        # side.
        last_output = tf.concat([last_output_hypo, last_output_prem], axis=1)
        kwm_last_output = tf.concat(
            [kwm_last_output_hypo, kwm_last_output_prem], axis=1)

        ############################
        # Hex #########################
        # NOTE(review): presumably follows the HEX projection scheme; confirm
        # against the reference implementation the author used.

        h_fc1 = last_output
        h_fc2 = kwm_last_output

        # Hex layer definition
        # The string below is disabled code (a bare string expression with no
        # runtime effect) showing a shared-weight variant; the live code uses
        # three separate dense_layer calls instead.
        """
        self.W_cl_1 = tf.Variable(tf.random_normal([self.dim, 3], stddev=0.1))
        self.W_cl_2 = tf.Variable(tf.random_normal([1200, 3]), trainable=True)
        self.b_cl = tf.Variable(tf.random_normal((3,)), trainable=True)
        self.W_cl = tf.concat([self.W_cl_1, self.W_cl_2], 0)
        """

        # Compute prediction using [h_fc1, 0(pad)]
        pad = tf.zeros_like(h_fc2, tf.float32)
        # print(pad.shape) -> (?, 600)

        yconv_contact_pred = tf.nn.dropout(tf.concat([h_fc1, pad], 1),
                                           self.keep_probs)

        # y_conv_pred = tf.matmul(yconv_contact_pred, self.W_cl) + self.b_cl
        y_conv_pred = dense_layer(yconv_contact_pred, 3, name="conv_pred")

        # NOTE(review): this assignment is overwritten further down by
        # `self.logits = y_conv_loss`, so y_conv_pred feeds neither self.y,
        # self.cost nor self.accuracy. Confirm whether that is intended.
        self.logits = y_conv_pred  # Prediction

        # Compute loss using [h_fc1, h_fc2] and [0(pad2), h_fc2]
        pad2 = tf.zeros_like(h_fc1, tf.float32)

        yconv_contact_H = tf.concat([pad2, h_fc2], 1)
        # Get Fg
        # y_conv_H = tf.matmul(yconv_contact_H, self.W_cl) + self.b_cl  # get Fg
        y_conv_H = dense_layer(yconv_contact_H, 3, name="conv_H")

        # Unlike yconv_contact_H above, this input goes through dropout.
        yconv_contact_loss = tf.nn.dropout(tf.concat([h_fc1, h_fc2], 1),
                                           self.keep_probs)
        # Get Fb
        # y_conv_loss = tf.matmul(yconv_contact_loss, self.W_cl) + self.b_cl  # get Fb
        y_conv_loss = dense_layer(yconv_contact_loss, 3, name="conv_loss")

        # temp = H^T H for H = y_conv_H; kept on self (e.g. for inspection).
        temp = tf.matmul(y_conv_H, y_conv_H, transpose_a=True)
        self.temp = temp

        # y_conv_loss <- y_conv_loss - H (H^T H)^-1 H^T y_conv_loss, i.e.
        # remove the least-squares projection of y_conv_loss onto the column
        # space of H.
        # NOTE(review): temp is inverted directly with no damping term;
        # presumably this is unstable or fails when temp is (near-)singular.
        # TODO confirm.
        y_conv_loss = y_conv_loss - tf.matmul(
            tf.matmul(tf.matmul(y_conv_H, tf.matrix_inverse(temp)),
                      y_conv_H,
                      transpose_b=True), y_conv_loss)  # get loss

        # The projected logits are what the softmax, loss and accuracy use.
        self.logits = y_conv_loss
        self.y = tf.nn.softmax(self.logits)

        # The cross-entropy op is fed the raw logits, not self.y.
        self.cost = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(labels=tf.one_hot(
                self.y_holder, depth=3),
                                                    logits=self.logits))

        # Regularize kwm attention
        reg1 = get_reg(kwm_alphas_hypo, lam=self.lam, type=self.reg)
        reg2 = get_reg(kwm_alphas_prem, lam=self.lam, type=self.reg)

        self.cost += reg1 + reg2

        self.optimizer = tf.train.GradientDescentOptimizer(self.learning_rate)
        self.train_op = self.optimizer.minimize(self.cost)
        # Fraction of examples whose arg-max class equals the label.
        self.accuracy = tf.reduce_mean(
            tf.cast(tf.equal(self.y_holder, tf.argmax(self.y, 1)), tf.float32))