Example #1
with tf.variable_scope("char_embedding"):
    char_embedding = embedded(mnli.char_embedding, name="char")
    char_embedding_pre = char_embedding(sent1char)
    char_embedding_hyp = char_embedding(sent2char)

    with tf.variable_scope("conv") as scope:
        conv_pre = char_conv(char_embedding_pre, filter_size=filter_size)
        scope.reuse_variables()
        conv_hyp = char_conv(char_embedding_hyp, filter_size=filter_size)

embed_pre = tf.concat((embedding_pre, antonym1, exact1to2, synonym1, conv_pre),
                      -1)
embed_hyp = tf.concat((embedding_hyp, antonym2, exact2to1, synonym2, conv_hyp),
                      -1)

hout_pre = highway_network(embed_pre, 2, [tf.nn.sigmoid] * 2, "premise")
hout_hyp = highway_network(embed_hyp, 2, [tf.nn.sigmoid] * 2, "hypothesis")

#peter: dim reduction
hout_pre = normalize(
    tf.layers.dense(hout_pre, hidden_dim, activation=tf.nn.sigmoid))
hout_hyp = normalize(
    tf.layers.dense(hout_hyp, hidden_dim, activation=tf.nn.sigmoid))

hout_pre = mask(hout_pre, sent1_mask)
hout_hyp = mask(hout_hyp, sent2_mask)

pre_atten = multihead_attention(hout_pre,
                                hout_pre,
                                hout_pre,
                                h=num_heads)  # call truncated in the source; remaining arguments are not shown

Example #2
    def _build_forward(self):
        config = self.config
        N, JX, VW, VC, d, W = \
            config.batch_size, config.max_sent_size, \
            config.word_vocab_size, config.char_vocab_size, \
            config.hidden_size, config.max_word_size        
        dc, dw, dco = config.char_emb_size, config.word_emb_size, config.char_out_size


        # Getting word vector

        with tf.variable_scope("emb"):
            if config.use_char_emb:
                with tf.variable_scope("emb_var"), tf.device("/cpu:0"):
                    char_emb_mat = tf.get_variable("char_emb_mat", shape=[VC, dc], dtype='float')

                with tf.variable_scope("char"):
                    Acx = tf.nn.embedding_lookup(char_emb_mat, self.cx)  # [N, JX, W, dc]
                    Acy = tf.nn.embedding_lookup(char_emb_mat, self.cy)  # [N, JX, W, dc]

                    filter_sizes = list(map(int, config.out_channel_dims.split(',')))
                    heights = list(map(int, config.filter_heights.split(',')))
                    assert sum(filter_sizes) == dco, (filter_sizes, dco)
                    with tf.variable_scope("conv"):
                        xx = multi_conv1d(Acx, filter_sizes, heights, "VALID", self.is_train, config.keep_prob, scope="xx")
                        if config.share_cnn_weights:
                            tf.get_variable_scope().reuse_variables()
                            yy = multi_conv1d(Acy, filter_sizes, heights, "VALID", self.is_train, config.keep_prob, scope="xx")
                        else:
                            yy = multi_conv1d(Acy, filter_sizes, heights, "VALID", self.is_train, config.keep_prob, scope="yy")
                        xx = tf.reshape(xx, [-1, JX, dco])
                        yy = tf.reshape(yy, [-1, JX, dco])

            if config.use_word_emb:
                with tf.variable_scope("emb_var"), tf.device("/cpu:0"):
                    if config.mode == 'train':
                        word_emb_mat = tf.get_variable("word_emb_mat", dtype='float', shape=[VW, dw], initializer=get_initializer(config.emb_mat))
                    else:
                        word_emb_mat = tf.get_variable("word_emb_mat", shape=[VW, dw], dtype='float')
                    if config.use_glove_for_unk:
                        word_emb_mat = tf.concat(axis=0, values=[word_emb_mat, self.new_emb_mat])

                with tf.name_scope("word"):
                    Ax = tf.nn.embedding_lookup(word_emb_mat, self.x)  # [N, JX, d]
                    Ay = tf.nn.embedding_lookup(word_emb_mat, self.y)  # [N, JX, d]
                    self.tensor_dict['x'] = Ax
                    self.tensor_dict['y'] = Ay
                if config.use_char_emb:
                    xx = tf.concat(axis=2, values=[xx, Ax])  # [N, JX, dco + dw]
                    yy = tf.concat(axis=2, values=[yy, Ay])  # [N, JX, dco + dw]
                else:
                    xx = Ax
                    yy = Ay
            
            # highway network
            if config.highway:
                with tf.variable_scope("highway"):
                    xx = highway_network(xx, config.highway_num_layers, True, wd=config.wd, is_train=self.is_train)
                    tf.get_variable_scope().reuse_variables()
                    yy = highway_network(yy, config.highway_num_layers, True, wd=config.wd, is_train=self.is_train)
        
        self.tensor_dict['xx'] = xx
        self.tensor_dict['yy'] = yy

        

        self.x_output, self.x_state = self._encoder(xx, self.x_length)
        self.y_output, self.y_state = self._encoder(yy, self.y_length, reuse=True) # use the same sentence encoder.
        
        length = get_sequence_length(self.x_output)
        self.X = get_last_relevant_rnn_output(self.x_output, length)

        length = get_sequence_length(self.y_output)
        self.Y = get_last_relevant_rnn_output(self.y_output, length)

        self.h0 = tf.concat((self.X, self.Y), 1)

        # The [h_dim * 4, 200] shape assumes each sentence vector is
        # 2 * h_dim wide (e.g. a bidirectional encoder), so the premise /
        # hypothesis concatenation is 4 * h_dim.
        self.W1 = tf.get_variable("W1", shape=[self.h_dim * 4, 200])
        self.b1 = tf.get_variable("b1", shape=[200])
        self.a1 = tf.nn.relu(tf.add(tf.matmul(self.h0, self.W1), self.b1))

        self.W2 = tf.get_variable("W2", shape=[200, 200])
        self.b2 = tf.get_variable("b2", shape=[200])
        self.a2 = tf.nn.relu(tf.add(tf.matmul(self.a1, self.W2), self.b2))

        self.W3 = tf.get_variable("W3", shape=[200, 200])
        self.b3 = tf.get_variable("b3", shape=[200])
        self.a3 = tf.nn.relu(tf.add(tf.matmul(self.a2, self.W3), self.b3))
        
        self.W_pred = tf.get_variable("W_pred", shape=[200, 3])
        self.logits = tf.matmul(self.a3, self.W_pred)


        print("logits:", self.logits)
Example #3
    def _build_forward(self):
        config = self.config
        N, JX, VW, VC, d, W = \
            config.batch_size, config.max_sent_size, \
            config.word_vocab_size, config.char_vocab_size, \
            config.hidden_size, config.max_word_size
        dc, dw, dco = config.char_emb_size, config.word_emb_size, config.char_out_size

        # Getting word vector

        with tf.variable_scope("emb"):
            if config.use_char_emb:
                with tf.variable_scope("emb_var"), tf.device("/cpu:0"):
                    char_emb_mat = tf.get_variable("char_emb_mat",
                                                   shape=[VC, dc],
                                                   dtype='float')

                with tf.variable_scope("char"):
                    Acx = tf.nn.embedding_lookup(char_emb_mat,
                                                 self.cx)  # [N, JX, W, dc]
                    Acy = tf.nn.embedding_lookup(char_emb_mat,
                                                 self.cy)  # [N, JX, W, dc]

                    filter_sizes = list(
                        map(int, config.out_channel_dims.split(',')))
                    heights = list(map(int, config.filter_heights.split(',')))
                    assert sum(filter_sizes) == dco, (filter_sizes, dco)
                    with tf.variable_scope("conv"):
                        xx = multi_conv1d(Acx,
                                          filter_sizes,
                                          heights,
                                          "VALID",
                                          self.is_train,
                                          config.keep_prob,
                                          scope="xx")
                        if config.share_cnn_weights:
                            tf.get_variable_scope().reuse_variables()
                            yy = multi_conv1d(Acy,
                                              filter_sizes,
                                              heights,
                                              "VALID",
                                              self.is_train,
                                              config.keep_prob,
                                              scope="xx")
                        else:
                            yy = multi_conv1d(Acy,
                                              filter_sizes,
                                              heights,
                                              "VALID",
                                              self.is_train,
                                              config.keep_prob,
                                              scope="yy")
                        xx = tf.reshape(xx, [-1, JX, dco])
                        yy = tf.reshape(yy, [-1, JX, dco])

            if config.use_word_emb:
                with tf.variable_scope("emb_var"), tf.device("/cpu:0"):
                    if config.mode == 'train':
                        word_emb_mat = tf.get_variable(
                            "word_emb_mat",
                            dtype='float',
                            shape=[VW, dw],
                            initializer=get_initializer(config.emb_mat))
                    else:
                        word_emb_mat = tf.get_variable("word_emb_mat",
                                                       shape=[VW, dw],
                                                       dtype='float')
                    if config.use_glove_for_unk:
                        word_emb_mat = tf.concat(
                            axis=0, values=[word_emb_mat, self.new_emb_mat])

                with tf.name_scope("word"):
                    Ax = tf.nn.embedding_lookup(word_emb_mat,
                                                self.x)  # [N, JX, d]
                    Ay = tf.nn.embedding_lookup(word_emb_mat,
                                                self.y)  # [N, JX, d]
                    self.tensor_dict['x'] = Ax
                    self.tensor_dict['y'] = Ay
                if config.use_char_emb:
                    xx = tf.concat(axis=2, values=[xx, Ax])  # [N, JX, dco + dw]
                    yy = tf.concat(axis=2, values=[yy, Ay])  # [N, JX, dco + dw]
                else:
                    xx = Ax
                    yy = Ay

        # highway network
        if config.highway:
            with tf.variable_scope("highway"):
                xx = highway_network(xx,
                                     config.highway_num_layers,
                                     True,
                                     wd=config.wd,
                                     is_train=self.is_train)
                tf.get_variable_scope().reuse_variables()
                yy = highway_network(yy,
                                     config.highway_num_layers,
                                     True,
                                     wd=config.wd,
                                     is_train=self.is_train)

        self.tensor_dict['xx'] = xx
        self.tensor_dict['yy'] = yy

        with tf.variable_scope("encode_x"):
            self.fwd_lstm = BasicLSTMCell(self.h_dim, state_is_tuple=True)
            self.x_output, self.x_state = dynamic_rnn(cell=self.fwd_lstm,
                                                      inputs=xx,
                                                      dtype=tf.float32)
            # self.x_output, self.x_state = bidirectional_dynamic_rnn(cell_fw=self.fwd_lstm,cell_bw=self.bwd_lstm,inputs=self.x_emb,dtype=tf.float32)
            # print(self.x_output)
        with tf.variable_scope("encode_y"):
            self.fwd_lstm = BasicLSTMCell(self.h_dim, state_is_tuple=True)
            self.y_output, self.y_state = dynamic_rnn(
                cell=self.fwd_lstm,
                inputs=yy,
                initial_state=self.x_state,
                dtype=tf.float32)
            # print self.y_output
            # print self.y_state

        length = get_sequence_length(self.y_output)
        self.Y = get_last_relevant_rnn_output(self.y_output, length)

        self.hstar = self.Y

        self.W_pred = tf.get_variable("W_pred", shape=[self.h_dim, 3])
        self.logits = tf.matmul(self.hstar, self.W_pred)

        print("logits:", self.logits)
Example #4
    def _build_forward(self):
        config = self.config
        N, JX, VW, VC, d, W = \
            config.batch_size, config.max_sent_size, \
            config.word_vocab_size, config.char_vocab_size, \
            config.hidden_size, config.max_word_size        
        dc, dw, dco = config.char_emb_size, config.word_emb_size, config.char_out_size

        with tf.variable_scope("emb"):
            if config.use_char_emb:
                with tf.variable_scope("emb_var"), tf.device("/cpu:0"):
                    char_emb_mat = tf.get_variable("char_emb_mat", shape=[VC, dc], dtype='float')

                with tf.variable_scope("char"):
                    Acx = tf.nn.embedding_lookup(char_emb_mat, self.cx)  # [N, JX, W, dc]
                    Acy = tf.nn.embedding_lookup(char_emb_mat, self.cy)  # [N, JX, W, dc]

                    filter_sizes = list(map(int, config.out_channel_dims.split(',')))
                    heights = list(map(int, config.filter_heights.split(',')))
                    assert sum(filter_sizes) == dco, (filter_sizes, dco)
                    with tf.variable_scope("conv"):
                        xx = multi_conv1d(Acx, filter_sizes, heights, "VALID", self.is_train, config.keep_prob, scope="xx")
                        if config.share_cnn_weights:
                            tf.get_variable_scope().reuse_variables()
                            yy = multi_conv1d(Acy, filter_sizes, heights, "VALID", self.is_train, config.keep_prob, scope="xx")
                        else:
                            yy = multi_conv1d(Acy, filter_sizes, heights, "VALID", self.is_train, config.keep_prob, scope="yy")
                        xx = tf.reshape(xx, [-1, JX, dco])
                        yy = tf.reshape(yy, [-1, JX, dco])

            if config.use_word_emb:
                with tf.variable_scope("emb_var"), tf.device("/cpu:0"):
                    if config.mode == 'train':
                        word_emb_mat = tf.get_variable("word_emb_mat", dtype='float', shape=[VW, dw], initializer=get_initializer(config.emb_mat))
                    else:
                        word_emb_mat = tf.get_variable("word_emb_mat", shape=[VW, dw], dtype='float')
                    if config.use_glove_for_unk:
                        word_emb_mat = tf.concat(axis=0, values=[word_emb_mat, self.new_emb_mat])

                with tf.name_scope("word"):
                    Ax = tf.nn.embedding_lookup(word_emb_mat, self.x)  # [N, JX, d]
                    Ay = tf.nn.embedding_lookup(word_emb_mat, self.y)  # [N, JX, d]
                    self.tensor_dict['x'] = Ax
                    self.tensor_dict['y'] = Ay
                if config.use_char_emb:
                    xx = tf.concat(axis=2, values=[xx, Ax])  # [N, JX, dco + dw]
                    yy = tf.concat(axis=2, values=[yy, Ay])  # [N, JX, dco + dw]
                else:
                    xx = Ax
                    yy = Ay

        # highway network
        if config.highway:
            with tf.variable_scope("highway"):
                xx = highway_network(xx, config.highway_num_layers, True, wd=config.wd, is_train=self.is_train)
                tf.get_variable_scope().reuse_variables()
                yy = highway_network(yy, config.highway_num_layers, True, wd=config.wd, is_train=self.is_train)

        self.tensor_dict['xx'] = xx
        self.tensor_dict['yy'] = yy

        print(xx)
        xx = tf.reduce_sum(xx, 1)
        print(xx)
        yy = tf.reduce_sum(yy, 1)

        con = tf.concat([xx, yy], 1)
        print(con)

        # W1 assumes each pooled sentence embedding is h_dim wide, so the
        # concatenation con is 2 * h_dim.
        self.W1 = tf.get_variable("W1", shape=[self.h_dim * 2, 200])
        self.a1 = tf.tanh(tf.matmul(con, self.W1))
        # a1 is [N, 200], so the later weights must be [200, ...]; the
        # original [self.h_dim * 2, ...] shapes would not match.
        self.W2 = tf.get_variable("W2", shape=[200, 200])
        self.a2 = tf.tanh(tf.matmul(self.a1, self.W2))

        self.W_pred = tf.get_variable("W_pred", shape=[200, 3])
        self.logits = tf.matmul(self.a2, self.W_pred)

        
        print("logits:", self.logits)
Example #5
    def _build_forward(self):
        config = self.config
        N, JX, VW, VC, d, W = \
            config.batch_size, config.max_sent_size, \
            config.word_vocab_size, config.char_vocab_size, \
            config.hidden_size, config.max_word_size
        dc, dw, dco = config.char_emb_size, config.word_emb_size, config.char_out_size

        # Get the word vectors. For now we only use word embeddings; the char
        # embeddings are skipped.
        with tf.variable_scope("emb"):
            if config.use_word_emb:
                with tf.variable_scope("emb_var"), tf.device("/cpu:0"):
                    if config.mode == 'train':
                        word_emb_mat = tf.get_variable(
                            "word_emb_mat",
                            dtype='float',
                            shape=[VW, dw],
                            initializer=get_initializer(config.emb_mat))
                    else:
                        word_emb_mat = tf.get_variable("word_emb_mat",
                                                       shape=[VW, dw],
                                                       dtype='float')
                    if config.use_glove_for_unk:
                        word_emb_mat = tf.concat(
                            axis=0, values=[word_emb_mat, self.new_emb_mat])

                with tf.name_scope("word"):
                    Ax = tf.nn.embedding_lookup(word_emb_mat,
                                                self.x)  # [N, JX, d]
                    # Note: this deliberately embeds self.x again; the model
                    # is an autoencoder and the decoder reconstructs x (see
                    # decoder_train_targets below).
                    Ay = tf.nn.embedding_lookup(word_emb_mat,
                                                self.x)  # [N, JX, d]
                    self.tensor_dict['x'] = Ax
                    self.tensor_dict['y'] = Ay
                    xx = Ax
                    yy = Ay

        # xx is the preprocessed encoder input;
        # yy should be derived from xx.

        # highway network
        if config.highway:
            with tf.variable_scope("highway"):
                xx = highway_network(xx,
                                     config.highway_num_layers,
                                     True,
                                     wd=config.wd,
                                     is_train=self.is_train)
                tf.get_variable_scope().reuse_variables()
                yy = highway_network(yy,
                                     config.highway_num_layers,
                                     True,
                                     wd=config.wd,
                                     is_train=self.is_train)

        self.tensor_dict['xx'] = xx
        self.tensor_dict['yy'] = yy

        self.encoder_inputs_embedded = xx

        self.decoder_train_inputs_embedded = yy
        self.decoder_train_length = self.y_length
        self.decoder_train_targets = self.x
        print("train_target:", self.decoder_train_targets)

        with tf.variable_scope("Encoder") as scope:
            encoder_cell = LSTMCell(self.h_dim, state_is_tuple=True)
            (self.encoder_outputs, self.encoder_state) = (tf.nn.dynamic_rnn(
                cell=encoder_cell,
                inputs=self.encoder_inputs_embedded,
                sequence_length=self.x_length,
                dtype=tf.float32))

        with tf.variable_scope("Decoder") as scope:

            decoder_cell = LSTMCell(self.h_dim, state_is_tuple=True)

            print("self.decoder_train_inputs_embedded:",
                  self.decoder_train_inputs_embedded)
            print("self.decoder_train_length:", self.decoder_train_length)
            helper = seq2seq.TrainingHelper(self.decoder_train_inputs_embedded,
                                            self.decoder_train_length)
            # Try a scheduled training helper; it may improve performance.
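            # A hedged sketch of that scheduled-sampling variant, assuming a
            # new `sampling_probability` scalar is added to the model:
            # helper = seq2seq.ScheduledEmbeddingTrainingHelper(
            #     self.decoder_train_inputs_embedded,
            #     self.decoder_train_length,
            #     embedding=word_emb_mat,
            #     sampling_probability=sampling_probability)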

            decoder = seq2seq.BasicDecoder(cell=decoder_cell,
                                           helper=helper,
                                           initial_state=self.encoder_state)
            # Try AttentionDecoder.
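            # A hedged sketch of an attention-based decoder (Luong attention
            # over the encoder outputs) in place of the plain BasicDecoder:
            # attention = seq2seq.LuongAttention(
            #     self.h_dim,
            #     memory=self.encoder_outputs,
            #     memory_sequence_length=self.x_length)
            # attn_cell = seq2seq.AttentionWrapper(decoder_cell, attention)
            # attn_state = attn_cell.zero_state(N, tf.float32).clone(
            #     cell_state=self.encoder_state)
            # decoder = seq2seq.BasicDecoder(cell=attn_cell,
            #                                helper=helper,
            #                                initial_state=attn_state)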
            # Note: from TF 1.3 on, dynamic_decode also returns the final
            # sequence lengths, which are discarded here.
            self.decoder_outputs_train, self.decoder_state_train, _ = seq2seq.dynamic_decode(
                decoder,
                impute_finished=True,
                scope=scope,
            )

            print("shape of self.decoder_outputs_train.rnn_output",
                  self.decoder_outputs_train.rnn_output)
            self.decoder_logits = self.decoder_outputs_train.rnn_output

            w_t = tf.get_variable("proj_w", [self.vocab_size, self.h_dim],
                                  dtype=tf.float32)
            w = tf.transpose(w_t)
            b = tf.get_variable("proj_b", [self.vocab_size], dtype=tf.float32)
            self.output_projection = (w, b)

            m = tf.matmul(tf.reshape(self.decoder_logits, [-1, self.h_dim]), w)
            print("m:", m)

            self.decoder_prediction_train = tf.argmax(
                tf.reshape(m, [N, -1, self.vocab_size]) +
                self.output_projection[1],
                axis=-1,
                name='decoder_prediction_train')
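
The decoder stops at the greedy predictions. A minimal sketch of the reconstruction loss that would typically close this autoencoder, assuming the decoded length matches the target length `JX`:

# Hypothetical reconstruction objective; not part of the original example.
logits_full = (tf.reshape(m, [N, -1, self.vocab_size]) +
               self.output_projection[1])
# Mask out padding positions using the true lengths.
weights = tf.sequence_mask(self.x_length, JX, dtype=tf.float32)
self.loss = seq2seq.sequence_loss(logits_full,
                                  self.decoder_train_targets,
                                  weights)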