with tf.variable_scope("char_embedding"): char_embedding = embedded(mnli.char_embedding, name="char") char_embedding_pre = char_embedding(sent1char) char_embedding_hyp = char_embedding(sent2char) with tf.variable_scope("conv") as scope: conv_pre = char_conv(char_embedding_pre, filter_size=filter_size) scope.reuse_variables() conv_hyp = char_conv(char_embedding_hyp, filter_size=filter_size) embed_pre = tf.concat((embedding_pre, antonym1, exact1to2, synonym1, conv_pre), -1) embed_hyp = tf.concat((embedding_hyp, antonym2, exact2to1, synonym2, conv_hyp), -1) hout_pre = highway_network(embed_pre, 2, [tf.nn.sigmoid] * 2, "premise") hout_hyp = highway_network(embed_hyp, 2, [tf.nn.sigmoid] * 2, "hypothesis") #peter: dim reduction hout_pre = normalize( tf.layers.dense(hout_pre, hidden_dim, activation=tf.nn.sigmoid)) hout_hyp = normalize( tf.layers.dense(hout_hyp, hidden_dim, activation=tf.nn.sigmoid)) hout_pre = mask(hout_pre, sent1_mask) hout_hyp = mask(hout_hyp, sent2_mask) pre_atten = multihead_attention(hout_pre, hout_pre, hout_pre, h=num_heads,
def _build_forward(self):
    config = self.config
    N, JX, VW, VC, d, W = \
        config.batch_size, config.max_sent_size, \
        config.word_vocab_size, config.char_vocab_size, \
        config.hidden_size, config.max_word_size
    dc, dw, dco = config.char_emb_size, config.word_emb_size, config.char_out_size

    # Getting word vector
    with tf.variable_scope("emb"):
        if config.use_char_emb:
            with tf.variable_scope("emb_var"), tf.device("/cpu:0"):
                char_emb_mat = tf.get_variable("char_emb_mat", shape=[VC, dc], dtype='float')

            with tf.variable_scope("char"):
                Acx = tf.nn.embedding_lookup(char_emb_mat, self.cx)  # [N, JX, W, dc]
                Acy = tf.nn.embedding_lookup(char_emb_mat, self.cy)  # [N, JX, W, dc]

                filter_sizes = list(map(int, config.out_channel_dims.split(',')))
                heights = list(map(int, config.filter_heights.split(',')))
                assert sum(filter_sizes) == dco, (filter_sizes, dco)
                with tf.variable_scope("conv"):
                    xx = multi_conv1d(Acx, filter_sizes, heights, "VALID",
                                      self.is_train, config.keep_prob, scope="xx")
                    if config.share_cnn_weights:
                        tf.get_variable_scope().reuse_variables()
                        yy = multi_conv1d(Acy, filter_sizes, heights, "VALID",
                                          self.is_train, config.keep_prob, scope="xx")
                    else:
                        yy = multi_conv1d(Acy, filter_sizes, heights, "VALID",
                                          self.is_train, config.keep_prob, scope="yy")
                xx = tf.reshape(xx, [-1, JX, dco])
                yy = tf.reshape(yy, [-1, JX, dco])

        if config.use_word_emb:
            with tf.variable_scope("emb_var"), tf.device("/cpu:0"):
                if config.mode == 'train':
                    word_emb_mat = tf.get_variable("word_emb_mat", dtype='float', shape=[VW, dw],
                                                   initializer=get_initializer(config.emb_mat))
                else:
                    word_emb_mat = tf.get_variable("word_emb_mat", shape=[VW, dw], dtype='float')
                if config.use_glove_for_unk:
                    word_emb_mat = tf.concat(axis=0, values=[word_emb_mat, self.new_emb_mat])

            with tf.name_scope("word"):
                Ax = tf.nn.embedding_lookup(word_emb_mat, self.x)  # [N, JX, d]
                Ay = tf.nn.embedding_lookup(word_emb_mat, self.y)  # [N, JX, d]
                self.tensor_dict['x'] = Ax
                self.tensor_dict['y'] = Ay
            if config.use_char_emb:
                xx = tf.concat(axis=2, values=[xx, Ax])  # [N, M, JX, di]
                yy = tf.concat(axis=2, values=[yy, Ay])  # [N, JQ, di]
            else:
                xx = Ax
                yy = Ay

    # highway network
    if config.highway:
        with tf.variable_scope("highway"):
            xx = highway_network(xx, config.highway_num_layers, True, wd=config.wd, is_train=self.is_train)
            tf.get_variable_scope().reuse_variables()
            yy = highway_network(yy, config.highway_num_layers, True, wd=config.wd, is_train=self.is_train)

    self.tensor_dict['xx'] = xx
    self.tensor_dict['yy'] = yy

    self.x_output, self.x_state = self._encoder(xx, self.x_length)
    self.y_output, self.y_state = self._encoder(yy, self.y_length, reuse=True)  # use the same sentence encoder.

    length = get_sequence_length(self.x_output)
    self.X = get_last_relevant_rnn_output(self.x_output, length)
    length = get_sequence_length(self.y_output)
    self.Y = get_last_relevant_rnn_output(self.y_output, length)

    self.h0 = tf.concat((self.X, self.Y), 1)

    self.W1 = tf.get_variable("W1", shape=[self.h_dim * 4, 200])
    self.b1 = tf.get_variable("b1", shape=[200])
    self.a1 = tf.nn.relu(tf.add(tf.matmul(self.h0, self.W1), self.b1))

    self.W2 = tf.get_variable("W2", shape=[200, 200])
    self.b2 = tf.get_variable("b2", shape=[200])
    self.a2 = tf.nn.relu(tf.add(tf.matmul(self.a1, self.W2), self.b2))

    self.W3 = tf.get_variable("W3", shape=[200, 200])
    self.b3 = tf.get_variable("b3", shape=[200])
    self.a3 = tf.nn.relu(tf.add(tf.matmul(self.a2, self.W3), self.b3))

    self.W_pred = tf.get_variable("W_pred", shape=[200, 3])
    self.logits = tf.matmul(self.a3, self.W_pred)

    print("logits:", self.logits)
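The method above relies on `self._encoder`, `get_sequence_length`, and `get_last_relevant_rnn_output`, none of which appear in this excerpt. The following is a minimal sketch consistent with how they are called, assuming the same imports as the surrounding code (`BasicLSTMCell`, `dynamic_rnn`) and the common "a time step is real if any feature is non-zero" length heuristic; the cell type and scope name are assumptions.

def _encoder(self, inputs, length, reuse=None):
    # Assumed helper: one LSTM shared by premise and hypothesis;
    # reuse=True on the second call ties the encoder weights.
    with tf.variable_scope("encoder", reuse=reuse):
        cell = BasicLSTMCell(self.h_dim, state_is_tuple=True)
        return dynamic_rnn(cell=cell, inputs=inputs,
                           sequence_length=length, dtype=tf.float32)

def get_sequence_length(sequence):
    # A time step counts as "real" if any feature is non-zero; summing the
    # indicator over time gives the unpadded length of each example.
    used = tf.sign(tf.reduce_max(tf.abs(sequence), axis=2))
    return tf.cast(tf.reduce_sum(used, axis=1), tf.int32)

def get_last_relevant_rnn_output(output, length):
    # Gather the encoder output at the last real time step of each example.
    batch_size = tf.shape(output)[0]
    max_length = tf.shape(output)[1]
    out_size = int(output.get_shape()[2])
    index = tf.range(0, batch_size) * max_length + (length - 1)
    flat = tf.reshape(output, [-1, out_size])
    return tf.gather(flat, index)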
def _build_forward(self):
    config = self.config
    N, JX, VW, VC, d, W = \
        config.batch_size, config.max_sent_size, \
        config.word_vocab_size, config.char_vocab_size, \
        config.hidden_size, config.max_word_size
    dc, dw, dco = config.char_emb_size, config.word_emb_size, config.char_out_size

    # Getting word vector
    with tf.variable_scope("emb"):
        if config.use_char_emb:
            with tf.variable_scope("emb_var"), tf.device("/cpu:0"):
                char_emb_mat = tf.get_variable("char_emb_mat", shape=[VC, dc], dtype='float')

            with tf.variable_scope("char"):
                Acx = tf.nn.embedding_lookup(char_emb_mat, self.cx)  # [N, JX, W, dc]
                Acy = tf.nn.embedding_lookup(char_emb_mat, self.cy)  # [N, JX, W, dc]

                filter_sizes = list(map(int, config.out_channel_dims.split(',')))
                heights = list(map(int, config.filter_heights.split(',')))
                assert sum(filter_sizes) == dco, (filter_sizes, dco)
                with tf.variable_scope("conv"):
                    xx = multi_conv1d(Acx, filter_sizes, heights, "VALID",
                                      self.is_train, config.keep_prob, scope="xx")
                    if config.share_cnn_weights:
                        tf.get_variable_scope().reuse_variables()
                        yy = multi_conv1d(Acy, filter_sizes, heights, "VALID",
                                          self.is_train, config.keep_prob, scope="xx")
                    else:
                        yy = multi_conv1d(Acy, filter_sizes, heights, "VALID",
                                          self.is_train, config.keep_prob, scope="yy")
                xx = tf.reshape(xx, [-1, JX, dco])
                yy = tf.reshape(yy, [-1, JX, dco])

        if config.use_word_emb:
            with tf.variable_scope("emb_var"), tf.device("/cpu:0"):
                if config.mode == 'train':
                    word_emb_mat = tf.get_variable("word_emb_mat", dtype='float', shape=[VW, dw],
                                                   initializer=get_initializer(config.emb_mat))
                else:
                    word_emb_mat = tf.get_variable("word_emb_mat", shape=[VW, dw], dtype='float')
                if config.use_glove_for_unk:
                    word_emb_mat = tf.concat(axis=0, values=[word_emb_mat, self.new_emb_mat])

            with tf.name_scope("word"):
                Ax = tf.nn.embedding_lookup(word_emb_mat, self.x)  # [N, JX, d]
                Ay = tf.nn.embedding_lookup(word_emb_mat, self.y)  # [N, JX, d]
                self.tensor_dict['x'] = Ax
                self.tensor_dict['y'] = Ay
            if config.use_char_emb:
                xx = tf.concat(axis=2, values=[xx, Ax])  # [N, M, JX, di]
                yy = tf.concat(axis=2, values=[yy, Ay])  # [N, JQ, di]
            else:
                xx = Ax
                yy = Ay

    # highway network
    if config.highway:
        with tf.variable_scope("highway"):
            xx = highway_network(xx, config.highway_num_layers, True, wd=config.wd, is_train=self.is_train)
            tf.get_variable_scope().reuse_variables()
            yy = highway_network(yy, config.highway_num_layers, True, wd=config.wd, is_train=self.is_train)

    self.tensor_dict['xx'] = xx
    self.tensor_dict['yy'] = yy

    with tf.variable_scope("encode_x"):
        self.fwd_lstm = BasicLSTMCell(self.h_dim, state_is_tuple=True)
        self.x_output, self.x_state = dynamic_rnn(cell=self.fwd_lstm, inputs=xx, dtype=tf.float32)
        # self.x_output, self.x_state = bidirectional_dynamic_rnn(cell_fw=self.fwd_lstm, cell_bw=self.bwd_lstm, inputs=self.x_emb, dtype=tf.float32)
        # print(self.x_output)

    with tf.variable_scope("encode_y"):
        self.fwd_lstm = BasicLSTMCell(self.h_dim, state_is_tuple=True)
        self.y_output, self.y_state = dynamic_rnn(cell=self.fwd_lstm, inputs=yy,
                                                  initial_state=self.x_state, dtype=tf.float32)
        # print self.y_output
        # print self.y_state

    length = get_sequence_length(self.y_output)
    self.Y = get_last_relevant_rnn_output(self.y_output, length)
    self.hstar = self.Y

    self.W_pred = tf.get_variable("W_pred", shape=[self.h_dim, 3])
    self.logits = tf.matmul(self.hstar, self.W_pred)

    print("logits:", self.logits)
def _build_forward(self):
    config = self.config
    N, JX, VW, VC, d, W = \
        config.batch_size, config.max_sent_size, \
        config.word_vocab_size, config.char_vocab_size, \
        config.hidden_size, config.max_word_size
    dc, dw, dco = config.char_emb_size, config.word_emb_size, config.char_out_size

    with tf.variable_scope("emb"):
        if config.use_char_emb:
            with tf.variable_scope("emb_var"), tf.device("/cpu:0"):
                char_emb_mat = tf.get_variable("char_emb_mat", shape=[VC, dc], dtype='float')

            with tf.variable_scope("char"):
                Acx = tf.nn.embedding_lookup(char_emb_mat, self.cx)  # [N, JX, W, dc]
                Acy = tf.nn.embedding_lookup(char_emb_mat, self.cy)  # [N, JX, W, dc]

                filter_sizes = list(map(int, config.out_channel_dims.split(',')))
                heights = list(map(int, config.filter_heights.split(',')))
                assert sum(filter_sizes) == dco, (filter_sizes, dco)
                with tf.variable_scope("conv"):
                    xx = multi_conv1d(Acx, filter_sizes, heights, "VALID",
                                      self.is_train, config.keep_prob, scope="xx")
                    if config.share_cnn_weights:
                        tf.get_variable_scope().reuse_variables()
                        yy = multi_conv1d(Acy, filter_sizes, heights, "VALID",
                                          self.is_train, config.keep_prob, scope="xx")
                    else:
                        yy = multi_conv1d(Acy, filter_sizes, heights, "VALID",
                                          self.is_train, config.keep_prob, scope="yy")
                xx = tf.reshape(xx, [-1, JX, dco])
                yy = tf.reshape(yy, [-1, JX, dco])

        if config.use_word_emb:
            with tf.variable_scope("emb_var"), tf.device("/cpu:0"):
                if config.mode == 'train':
                    word_emb_mat = tf.get_variable("word_emb_mat", dtype='float', shape=[VW, dw],
                                                   initializer=get_initializer(config.emb_mat))
                else:
                    word_emb_mat = tf.get_variable("word_emb_mat", shape=[VW, dw], dtype='float')
                if config.use_glove_for_unk:
                    word_emb_mat = tf.concat(axis=0, values=[word_emb_mat, self.new_emb_mat])

            with tf.name_scope("word"):
                Ax = tf.nn.embedding_lookup(word_emb_mat, self.x)  # [N, JX, d]
                Ay = tf.nn.embedding_lookup(word_emb_mat, self.y)  # [N, JX, d]
                self.tensor_dict['x'] = Ax
                self.tensor_dict['y'] = Ay
            if config.use_char_emb:
                xx = tf.concat(axis=2, values=[xx, Ax])  # [N, M, JX, di]
                yy = tf.concat(axis=2, values=[yy, Ay])  # [N, JQ, di]
            else:
                xx = Ax
                yy = Ay

    # highway network
    if config.highway:
        with tf.variable_scope("highway"):
            xx = highway_network(xx, config.highway_num_layers, True, wd=config.wd, is_train=self.is_train)
            tf.get_variable_scope().reuse_variables()
            yy = highway_network(yy, config.highway_num_layers, True, wd=config.wd, is_train=self.is_train)

    self.tensor_dict['xx'] = xx
    self.tensor_dict['yy'] = yy

    print(xx)
    xx = tf.reduce_sum(xx, 1)
    print(xx)
    yy = tf.reduce_sum(yy, 1)

    con = tf.concat([xx, yy], 1)
    print(con)

    # Two tanh hidden layers of width 200 over the concatenated sentence
    # vectors, followed by a 3-way output layer.
    self.W1 = tf.get_variable("W1", shape=[self.h_dim * 2, 200])
    self.a1 = tf.tanh(tf.matmul(con, self.W1))

    self.W2 = tf.get_variable("W2", shape=[200, 200])
    self.a2 = tf.tanh(tf.matmul(self.a1, self.W2))

    self.W_pred = tf.get_variable("W_pred", shape=[200, 3])
    self.logits = tf.matmul(self.a2, self.W_pred)

    print("logits:", self.logits)
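None of the classifier variants above include the training objective in this excerpt. Since each one ends in 3-way `logits`, a plausible objective is a standard softmax cross-entropy; the sketch below uses a hypothetical `self.y_label` placeholder for the gold entailment label, which is not part of the original code.

# Hypothetical objective: self.y_label is assumed to hold the gold class id (0/1/2).
losses = tf.nn.sparse_softmax_cross_entropy_with_logits(
    labels=self.y_label, logits=self.logits)
self.loss = tf.reduce_mean(losses)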
def _build_forward(self):
    config = self.config
    N, JX, VW, VC, d, W = \
        config.batch_size, config.max_sent_size, \
        config.word_vocab_size, config.char_vocab_size, \
        config.hidden_size, config.max_word_size
    dc, dw, dco = config.char_emb_size, config.word_emb_size, config.char_out_size

    # Getting the word vectors. For now we only care about word_emb; forget about char_emb.
    with tf.variable_scope("emb"):
        if config.use_word_emb:
            with tf.variable_scope("emb_var"), tf.device("/cpu:0"):
                if config.mode == 'train':
                    word_emb_mat = tf.get_variable("word_emb_mat", dtype='float', shape=[VW, dw],
                                                   initializer=get_initializer(config.emb_mat))
                else:
                    word_emb_mat = tf.get_variable("word_emb_mat", shape=[VW, dw], dtype='float')
                if config.use_glove_for_unk:
                    word_emb_mat = tf.concat(axis=0, values=[word_emb_mat, self.new_emb_mat])

            with tf.name_scope("word"):
                Ax = tf.nn.embedding_lookup(word_emb_mat, self.x)  # [N, JX, d]
                Ay = tf.nn.embedding_lookup(word_emb_mat, self.x)  # [N, JX, d]
                self.tensor_dict['x'] = Ax
                self.tensor_dict['y'] = Ay
            xx = Ax
            yy = Ay
            # xx is the processed encoder input;
            # yy should be derived from xx.

    # highway network
    if config.highway:
        with tf.variable_scope("highway"):
            xx = highway_network(xx, config.highway_num_layers, True, wd=config.wd, is_train=self.is_train)
            tf.get_variable_scope().reuse_variables()
            yy = highway_network(yy, config.highway_num_layers, True, wd=config.wd, is_train=self.is_train)

    self.tensor_dict['xx'] = xx
    self.tensor_dict['yy'] = yy

    self.encoder_inputs_embedded = xx
    self.decoder_train_inputs_embedded = yy
    self.decoder_train_length = self.y_length
    self.decoder_train_targets = self.x
    print("train_target:", self.decoder_train_targets)

    with tf.variable_scope("Encoder") as scope:
        encoder_cell = LSTMCell(self.h_dim, state_is_tuple=True)
        self.encoder_outputs, self.encoder_state = tf.nn.dynamic_rnn(
            cell=encoder_cell,
            inputs=self.encoder_inputs_embedded,
            sequence_length=self.x_length,
            dtype=tf.float32)

    with tf.variable_scope("Decoder") as scope:
        decoder_cell = LSTMCell(self.h_dim, state_is_tuple=True)
        print("self.decoder_train_inputs_embedded:", self.decoder_train_inputs_embedded)
        print("self.decoder_train_length:", self.decoder_train_length)
        helper = seq2seq.TrainingHelper(self.decoder_train_inputs_embedded,
                                        self.decoder_train_length)
        # Try a scheduled training helper. It may increase performance.
        decoder = seq2seq.BasicDecoder(cell=decoder_cell, helper=helper,
                                       initial_state=self.encoder_state)
        # Try AttentionDecoder.
        self.decoder_outputs_train, self.decoder_state_train = seq2seq.dynamic_decode(
            decoder,
            impute_finished=True,
            scope=scope,
        )
        print("shape of self.decoder_outputs_train.rnn_output",
              self.decoder_outputs_train.rnn_output)
        self.decoder_logits = self.decoder_outputs_train.rnn_output

    w_t = tf.get_variable("proj_w", [self.vocab_size, self.h_dim], dtype=tf.float32)
    w = tf.transpose(w_t)
    b = tf.get_variable("proj_b", [self.vocab_size], dtype=tf.float32)
    self.output_projection = (w, b)

    m = tf.matmul(tf.reshape(self.decoder_logits, [-1, self.h_dim]), w)
    print("m:", m)
    self.decoder_prediction_train = tf.argmax(
        tf.reshape(m, [N, -1, self.vocab_size]) + self.output_projection[1],
        axis=-1,
        name='decoder_prediction_train')
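The excerpt stops at the greedy `decoder_prediction_train` and does not show how the reconstruction objective is computed. The sketch below shows one way it could be trained, assuming `seq2seq` refers to `tf.contrib.seq2seq` (as in the decoder code above), that the decoded length matches the target length, and reusing the `(w, b)` output projection; it is an illustration, not the original training code.

# Sketch only: project decoder outputs to vocabulary logits and compute a
# length-masked cross-entropy against the reconstruction targets (self.x).
flat_logits = tf.matmul(tf.reshape(self.decoder_logits, [-1, self.h_dim]),
                        self.output_projection[0]) + self.output_projection[1]
full_logits = tf.reshape(flat_logits, [N, -1, self.vocab_size])
target_weights = tf.sequence_mask(self.decoder_train_length,
                                  tf.shape(self.decoder_train_targets)[1],
                                  dtype=tf.float32)
self.seq_loss = seq2seq.sequence_loss(logits=full_logits,
                                      targets=self.decoder_train_targets,
                                      weights=target_weights)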