def lcr_rot(input_fw, input_bw, sen_len_fw, sen_len_bw, target, sen_len_tr,
            keep_prob1, keep_prob2, l2, _id='all'):
    print('I am lcr_rot_inv.')
    cell = tf.contrib.rnn.LSTMCell

    # left hidden
    input_fw = tf.nn.dropout(input_fw, keep_prob=keep_prob1)
    hiddens_l = bi_dynamic_rnn(cell, input_fw, FLAGS.n_hidden, sen_len_fw,
                               FLAGS.max_sentence_len, 'l' + _id, 'all')
    pool_l = reduce_mean_with_len(hiddens_l, sen_len_fw)

    # right hidden
    input_bw = tf.nn.dropout(input_bw, keep_prob=keep_prob1)
    hiddens_r = bi_dynamic_rnn(cell, input_bw, FLAGS.n_hidden, sen_len_bw,
                               FLAGS.max_sentence_len, 'r' + _id, 'all')
    pool_r = reduce_mean_with_len(hiddens_r, sen_len_bw)

    # target hidden
    target = tf.nn.dropout(target, keep_prob=keep_prob1)
    hiddens_t = bi_dynamic_rnn(cell, target, FLAGS.n_hidden, sen_len_tr,
                               FLAGS.max_sentence_len, 't' + _id, 'all')
    pool_t = reduce_mean_with_len(hiddens_t, sen_len_tr)

    # attention target left (inverted rotation: the target is attended first,
    # queried by the pooled left context)
    att_t_l = bilinear_attention_layer(hiddens_t, pool_l, sen_len_tr,
                                       2 * FLAGS.n_hidden, l2, FLAGS.random_base, 'tl')
    outputs_t_l = tf.squeeze(tf.matmul(att_t_l, hiddens_t))

    # attention target right, queried by the pooled right context
    att_t_r = bilinear_attention_layer(hiddens_t, pool_r, sen_len_tr,
                                       2 * FLAGS.n_hidden, l2, FLAGS.random_base, 'tr')
    outputs_t_r = tf.squeeze(tf.matmul(att_t_r, hiddens_t))

    # attention left context, queried by the target-aware left representation
    att_l = bilinear_attention_layer(hiddens_l, outputs_t_l, sen_len_fw,
                                     2 * FLAGS.n_hidden, l2, FLAGS.random_base, 'l')
    outputs_l = tf.squeeze(tf.matmul(att_l, hiddens_l))

    # attention right context, queried by the target-aware right representation
    att_r = bilinear_attention_layer(hiddens_r, outputs_t_r, sen_len_bw,
                                     2 * FLAGS.n_hidden, l2, FLAGS.random_base, 'r')
    outputs_r = tf.squeeze(tf.matmul(att_r, hiddens_r))

    outputs = tf.concat([outputs_l, outputs_r, outputs_t_l, outputs_t_r], 1)
    prob = softmax_layer(outputs, 8 * FLAGS.n_hidden, FLAGS.random_base,
                         keep_prob2, l2, FLAGS.n_class)
    return prob, att_l, att_r, att_t_l, att_t_r
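
# A minimal graph-wiring sketch for the variant above (illustration only, not
# from the repo): it assumes pre-embedded inputs of shape
# [batch, max_sentence_len, embedding_dim]. The function name, placeholder
# names, embedding_dim default, and l2 value are all hypothetical.
def build_lcr_rot_inv_sketch(embedding_dim=300):
    input_fw = tf.placeholder(tf.float32, [None, FLAGS.max_sentence_len, embedding_dim])
    input_bw = tf.placeholder(tf.float32, [None, FLAGS.max_sentence_len, embedding_dim])
    target = tf.placeholder(tf.float32, [None, FLAGS.max_sentence_len, embedding_dim])
    sen_len_fw = tf.placeholder(tf.int32, [None])
    sen_len_bw = tf.placeholder(tf.int32, [None])
    sen_len_tr = tf.placeholder(tf.int32, [None])
    keep_prob1 = tf.placeholder(tf.float32)
    keep_prob2 = tf.placeholder(tf.float32)
    prob, att_l, att_r, att_t_l, att_t_r = lcr_rot(
        input_fw, input_bw, sen_len_fw, sen_len_bw, target, sen_len_tr,
        keep_prob1, keep_prob2, 0.0001)
    return prob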
def bi_dynamic_lstm(self, x1, x2, len1, len2):
    x1 = tf.nn.dropout(x1, keep_prob=self.keep_prob1)
    x2 = tf.nn.dropout(x2, keep_prob=self.keep_prob2)
    cell = tf.nn.rnn_cell.LSTMCell
    output1 = bi_dynamic_rnn(cell, x1, self.n_hidden, len1, self.max_sentence_len, 'q1', self.t1)
    output2 = bi_dynamic_rnn(cell, x2, self.n_hidden, len2, self.max_sentence_len, 'q2', self.t1)
    output = tf.concat([output1, output2], 1)  # batch_size * 4n_hidden
    predict = softmax_layer(output, 4 * self.n_hidden, self.random_base,
                            self.keep_prob2, self.l2_reg, self.n_class)
    return predict
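
# Shape sketch for the concatenation above (plain numpy, illustrative): each
# bi-LSTM branch is pooled to a [batch, 2 * n_hidden] vector, so the pair
# feature fed to softmax_layer is [batch, 4 * n_hidden]. The sizes below are
# arbitrary example values.
def _concat_shape_demo():
    import numpy as np
    batch, n_hidden = 32, 300
    output1 = np.zeros((batch, 2 * n_hidden))
    output2 = np.zeros((batch, 2 * n_hidden))
    output = np.concatenate([output1, output2], axis=1)
    assert output.shape == (batch, 4 * n_hidden)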
def lcr_rot(input_fw, input_bw, sen_len_fw, sen_len_bw, target, sen_len_tr,
            keep_prob1, keep_prob2, l2, _id='all'):
    print('I am lcr_rot_alt.')
    cell = tf.contrib.rnn.LSTMCell

    # left hidden
    input_fw = tf.nn.dropout(input_fw, keep_prob=keep_prob1)
    hiddens_l = bi_dynamic_rnn(cell, input_fw, FLAGS.n_hidden, sen_len_fw,
                               FLAGS.max_sentence_len, 'l' + _id, 'all')
    pool_l = reduce_mean_with_len(hiddens_l, sen_len_fw)

    # right hidden
    input_bw = tf.nn.dropout(input_bw, keep_prob=keep_prob1)
    hiddens_r = bi_dynamic_rnn(cell, input_bw, FLAGS.n_hidden, sen_len_bw,
                               FLAGS.max_sentence_len, 'r' + _id, 'all')
    pool_r = reduce_mean_with_len(hiddens_r, sen_len_bw)

    # target hidden
    target = tf.nn.dropout(target, keep_prob=keep_prob1)
    hiddens_t = bi_dynamic_rnn(cell, target, FLAGS.n_hidden, sen_len_tr,
                               FLAGS.max_sentence_len, 't' + _id, 'all')
    pool_t = reduce_mean_with_len(hiddens_t, sen_len_tr)

    # attention left; only this first multi-dimensional attention step is
    # active, and the function returns its attention maps rather than class
    # probabilities
    attention, attention_masked = multidimensional_attention_layer(
        hiddens_l, pool_t, sen_len_fw, 2 * FLAGS.n_hidden, l2, FLAGS.random_base, 'tl')

    # The rest of the rotatory pipeline is disabled:
    # outputs_t_l_init = tf.transpose(tf.reduce_sum(tf.multiply(att_l, tf.transpose(hiddens_l, perm=[0, 2, 1])), reduction_indices=-1, keep_dims=True) + 1e-9, [0, 2, 1])
    # outputs_t_l = tf.squeeze(outputs_t_l_init)
    #
    # # attention right
    # att_r = multidimensional_attention_layer(hiddens_r, pool_t, sen_len_bw, 2 * FLAGS.n_hidden, l2, FLAGS.random_base, 'tr')
    # outputs_t_r_init = tf.transpose(tf.reduce_sum(tf.multiply(att_r, tf.transpose(hiddens_r, perm=[0, 2, 1])), reduction_indices=-1, keep_dims=True) + 1e-9, [0, 2, 1])
    # outputs_t_r = tf.squeeze(outputs_t_r_init)
    #
    # # attention target left
    # att_t_l = multidimensional_attention_layer(hiddens_t, outputs_t_l, sen_len_tr, 2 * FLAGS.n_hidden, l2, FLAGS.random_base, 'l')
    # outputs_l_init = tf.transpose(tf.reduce_sum(tf.multiply(att_t_l, tf.transpose(hiddens_t, perm=[0, 2, 1])), reduction_indices=-1, keep_dims=True) + 1e-9, [0, 2, 1])
    # outputs_l = tf.squeeze(outputs_l_init)
    #
    # # attention target right
    # att_t_r = multidimensional_attention_layer(hiddens_t, outputs_t_r, sen_len_tr, 2 * FLAGS.n_hidden, l2, FLAGS.random_base, 'r')
    # outputs_r_init = tf.transpose(tf.reduce_sum(tf.multiply(att_t_r, tf.transpose(hiddens_t, perm=[0, 2, 1])), reduction_indices=-1, keep_dims=True) + 1e-9, [0, 2, 1])
    # outputs_r = tf.squeeze(outputs_r_init)
    #
    # outputs_init_context = tf.concat([outputs_t_l_init, outputs_t_r_init], 1)
    # outputs_init_target = tf.concat([outputs_l_init, outputs_r_init], 1)
    # att_outputs_context = dot_produce_attention_layer(outputs_init_context, None, 2 * FLAGS.n_hidden, l2, FLAGS.random_base, 'fin1')
    # att_outputs_target = dot_produce_attention_layer(outputs_init_target, None, 2 * FLAGS.n_hidden, l2, FLAGS.random_base, 'fin2')
    # outputs_l = tf.squeeze(tf.matmul(tf.expand_dims(att_outputs_target[:, :, 0], 2), outputs_l_init))
    # outputs_r = tf.squeeze(tf.matmul(tf.expand_dims(att_outputs_target[:, :, 1], 2), outputs_r_init))
    # outputs_t_l = tf.squeeze(tf.matmul(tf.expand_dims(att_outputs_context[:, :, 0], 2), outputs_t_l_init))
    # outputs_t_r = tf.squeeze(tf.matmul(tf.expand_dims(att_outputs_context[:, :, 1], 2), outputs_t_r_init))
    #
    # for i in range(1):
    #     # attention target
    #     att_l = multidimensional_attention_layer(hiddens_l, outputs_l, sen_len_fw, 2 * FLAGS.n_hidden, l2, FLAGS.random_base, 'tl' + str(i))
    #     outputs_t_l_init = tf.transpose(tf.reduce_sum(tf.multiply(att_l, tf.transpose(hiddens_l, perm=[0, 2, 1])), reduction_indices=-1, keep_dims=True) + 1e-9, [0, 2, 1])
    #     outputs_t_l = tf.squeeze(outputs_t_l_init)
    #
    #     att_r = multidimensional_attention_layer(hiddens_r, outputs_r, sen_len_bw, 2 * FLAGS.n_hidden, l2, FLAGS.random_base, 'tr' + str(i))
    #     outputs_t_r_init = tf.transpose(tf.reduce_sum(tf.multiply(att_r, tf.transpose(hiddens_r, perm=[0, 2, 1])), reduction_indices=-1, keep_dims=True) + 1e-9, [0, 2, 1])
    #     outputs_t_r = tf.squeeze(outputs_t_r_init)
    #
    #     # attention left
    #     att_t_l = multidimensional_attention_layer(hiddens_t, outputs_t_l, sen_len_tr, 2 * FLAGS.n_hidden, l2, FLAGS.random_base, 'l' + str(i))
    #     outputs_l_init = tf.transpose(tf.reduce_sum(tf.multiply(att_t_l, tf.transpose(hiddens_t, perm=[0, 2, 1])), reduction_indices=-1, keep_dims=True) + 1e-9, [0, 2, 1])
    #     outputs_l = tf.squeeze(outputs_l_init)
    #
    #     # attention right
    #     att_t_r = multidimensional_attention_layer(hiddens_t, outputs_t_r, sen_len_tr, 2 * FLAGS.n_hidden, l2, FLAGS.random_base, 'r' + str(i))
    #     outputs_r_init = tf.transpose(tf.reduce_sum(tf.multiply(att_t_r, tf.transpose(hiddens_t, perm=[0, 2, 1])), reduction_indices=-1, keep_dims=True) + 1e-9, [0, 2, 1])
    #     outputs_r = tf.squeeze(outputs_r_init)
    #
    #     outputs_init_context = tf.concat([outputs_t_l_init, outputs_t_r_init], 1)
    #     outputs_init_target = tf.concat([outputs_l_init, outputs_r_init], 1)
    #     att_outputs_context = dot_produce_attention_layer(outputs_init_context, None, 2 * FLAGS.n_hidden, l2, FLAGS.random_base, 'fin1' + str(i))
    #     att_outputs_target = dot_produce_attention_layer(outputs_init_target, None, 2 * FLAGS.n_hidden, l2, FLAGS.random_base, 'fin2' + str(i))
    #     outputs_l = tf.squeeze(tf.matmul(tf.expand_dims(att_outputs_target[:, :, 0], 2), outputs_l_init))
    #     outputs_r = tf.squeeze(tf.matmul(tf.expand_dims(att_outputs_target[:, :, 1], 2), outputs_r_init))
    #     outputs_t_l = tf.squeeze(tf.matmul(tf.expand_dims(att_outputs_context[:, :, 0], 2), outputs_t_l_init))
    #     outputs_t_r = tf.squeeze(tf.matmul(tf.expand_dims(att_outputs_context[:, :, 1], 2), outputs_t_r_init))
    #
    # outputs_fin = tf.concat([outputs_l, outputs_r, outputs_t_l, outputs_t_r], 1)
    # prob = softmax_layer(outputs_fin, 8 * FLAGS.n_hidden, FLAGS.random_base, keep_prob2, l2, FLAGS.n_class)

    return attention, attention_masked
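
# multidimensional_attention_layer is defined elsewhere in the repo; judging
# from its use above (it returns a raw and a masked attention tensor, and its
# scores multiply the hidden states element-wise), it plausibly produces one
# score per time step *and* per hidden dimension. The sketch below is an
# assumed reconstruction, with the query term (pool_t above) omitted for
# brevity; it is not the repo's implementation.
def multidimensional_attention_sketch(hiddens, sen_len, max_len, scope):
    # hiddens: [batch, max_len, dim]; sen_len: [batch]
    dim = hiddens.get_shape().as_list()[-1]
    with tf.variable_scope('md_att_' + scope):
        w = tf.get_variable('w', [dim, dim])
        scores = tf.tensordot(tf.tanh(hiddens), w, axes=[[2], [0]])  # [batch, max_len, dim]
    mask = tf.expand_dims(tf.sequence_mask(sen_len, max_len, dtype=tf.float32), -1)
    # masked softmax over time, independently for each hidden dimension
    attention = tf.nn.softmax(scores + (1.0 - mask) * -1e9, axis=1)
    return attention, attention * mask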
def lcr_rot(input_fw, input_bw, sen_len_fw, sen_len_bw, target, sen_len_tr,
            keep_prob1, keep_prob2, l2, _id='all', number_of_heads=FLAGS.heads):
    dim_head = int(np.ceil((2 * FLAGS.n_hidden) / number_of_heads))
    random_base = FLAGS.random_base
    print('I am lcr_rot_alt.')
    cell = tf.contrib.rnn.LSTMCell

    # left hidden
    input_fw = tf.nn.dropout(input_fw, keep_prob=keep_prob1)
    hiddens_l = bi_dynamic_rnn(cell, input_fw, FLAGS.n_hidden, sen_len_fw,
                               FLAGS.max_sentence_len, 'l' + _id, 'all')
    pool_l = reduce_mean_with_len(hiddens_l, sen_len_fw)

    # right hidden
    input_bw = tf.nn.dropout(input_bw, keep_prob=keep_prob1)
    hiddens_r = bi_dynamic_rnn(cell, input_bw, FLAGS.n_hidden, sen_len_bw,
                               FLAGS.max_sentence_len, 'r' + _id, 'all')
    pool_r = reduce_mean_with_len(hiddens_r, sen_len_bw)

    # target hidden
    target = tf.nn.dropout(target, keep_prob=keep_prob1)
    hiddens_t = bi_dynamic_rnn(cell, target, FLAGS.n_hidden, sen_len_tr,
                               FLAGS.max_sentence_len, 't' + _id, 'all')
    pool_t = reduce_mean_with_len(hiddens_t, sen_len_tr)

    # target2context attention (left + right), one pass per head
    for i in range(number_of_heads):
        # compute the per-head linear projections of the hidden states, plus
        # that head's attention scores and representation
        att_l_q, outputs_q_t_l, hiddens_l_q = compute_head(
            hiddens_l, pool_t, sen_len_fw, 2 * FLAGS.n_hidden,
            FLAGS.max_sentence_len, dim_head, FLAGS.random_base, l2, 'tl' + str(i))
        att_r_q, outputs_q_t_r, hiddens_r_q = compute_head(
            hiddens_r, pool_t, sen_len_bw, 2 * FLAGS.n_hidden,
            FLAGS.max_sentence_len, dim_head, FLAGS.random_base, l2, 'tr' + str(i))
        if i == 0:
            outputs_t_l = outputs_q_t_l
            outputs_t_r = outputs_q_t_r
            att_l = att_l_q
            att_r = att_r_q
        else:
            # the final r^l and r^r for target2context are built by
            # concatenating the per-head representations
            outputs_t_l = tf.concat([outputs_t_l, outputs_q_t_l], 1)
            outputs_t_r = tf.concat([outputs_t_r, outputs_q_t_r], 1)
            att_l = tf.concat([att_l, att_l_q], 0)
            att_r = tf.concat([att_r, att_r_q], 0)

    # context2target mechanism
    for i in range(number_of_heads):
        att_t_l_q, outputs_q_l, hiddens_t_l_q = compute_head(
            hiddens_t, outputs_t_l, sen_len_tr, 2 * FLAGS.n_hidden,
            FLAGS.max_target_len, dim_head, FLAGS.random_base, l2, 'l' + str(i))
        att_t_r_q, outputs_q_r, hiddens_t_r_q = compute_head(
            hiddens_t, outputs_t_r, sen_len_tr, 2 * FLAGS.n_hidden,
            FLAGS.max_target_len, dim_head, FLAGS.random_base, l2, 'r' + str(i))
        if i == 0:
            outputs_l = outputs_q_l
            outputs_r = outputs_q_r
            att_t_l = att_t_l_q
            att_t_r = att_t_r_q
        else:
            outputs_l = tf.concat([outputs_l, outputs_q_l], 1)
            outputs_r = tf.concat([outputs_r, outputs_q_r], 1)
            att_t_l = tf.concat([att_t_l, att_t_l_q], 0)
            att_t_r = tf.concat([att_t_r, att_t_r_q], 0)

    outputs_init_context = tf.concat(
        [tf.expand_dims(outputs_t_l, 1), tf.expand_dims(outputs_t_r, 1)], 1)
    outputs_init_target = tf.concat(
        [tf.expand_dims(outputs_l, 1), tf.expand_dims(outputs_r, 1)], 1)
    att_outputs_context = dot_produce_attention_layer(
        outputs_init_context, None, 2 * FLAGS.n_hidden, l2, FLAGS.random_base, 'fin1')
    att_outputs_target = dot_produce_attention_layer(
        outputs_init_target, None, 2 * FLAGS.n_hidden, l2, FLAGS.random_base, 'fin2')
    outputs_l = tf.squeeze(
        tf.matmul(tf.expand_dims(att_outputs_target[:, :, 0], 2), tf.expand_dims(outputs_l, 1)))
    outputs_r = tf.squeeze(
        tf.matmul(tf.expand_dims(att_outputs_target[:, :, 1], 2), tf.expand_dims(outputs_r, 1)))
    outputs_t_l = tf.squeeze(
        tf.matmul(tf.expand_dims(att_outputs_context[:, :, 0], 2), tf.expand_dims(outputs_t_l, 1)))
    outputs_t_r = tf.squeeze(
        tf.matmul(tf.expand_dims(att_outputs_context[:, :, 1], 2), tf.expand_dims(outputs_t_r, 1)))

    # two further rotatory iterations over all heads
    for i in range(2):
        for j in range(number_of_heads):
            att_l_q, outputs_q_t_l, hiddens_l_q = compute_head(
                hiddens_l, outputs_l, sen_len_fw, 2 * FLAGS.n_hidden,
                FLAGS.max_sentence_len, dim_head, FLAGS.random_base, l2, 'tl' + str(i) + str(j))
            att_r_q, outputs_q_t_r, hiddens_r_q = compute_head(
                hiddens_r, outputs_r, sen_len_bw, 2 * FLAGS.n_hidden,
                FLAGS.max_sentence_len, dim_head, FLAGS.random_base, l2, 'tr' + str(i) + str(j))
            att_t_l_q, outputs_q_l, hiddens_t_l_q = compute_head(
                hiddens_t, outputs_t_l, sen_len_tr, 2 * FLAGS.n_hidden,
                FLAGS.max_target_len, dim_head, FLAGS.random_base, l2, 'l' + str(i) + str(j))
            att_t_r_q, outputs_q_r, hiddens_t_r_q = compute_head(
                hiddens_t, outputs_t_r, sen_len_tr, 2 * FLAGS.n_hidden,
                FLAGS.max_target_len, dim_head, FLAGS.random_base, l2, 'r' + str(i) + str(j))
            if i == 1 and j == 0:
                hiddens_l_new = hiddens_l_q
                hiddens_r_new = hiddens_r_q
                hiddens_t_l_new = hiddens_t_l_q
                hiddens_t_r_new = hiddens_t_r_q
                att_l = att_l_q
                att_r = att_r_q
                att_t_l = att_t_l_q
                att_t_r = att_t_r_q
            if i == 1 and j != 0:
                hiddens_l_new = tf.concat([hiddens_l_new, hiddens_l_q], 0)
                hiddens_r_new = tf.concat([hiddens_r_new, hiddens_r_q], 0)
                hiddens_t_l_new = tf.concat([hiddens_t_l_new, hiddens_t_l_q], 0)
                hiddens_t_r_new = tf.concat([hiddens_t_r_new, hiddens_t_r_q], 0)
                att_l = tf.concat([att_l, att_l_q], 0)
                att_r = tf.concat([att_r, att_r_q], 0)
                att_t_l = tf.concat([att_t_l, att_t_l_q], 0)
                att_t_r = tf.concat([att_t_r, att_t_r_q], 0)
            if j == 0:
                outputs_t_l_new = outputs_q_t_l
                outputs_t_r_new = outputs_q_t_r
                outputs_l_new = outputs_q_l
                outputs_r_new = outputs_q_r
            else:
                outputs_t_l_new = tf.concat([outputs_t_l_new, outputs_q_t_l], 1)
                outputs_t_r_new = tf.concat([outputs_t_r_new, outputs_q_t_r], 1)
                outputs_l_new = tf.concat([outputs_l_new, outputs_q_l], 1)
                outputs_r_new = tf.concat([outputs_r_new, outputs_q_r], 1)
        outputs_t_l = outputs_t_l_new
        outputs_t_r = outputs_t_r_new
        outputs_r = outputs_r_new
        outputs_l = outputs_l_new
        outputs_init_context = tf.concat(
            [tf.expand_dims(outputs_t_l, 1), tf.expand_dims(outputs_t_r, 1)], 1)
        outputs_init_target = tf.concat(
            [tf.expand_dims(outputs_l, 1), tf.expand_dims(outputs_r, 1)], 1)
        att_outputs_context = dot_produce_attention_layer(
            outputs_init_context, None, 2 * FLAGS.n_hidden, l2, FLAGS.random_base, 'fin1' + str(i))
        att_outputs_target = dot_produce_attention_layer(
            outputs_init_target, None, 2 * FLAGS.n_hidden, l2, FLAGS.random_base, 'fin2' + str(i))
        outputs_l = tf.squeeze(
            tf.matmul(tf.expand_dims(att_outputs_target[:, :, 0], 2), tf.expand_dims(outputs_l, 1)))
        outputs_r = tf.squeeze(
            tf.matmul(tf.expand_dims(att_outputs_target[:, :, 1], 2), tf.expand_dims(outputs_r, 1)))
        outputs_t_l = tf.squeeze(
            tf.matmul(tf.expand_dims(att_outputs_context[:, :, 0], 2), tf.expand_dims(outputs_t_l, 1)))
        outputs_t_r = tf.squeeze(
            tf.matmul(tf.expand_dims(att_outputs_context[:, :, 1], 2), tf.expand_dims(outputs_t_r, 1)))

    outputs_fin = tf.concat([outputs_l, outputs_r, outputs_t_l, outputs_t_r], 1)
    prob = softmax_layer(outputs_fin, 8 * FLAGS.n_hidden, FLAGS.random_base,
                         keep_prob2, l2, FLAGS.n_class)
    return prob, att_l, att_r, att_t_l, att_t_r
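
# compute_head is defined elsewhere in the repo; from its call sites above
# (hidden states plus a query vector in, a triple of attention scores, head
# representation, and projected states out) it plausibly does something like
# the sketch below. This is an assumption for illustration, not the repo's
# implementation; the variable names and the dot-product scoring are
# hypothetical.
def compute_head_sketch(hiddens, query, sen_len, max_len, dim_head, random_base, scope):
    # hiddens: [batch, max_len, in_dim]; query: [batch, in_dim]
    in_dim = hiddens.get_shape().as_list()[-1]
    with tf.variable_scope('head_' + scope):
        init = tf.random_uniform_initializer(-random_base, random_base)
        w_h = tf.get_variable('w_h', [in_dim, dim_head], initializer=init)
        w_q = tf.get_variable('w_q', [in_dim, dim_head], initializer=init)
        h = tf.tensordot(hiddens, w_h, axes=[[2], [0]])            # [batch, max_len, dim_head]
        q = tf.expand_dims(tf.matmul(query, w_q), 2)               # [batch, dim_head, 1]
        scores = tf.transpose(tf.matmul(h, q), [0, 2, 1])          # [batch, 1, max_len]
        mask = tf.expand_dims(tf.sequence_mask(sen_len, max_len, dtype=tf.float32), 1)
        att = tf.nn.softmax(scores + (1.0 - mask) * -1e9, axis=2)  # masked softmax over time
        rep = tf.squeeze(tf.matmul(att, h), axis=1)                # [batch, dim_head]
    return att, rep, h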
def lcr_rot(input_fw, input_bw, sen_len_fw, sen_len_bw, target, sen_len_tr,
            keep_prob1, keep_prob2, l2, _id='all'):
    print('I am lcr_rot_alt-V4.')
    cell = tf.contrib.rnn.LSTMCell

    # left hidden
    input_fw = tf.nn.dropout(input_fw, keep_prob=keep_prob1)
    hiddens_l = bi_dynamic_rnn(cell, input_fw, FLAGS.n_hidden, sen_len_fw,
                               FLAGS.max_sentence_len, 'l' + _id, 'all')
    pool_l = reduce_mean_with_len(hiddens_l, sen_len_fw)

    # right hidden
    input_bw = tf.nn.dropout(input_bw, keep_prob=keep_prob1)
    hiddens_r = bi_dynamic_rnn(cell, input_bw, FLAGS.n_hidden, sen_len_bw,
                               FLAGS.max_sentence_len, 'r' + _id, 'all')
    pool_r = reduce_mean_with_len(hiddens_r, sen_len_bw)

    # target hidden
    target = tf.nn.dropout(target, keep_prob=keep_prob1)
    hiddens_t = bi_dynamic_rnn(cell, target, FLAGS.n_hidden, sen_len_tr,
                               FLAGS.max_sentence_len, 't' + _id, 'all')
    pool_t = reduce_mean_with_len(hiddens_t, sen_len_tr)

    # attention left
    att_l = bilinear_attention_layer(hiddens_l, pool_t, sen_len_fw,
                                     2 * FLAGS.n_hidden, l2, FLAGS.random_base, 'tl')
    outputs_t_l_init = tf.matmul(att_l, hiddens_l)
    outputs_t_l = tf.squeeze(outputs_t_l_init)

    # attention right
    att_r = bilinear_attention_layer(hiddens_r, pool_t, sen_len_bw,
                                     2 * FLAGS.n_hidden, l2, FLAGS.random_base, 'tr')
    outputs_t_r_init = tf.matmul(att_r, hiddens_r)
    outputs_t_r = tf.squeeze(outputs_t_r_init)

    # attention target left
    att_t_l = bilinear_attention_layer(hiddens_t, outputs_t_l, sen_len_tr,
                                       2 * FLAGS.n_hidden, l2, FLAGS.random_base, 'l')
    outputs_l_init = tf.matmul(att_t_l, hiddens_t)
    outputs_l = tf.squeeze(outputs_l_init)

    # attention target right
    att_t_r = bilinear_attention_layer(hiddens_t, outputs_t_r, sen_len_tr,
                                       2 * FLAGS.n_hidden, l2, FLAGS.random_base, 'r')
    outputs_r_init = tf.matmul(att_t_r, hiddens_t)
    outputs_r = tf.squeeze(outputs_r_init)

    # hierarchical attention over the four representations
    outputs_init_context = tf.concat([outputs_t_l_init, outputs_t_r_init], 1)
    outputs_init_target = tf.concat([outputs_l_init, outputs_r_init], 1)
    att_outputs_context = dot_produce_attention_layer(
        outputs_init_context, None, 2 * FLAGS.n_hidden, l2, FLAGS.random_base, 'fin1')
    att_outputs_target = dot_produce_attention_layer(
        outputs_init_target, None, 2 * FLAGS.n_hidden, l2, FLAGS.random_base, 'fin2')
    outputs_l = tf.squeeze(
        tf.matmul(tf.expand_dims(att_outputs_target[:, :, 0], 2), outputs_l_init))
    outputs_r = tf.squeeze(
        tf.matmul(tf.expand_dims(att_outputs_target[:, :, 1], 2), outputs_r_init))
    outputs_t_l = tf.squeeze(
        tf.matmul(tf.expand_dims(att_outputs_context[:, :, 0], 2), outputs_t_l_init))
    outputs_t_r = tf.squeeze(
        tf.matmul(tf.expand_dims(att_outputs_context[:, :, 1], 2), outputs_t_r_init))

    for i in range(2):
        # attention target
        att_l = bilinear_attention_layer(hiddens_l, outputs_l, sen_len_fw,
                                         2 * FLAGS.n_hidden, l2, FLAGS.random_base, 'tl' + str(i))
        outputs_t_l_init = tf.matmul(att_l, hiddens_l)
        outputs_t_l = tf.squeeze(outputs_t_l_init)
        att_r = bilinear_attention_layer(hiddens_r, outputs_r, sen_len_bw,
                                         2 * FLAGS.n_hidden, l2, FLAGS.random_base, 'tr' + str(i))
        outputs_t_r_init = tf.matmul(att_r, hiddens_r)
        outputs_t_r = tf.squeeze(outputs_t_r_init)

        # attention left
        att_t_l = bilinear_attention_layer(hiddens_t, outputs_t_l, sen_len_tr,
                                           2 * FLAGS.n_hidden, l2, FLAGS.random_base, 'l' + str(i))
        outputs_l_init = tf.matmul(att_t_l, hiddens_t)
        outputs_l = tf.squeeze(outputs_l_init)

        # attention right
        att_t_r = bilinear_attention_layer(hiddens_t, outputs_t_r, sen_len_tr,
                                           2 * FLAGS.n_hidden, l2, FLAGS.random_base, 'r' + str(i))
        outputs_r_init = tf.matmul(att_t_r, hiddens_t)
        outputs_r = tf.squeeze(outputs_r_init)

        outputs_init_context = tf.concat([outputs_t_l_init, outputs_t_r_init], 1)
        outputs_init_target = tf.concat([outputs_l_init, outputs_r_init], 1)
        att_outputs_context = dot_produce_attention_layer(
            outputs_init_context, None, 2 * FLAGS.n_hidden, l2, FLAGS.random_base, 'fin1' + str(i))
        att_outputs_target = dot_produce_attention_layer(
            outputs_init_target, None, 2 * FLAGS.n_hidden, l2, FLAGS.random_base, 'fin2' + str(i))
        outputs_l = tf.squeeze(
            tf.matmul(tf.expand_dims(att_outputs_target[:, :, 0], 2), outputs_l_init))
        outputs_r = tf.squeeze(
            tf.matmul(tf.expand_dims(att_outputs_target[:, :, 1], 2), outputs_r_init))
        outputs_t_l = tf.squeeze(
            tf.matmul(tf.expand_dims(att_outputs_context[:, :, 0], 2), outputs_t_l_init))
        outputs_t_r = tf.squeeze(
            tf.matmul(tf.expand_dims(att_outputs_context[:, :, 1], 2), outputs_t_r_init))

    outputs_fin = tf.concat([outputs_l, outputs_r, outputs_t_l, outputs_t_r], 1)
    prob = softmax_layer(outputs_fin, 8 * FLAGS.n_hidden, FLAGS.random_base,
                         keep_prob2, l2, FLAGS.n_class)
    return prob, att_l, att_r, att_t_l, att_t_r
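
# Numeric sketch (plain numpy, illustrative) of the fusion step above:
# dot_produce_attention_layer yields one scalar weight per stacked candidate
# vector, and tf.expand_dims(att[:, :, k], 2) matmul candidate rescales the
# k-th candidate by its weight. The sizes below are arbitrary.
def _fusion_weight_demo():
    import numpy as np
    batch, dim = 2, 4
    outputs_l_init = np.random.rand(batch, 1, dim)   # one candidate vector per example
    att = np.random.rand(batch, 1, 2)                # weights over {left, right}
    w_l = np.expand_dims(att[:, :, 0], 2)            # [batch, 1, 1]
    outputs_l = np.squeeze(w_l @ outputs_l_init)     # [batch, dim]
    # the matmul is just a per-example rescaling of the candidate vector
    assert np.allclose(outputs_l, att[:, 0, 0, None] * outputs_l_init[:, 0, :])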
def lcr_rot(n_asp, input_fw, input_bw, sen_len_fw, sen_len_bw, target, sen_len_tr,
            keep_prob1, keep_prob2, l2, _id='all'):
    print('I am lcr_rot_alt.')
    cell = tf.contrib.rnn.LSTMCell

    # left hidden
    input_fw = tf.nn.dropout(input_fw, keep_prob=keep_prob1)
    hiddens_l = bi_dynamic_rnn(cell, input_fw, FLAGS.n_hidden, sen_len_fw,
                               FLAGS.max_sentence_len, 'l' + _id, 'all')
    pool_l = reduce_mean_with_len(hiddens_l, sen_len_fw)

    # right hidden
    input_bw = tf.nn.dropout(input_bw, keep_prob=keep_prob1)
    hiddens_r = bi_dynamic_rnn(cell, input_bw, FLAGS.n_hidden, sen_len_bw,
                               FLAGS.max_sentence_len, 'r' + _id, 'all')
    pool_r = reduce_mean_with_len(hiddens_r, sen_len_bw)

    # target hidden
    target = tf.nn.dropout(target, keep_prob=keep_prob1)
    hiddens_t = bi_dynamic_rnn(cell, target, FLAGS.n_hidden, sen_len_tr,
                               FLAGS.max_sentence_len, 't' + _id, 'all')
    pool_t = reduce_mean_with_len(hiddens_t, sen_len_tr)

    # attention left
    att_l = bilinear_attention_layer(hiddens_l, pool_t, sen_len_fw,
                                     2 * FLAGS.n_hidden, l2, FLAGS.random_base, 'tl')
    outputs_t_l = tf.squeeze(tf.matmul(att_l, hiddens_l))

    # attention right
    att_r = bilinear_attention_layer(hiddens_r, pool_t, sen_len_bw,
                                     2 * FLAGS.n_hidden, l2, FLAGS.random_base, 'tr')
    outputs_t_r = tf.squeeze(tf.matmul(att_r, hiddens_r))

    # attention target left
    att_t_l = bilinear_attention_layer(hiddens_t, outputs_t_l, sen_len_tr,
                                       2 * FLAGS.n_hidden, l2, FLAGS.random_base, 'l')
    outputs_l = tf.squeeze(tf.matmul(att_t_l, hiddens_t))

    # attention target right
    att_t_r = bilinear_attention_layer(hiddens_t, outputs_t_r, sen_len_tr,
                                       2 * FLAGS.n_hidden, l2, FLAGS.random_base, 'r')
    outputs_r = tf.squeeze(tf.matmul(att_t_r, hiddens_t))

    # two additional rotatory iterations; note that in this variant the
    # context passes keep using pool_t as the query
    for i in range(2):
        # attention target
        att_l = bilinear_attention_layer(hiddens_l, pool_t, sen_len_fw,
                                         2 * FLAGS.n_hidden, l2, FLAGS.random_base, 'tl' + str(i))
        outputs_t_l = tf.squeeze(tf.matmul(att_l, hiddens_l))
        att_r = bilinear_attention_layer(hiddens_r, pool_t, sen_len_bw,
                                         2 * FLAGS.n_hidden, l2, FLAGS.random_base, 'tr' + str(i))
        outputs_t_r = tf.squeeze(tf.matmul(att_r, hiddens_r))

        # attention left
        att_t_l = bilinear_attention_layer(hiddens_t, outputs_t_l, sen_len_tr,
                                           2 * FLAGS.n_hidden, l2, FLAGS.random_base, 'l' + str(i))
        outputs_l = tf.squeeze(tf.matmul(att_t_l, hiddens_t))

        # attention right
        att_t_r = bilinear_attention_layer(hiddens_t, outputs_t_r, sen_len_tr,
                                           2 * FLAGS.n_hidden, l2, FLAGS.random_base, 'r' + str(i))
        outputs_r = tf.squeeze(tf.matmul(att_t_r, hiddens_t))

    outputs = tf.concat([outputs_l, outputs_r, outputs_t_l, outputs_t_r], 1)
    prob = softmax_layer(outputs, 8 * FLAGS.n_hidden, FLAGS.random_base,
                         keep_prob2, l2, FLAGS.n_class)

    # aggregate aspect-level probabilities into sentence-level ones:
    # n_asp[i] is the number of aspects in sentence i, and begin[i] is the
    # row offset of its first aspect in prob
    asp_size = tf.size(n_asp)
    begin = tf.map_fn(lambda x: tf.math.reduce_sum(n_asp[:x], 0), tf.range(asp_size), tf.int32)
    prob_sen = tf.map_fn(
        lambda i: tf.clip_by_value(
            tf.reduce_sum(tf.slice(prob, [begin[i], 0], [n_asp[i], FLAGS.n_class]), axis=0),
            clip_value_min=0., clip_value_max=1.),
        tf.range(asp_size), dtype=tf.float32)
    return prob, prob_sen, outputs, att_l, att_r, att_t_l, att_t_r
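
# Plain-numpy equivalent (illustrative) of the aspect-to-sentence aggregation
# above: aspect-level class probabilities are summed per sentence and clipped
# to [0, 1]. The counts and class number below are arbitrary example values.
def _sentence_prob_demo():
    import numpy as np
    n_asp = np.array([2, 1, 3])                          # aspects per sentence
    prob = np.random.rand(n_asp.sum(), 3)                # [total_aspects, n_class]
    begin = np.concatenate([[0], np.cumsum(n_asp)[:-1]])  # first-aspect offsets
    prob_sen = np.stack([np.clip(prob[b:b + n].sum(axis=0), 0., 1.)
                         for b, n in zip(begin, n_asp)])
    assert prob_sen.shape == (3, 3)                      # [num_sentences, n_class]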
def lcr_rot(input_fw, input_bw, sen_len_fw, sen_len_bw, target, sen_len_tr,
            keep_prob1, keep_prob2, l2, _id='all'):
    print('I am a new lcr_rot_alt. Trival2021 model 1')
    cell = tf.contrib.rnn.LSTMCell

    # left hidden
    input_fw = tf.nn.dropout(input_fw, keep_prob=keep_prob1)
    hiddens_l = bi_dynamic_rnn(cell, input_fw, FLAGS.n_hidden, sen_len_fw,
                               FLAGS.max_sentence_len, 'l' + _id, 'all')
    pool_l = reduce_mean_with_len(hiddens_l, sen_len_fw)

    # right hidden
    input_bw = tf.nn.dropout(input_bw, keep_prob=keep_prob1)
    hiddens_r = bi_dynamic_rnn(cell, input_bw, FLAGS.n_hidden, sen_len_bw,
                               FLAGS.max_sentence_len, 'r' + _id, 'all')
    pool_r = reduce_mean_with_len(hiddens_r, sen_len_bw)

    # target hidden
    target = tf.nn.dropout(target, keep_prob=keep_prob1)
    hiddens_t = bi_dynamic_rnn(cell, target, FLAGS.n_hidden, sen_len_tr,
                               FLAGS.max_sentence_len, 't' + _id, 'all')
    pool_t = reduce_mean_with_len(hiddens_t, sen_len_tr)

    # attention target left, queried by the pooled left context
    att_t_l = bilinear_attention_layer(hiddens_t, pool_l, sen_len_tr,
                                       2 * FLAGS.n_hidden, l2, FLAGS.random_base, 'l')
    outputs_l_init = tf.matmul(att_t_l, hiddens_t)
    outputs_l_org = tf.squeeze(outputs_l_init)

    # attention target right, queried by the pooled right context
    att_t_r = bilinear_attention_layer(hiddens_t, pool_r, sen_len_tr,
                                       2 * FLAGS.n_hidden, l2, FLAGS.random_base, 'r')
    outputs_r_init = tf.matmul(att_t_r, hiddens_t)
    outputs_r_org = tf.squeeze(outputs_r_init)

    # attention left
    att_l = bilinear_attention_layer(hiddens_l, pool_t, sen_len_fw,
                                     2 * FLAGS.n_hidden, l2, FLAGS.random_base, 'tl')
    outputs_t_l_init = tf.matmul(att_l, hiddens_l)
    outputs_t_l_org = tf.squeeze(outputs_t_l_init)

    # attention right
    att_r = bilinear_attention_layer(hiddens_r, pool_t, sen_len_bw,
                                     2 * FLAGS.n_hidden, l2, FLAGS.random_base, 'tr')
    outputs_t_r_init = tf.matmul(att_r, hiddens_r)
    outputs_t_r_org = tf.squeeze(outputs_t_r_init)

    # attention target left
    att_t_l = bilinear_attention_layer(hiddens_t, outputs_t_l_org, sen_len_tr,
                                       2 * FLAGS.n_hidden, l2, FLAGS.random_base, 'l5')
    outputs_l_init = tf.matmul(att_t_l, hiddens_t)
    outputs_l = tf.squeeze(outputs_l_init)

    # attention target right
    att_t_r = bilinear_attention_layer(hiddens_t, outputs_t_r_org, sen_len_tr,
                                       2 * FLAGS.n_hidden, l2, FLAGS.random_base, 'r5')
    outputs_r_init = tf.matmul(att_t_r, hiddens_t)
    outputs_r = tf.squeeze(outputs_r_init)

    # attention left
    att_l = bilinear_attention_layer(hiddens_l, outputs_l_org, sen_len_fw,
                                     2 * FLAGS.n_hidden, l2, FLAGS.random_base, 'tl5')
    outputs_t_l_init = tf.matmul(att_l, hiddens_l)
    outputs_t_l = tf.squeeze(outputs_t_l_init)

    # attention right
    att_r = bilinear_attention_layer(hiddens_r, outputs_r_org, sen_len_bw,
                                     2 * FLAGS.n_hidden, l2, FLAGS.random_base, 'tr5')
    outputs_t_r_init = tf.matmul(att_r, hiddens_r)
    outputs_t_r = tf.squeeze(outputs_t_r_init)

    # attention-over-attention fusion, left pair
    outputs_init_1 = tf.concat([outputs_l_init, outputs_t_l_init], 1)
    att_outputs_1 = dot_produce_attention_layer(outputs_init_1, None,
                                                2 * FLAGS.n_hidden, l2, FLAGS.random_base, 'fin_aoa2')
    outputs_l = tf.squeeze(tf.matmul(tf.expand_dims(att_outputs_1[:, :, 0], 2), outputs_l_init))
    # outputs_r = tf.squeeze(tf.matmul(tf.expand_dims(att_outputs[:, :, 1], 2), outputs_r_init))
    outputs_t_l = tf.squeeze(tf.matmul(tf.expand_dims(att_outputs_1[:, :, 1], 2), outputs_t_l_init))
    # outputs_t_r = tf.squeeze(tf.matmul(tf.expand_dims(att_outputs[:, :, 3], 2), outputs_t_r_init))

    # attention-over-attention fusion, right pair
    outputs_init_2 = tf.concat([outputs_r_init, outputs_t_r_init], 1)
    att_outputs_2 = dot_produce_attention_layer(outputs_init_2, None,
                                                2 * FLAGS.n_hidden, l2, FLAGS.random_base, 'fin_aoar')
    outputs_r = tf.squeeze(tf.matmul(tf.expand_dims(att_outputs_2[:, :, 0], 2), outputs_r_init))
    # outputs_r = tf.squeeze(tf.matmul(tf.expand_dims(att_outputs[:, :, 1], 2), outputs_r_init))
    outputs_t_r = tf.squeeze(tf.matmul(tf.expand_dims(att_outputs_2[:, :, 1], 2), outputs_t_r_init))
    # outputs_t_r = tf.squeeze(tf.matmul(tf.expand_dims(att_outputs[:, :, 3], 2), outputs_t_r_init))

    for i in range(1):
        '''
        # attention left
        att_t_l = bilinear_attention_layer(hiddens_t, outputs_t_l, sen_len_tr,
                                           2 * FLAGS.n_hidden, l2, FLAGS.random_base, 'l' + str(i))
        outputs_l_init = tf.matmul(att_t_l, hiddens_t)
        outputs_l = tf.squeeze(outputs_l_init)
        # attention right
        att_t_r = bilinear_attention_layer(hiddens_t, outputs_t_r, sen_len_tr,
                                           2 * FLAGS.n_hidden, l2, FLAGS.random_base, 'r' + str(i))
        outputs_r_init = tf.matmul(att_t_r, hiddens_t)
        outputs_r = tf.squeeze(outputs_r_init)
        '''
        # attention target left
        att_l = bilinear_attention_layer(hiddens_l, outputs_l, sen_len_fw,
                                         2 * FLAGS.n_hidden, l2, FLAGS.random_base, 'tl' + str(i))
        outputs_t_l_init = tf.matmul(att_l, hiddens_l)
        outputs_t_l = tf.squeeze(outputs_t_l_init)

        # attention target right
        att_r = bilinear_attention_layer(hiddens_r, outputs_r, sen_len_bw,
                                         2 * FLAGS.n_hidden, l2, FLAGS.random_base, 'tr' + str(i))
        outputs_t_r_init = tf.matmul(att_r, hiddens_r)
        outputs_t_r = tf.squeeze(outputs_t_r_init)
        # print(outputs_t_r.shape)

        # attention left
        att_t_l = bilinear_attention_layer(hiddens_t, outputs_t_l, sen_len_tr,
                                           2 * FLAGS.n_hidden, l2, FLAGS.random_base, 'l' + str(i))
        outputs_l_init = tf.matmul(att_t_l, hiddens_t)
        outputs_l = tf.squeeze(outputs_l_init)

        # attention right
        att_t_r = bilinear_attention_layer(hiddens_t, outputs_t_r, sen_len_tr,
                                           2 * FLAGS.n_hidden, l2, FLAGS.random_base, 'r' + str(i))
        outputs_r_init = tf.matmul(att_t_r, hiddens_t)
        outputs_r = tf.squeeze(outputs_r_init)

    outputs = tf.concat([outputs_l, outputs_r, outputs_t_l, outputs_t_r], 1)
    prob = softmax_layer(outputs, 8 * FLAGS.n_hidden, FLAGS.random_base,
                         keep_prob2, l2, FLAGS.n_class)
    return prob, att_l, att_r, att_t_l, att_t_r
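
# dot_produce_attention_layer is defined elsewhere in the repo; from its call
# sites above (a [batch, k, 2n] stack of candidate vectors, no length mask,
# output indexed as [:, :, k]) it plausibly reduces each candidate to a scalar
# score and softmaxes over the k candidates. The sketch below is an assumed
# reconstruction, with the l2 and sequence-length arguments omitted; it is
# not the repo's implementation.
def dot_produce_attention_sketch(inputs, dim, random_base, scope):
    with tf.variable_scope('dp_att_' + scope):
        u = tf.get_variable('u', [dim, 1],
                            initializer=tf.random_uniform_initializer(-random_base, random_base))
        # one scalar score per candidate vector: [batch, 1, k]
        scores = tf.transpose(tf.tensordot(inputs, u, axes=[[2], [0]]), [0, 2, 1])
    return tf.nn.softmax(scores, axis=2)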
def ram(self, inputs, aspect):
    # bi-lstm memory
    cell = tf.contrib.rnn.LSTMCell
    M = bi_dynamic_rnn(cell, inputs, self.n_hidden, self.sen_len, self.max_len, 'memory', 'all')
    # relative-position weighting of the memory slots
    u_t = tf.expand_dims(
        tf.cast(self.position, tf.float32) /
        tf.expand_dims(tf.cast(self.sen_len, tf.float32), -1), -1)
    w_t = 1.0 - tf.abs(u_t)
    M = tf.concat([w_t * M, u_t], 2)  # batch_size * max_len * (2 * n_hidden + 1)
    batch_size = tf.shape(M)[0]

    # Attention Layer-1
    e_0 = tf.zeros([batch_size, self.n_hidden])
    aspect = tf.reshape(aspect, [-1, 1, self.embedding_dim])
    aspect = tf.ones([batch_size, self.max_len, self.embedding_dim], dtype=tf.float32) * aspect
    e = tf.zeros([batch_size, self.max_len, self.n_hidden])
    t_M = tf.concat([M, e, aspect], 2)
    t_M_dim = 2 * self.n_hidden + 1 + self.n_hidden + self.embedding_dim
    alpha = dot_produce_attention_layer(t_M, self.sen_len, t_M_dim, self.l2_reg, scope_name='att_1')
    i_al = tf.matmul(alpha, M)  # batch_size * 1 * (2 * n_hidden + 1)
    i_al = tf.reshape(i_al, [batch_size, 2 * self.n_hidden + 1])
    # GRU-style episode update of the memory state
    r = tf.sigmoid(tf.matmul(i_al, self.w_r) + tf.matmul(e_0, self.u_r))  # batch_size * n_hidden
    z = tf.sigmoid(tf.matmul(i_al, self.w_z) + tf.matmul(e_0, self.u_z))  # batch_size * n_hidden
    e_t1 = tf.tanh(tf.matmul(i_al, self.w_g) + tf.matmul(r * e_0, self.w_x))  # batch_size * n_hidden
    e_1 = (1.0 - z) * e_0 + z * e_t1

    # Attention Layer-2
    e = tf.ones([batch_size, self.max_len, self.n_hidden]) * tf.reshape(
        e_1, [batch_size, 1, self.n_hidden])
    t_M = tf.concat([M, e, aspect], 2)
    alpha = dot_produce_attention_layer(t_M, self.sen_len, t_M_dim, self.l2_reg, scope_name='att_2')
    i_al = tf.matmul(alpha, M)
    i_al = tf.reshape(i_al, [batch_size, 2 * self.n_hidden + 1])
    r = tf.sigmoid(tf.matmul(i_al, self.w_r) + tf.matmul(e_1, self.u_r))  # batch_size * n_hidden
    z = tf.sigmoid(tf.matmul(i_al, self.w_z) + tf.matmul(e_1, self.u_z))  # batch_size * n_hidden
    e_t2 = tf.tanh(tf.matmul(i_al, self.w_g) + tf.matmul(r * e_1, self.w_x))  # batch_size * n_hidden
    e_2 = (1.0 - z) * e_1 + z * e_t2

    scores = tf.contrib.layers.fully_connected(
        inputs=e_2,
        num_outputs=self.n_class,
        # activation_fn=tf.nn.relu,
        weights_initializer=tf.contrib.layers.xavier_initializer(uniform=True),
        weights_regularizer=tf.contrib.layers.l2_regularizer(self.l2_reg),
        scope='softmax')
    return scores
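
# Position-weighting sketch (plain numpy, illustrative): assuming
# self.position holds each word's distance to the target (an assumption based
# on the formula above), every memory slot is scaled by
# w_t = 1 - |position / sen_len|, so words far from the target contribute
# less to each episode. The distances below are hypothetical.
def _position_weight_demo():
    import numpy as np
    position = np.array([[2., 1., 0., 1., 2.]])  # distance of each word to the target
    sen_len = np.array([5.])
    u_t = position / sen_len[:, None]
    w_t = 1.0 - np.abs(u_t)                      # [[0.6, 0.8, 1.0, 0.8, 0.6]]
    assert w_t.max() == 1.0                      # the target position keeps full weight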