def encode_all(self, base_word_ids, source_word_ids, target_word_ids,
               insert_word_ids, common_word_ids):
    batch_size = tf.shape(base_word_ids)[0]

    with tf.name_scope('encode_all'):
        base_len = seq.length_pre_embedding(base_word_ids)
        src_len = seq.length_pre_embedding(source_word_ids)
        tgt_len = seq.length_pre_embedding(target_word_ids)
        iw_len = seq.length_pre_embedding(insert_word_ids)
        cw_len = seq.length_pre_embedding(common_word_ids)

        base_encoded, base_attention_bias = self.encoder(base_word_ids, base_len)

        kill_edit = self.config.editor.kill_edit
        draw_edit = self.config.editor.draw_edit

        # Micro-edit-vector attention in the decoder needs a real edit encoder,
        # so the two ablations must be off in that configuration.
        if self.config.editor.decoder.allow_mev_st_attn \
                or self.config.editor.decoder.allow_mev_ts_attn:
            assert not kill_edit and not draw_edit

        if kill_edit:
            # Ablation: zero out the edit vector entirely.
            edit_vector = tf.zeros(
                shape=(batch_size, self.config.editor.edit_encoder.edit_dim))
            mev_st = mev_ts = None
        else:
            if draw_edit:
                # Ablation: sample the edit vector from the prior instead of encoding it.
                edit_vector = random_noise_encoder(
                    batch_size,
                    self.config.editor.edit_encoder.edit_dim,
                    self.config.editor.norm_max)
                mev_st = mev_ts = None
            else:
                edit_vector, mev_st, mev_ts = self.edit_encoder(
                    source_word_ids, target_word_ids,
                    insert_word_ids, common_word_ids,
                    src_len, tgt_len, iw_len, cw_len,
                )

        encoder_outputs = (base_encoded, base_attention_bias)
        edit_encoder_outputs = (edit_vector, mev_st, mev_ts)

        return encoder_outputs, edit_encoder_outputs
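# The draw_edit branch above samples the edit vector from a prior instead of
# encoding it. `random_noise_encoder` is defined elsewhere in the repo; the
# sketch below is only an assumption of what it might compute, following the
# usual neural-editor-style prior of a random direction scaled to at most
# norm_max. The name `_random_noise_encoder_sketch` is illustrative.
def _random_noise_encoder_sketch(batch_size, edit_dim, norm_max):
    # Sample a direction uniformly by normalizing a standard Gaussian draw.
    noise = tf.random_normal(shape=(batch_size, edit_dim))
    direction = noise / (tf.norm(noise, axis=1, keepdims=True) + 1e-8)
    # Scale by a magnitude drawn uniformly from [0, norm_max].
    magnitude = tf.random_uniform(shape=(batch_size, 1), minval=0.0, maxval=norm_max)
    return direction * magnitude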
def editor_train(base_words, source_words, target_words, insert_words, delete_words,
                 embed_matrix, vocab_table, hidden_dim, agenda_dim, edit_dim,
                 num_encoder_layers, num_decoder_layers, attn_dim, beam_width,
                 max_sent_length, dropout_keep, lamb_reg, norm_eps, norm_max,
                 kill_edit, draw_edit, swap_memory, use_beam_decoder=False,
                 use_dropout=False, no_insert_delete_attn=False, enable_vae=True):
    batch_size = tf.shape(source_words)[0]

    # [batch]
    base_len = seq.length_pre_embedding(base_words)
    src_len = seq.length_pre_embedding(source_words)
    tgt_len = seq.length_pre_embedding(target_words)
    iw_len = seq.length_pre_embedding(insert_words)
    dw_len = seq.length_pre_embedding(delete_words)

    # variable of shape [vocab_size, embed_dim]
    embeddings = vocab.get_embeddings()

    # [batch x max_len x embed_dim]
    base_word_embeds = vocab.embed_tokens(base_words)
    # src_word_embeds = vocab.embed_tokens(source_words)
    # tgt_word_embeds = vocab.embed_tokens(target_words)
    insert_word_embeds = vocab.embed_tokens(insert_words)
    delete_word_embeds = vocab.embed_tokens(delete_words)

    # [batch x max_len x rnn_out_dim], [batch x rnn_out_dim]
    base_sent_hidden_states, base_sent_embed = encoder.source_sent_encoder(
        base_word_embeds, base_len, hidden_dim, num_encoder_layers,
        use_dropout=use_dropout, dropout_keep=dropout_keep, swap_memory=swap_memory)

    # [batch x edit_dim]
    if kill_edit:
        edit_vector = tf.zeros(shape=(batch_size, edit_dim))
    else:
        if draw_edit:
            edit_vector = edit_encoder.random_noise_encoder(batch_size, edit_dim, norm_max)
        else:
            edit_vector = edit_encoder.accumulator_encoder(
                insert_word_embeds, delete_word_embeds, iw_len, dw_len,
                edit_dim, lamb_reg, norm_eps, norm_max, dropout_keep,
                enable_vae=enable_vae)

    # [batch x agenda_dim]
    input_agenda = agn.linear(base_sent_embed, edit_vector, agenda_dim)

    train_dec_inp, train_dec_inp_len, \
    train_dec_out, train_dec_out_len = prepare_decoder_input_output(
        target_words, tgt_len, vocab_table)

    train_decoder = decoder.train_decoder(
        input_agenda, embeddings, train_dec_inp, base_sent_hidden_states,
        insert_word_embeds, delete_word_embeds,
        train_dec_inp_len, base_len, iw_len, dw_len,
        attn_dim, hidden_dim, num_decoder_layers, swap_memory,
        enable_dropout=use_dropout, dropout_keep=dropout_keep,
        no_insert_delete_attn=no_insert_delete_attn)

    if use_beam_decoder:
        infr_decoder = decoder.beam_eval_decoder(
            input_agenda, embeddings,
            vocab.get_token_id(vocab.START_TOKEN, vocab_table),
            vocab.get_token_id(vocab.STOP_TOKEN, vocab_table),
            base_sent_hidden_states, insert_word_embeds, delete_word_embeds,
            base_len, iw_len, dw_len,
            attn_dim, hidden_dim, num_decoder_layers, max_sent_length, beam_width,
            swap_memory, enable_dropout=use_dropout, dropout_keep=dropout_keep,
            no_insert_delete_attn=no_insert_delete_attn)
    else:
        infr_decoder = decoder.greedy_eval_decoder(
            input_agenda, embeddings,
            vocab.get_token_id(vocab.START_TOKEN, vocab_table),
            vocab.get_token_id(vocab.STOP_TOKEN, vocab_table),
            base_sent_hidden_states, insert_word_embeds, delete_word_embeds,
            base_len, iw_len, dw_len,
            attn_dim, hidden_dim, num_decoder_layers, max_sent_length,
            swap_memory, enable_dropout=use_dropout, dropout_keep=dropout_keep,
            no_insert_delete_attn=no_insert_delete_attn)

    return train_decoder, infr_decoder, train_dec_out, train_dec_out_len
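# `agn.linear` above folds the base-sentence embedding and the edit vector into
# the decoder agenda. Its real definition lives in the agenda module; the sketch
# below is a minimal assumption that it is a single affine projection of the
# concatenation, and `_agenda_linear_sketch` is an illustrative name only.
def _agenda_linear_sketch(sent_embed, edit_vector, agenda_dim):
    # [batch x (rnn_out_dim + edit_dim)] -> [batch x agenda_dim]
    concat = tf.concat([sent_embed, edit_vector], axis=1)
    return tf.layers.dense(concat, agenda_dim, name='agenda_linear')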
def test_decoder_train(dataset_file, embedding_file):
    with tf.Graph().as_default():
        d_fn, gold_dataset = dataset_file
        e_fn, gold_embeds = embedding_file

        v, embed_matrix = vocab.read_word_embeddings(e_fn, EMBED_DIM)
        vocab_lookup = vocab.get_vocab_lookup(v)

        stop_token = tf.constant(bytes(vocab.STOP_TOKEN, encoding='utf8'), dtype=tf.string)
        stop_token_id = vocab_lookup.lookup(stop_token)

        start_token = tf.constant(bytes(vocab.START_TOKEN, encoding='utf8'), dtype=tf.string)
        start_token_id = vocab_lookup.lookup(start_token)

        pad_token = tf.constant(bytes(vocab.PAD_TOKEN, encoding='utf8'), dtype=tf.string)
        pad_token_id = vocab_lookup.lookup(pad_token)

        dataset = neural_editor.input_fn(d_fn, vocab_lookup, BATCH_SIZE, NUM_EPOCH)
        dataset_iter = dataset.make_initializable_iterator()
        (src, tgt, inw, dlw), _ = dataset_iter.get_next()

        src_len = sequence.length_pre_embedding(src)
        tgt_len = sequence.length_pre_embedding(tgt)

        dec_inputs = decoder.prepare_decoder_inputs(tgt, start_token_id)
        dec_outputs = decoder.prepare_decoder_output(tgt, tgt_len, stop_token_id, pad_token_id)

        dec_inputs_len = sequence.length_pre_embedding(dec_inputs)
        dec_outputs_len = sequence.length_pre_embedding(dec_outputs)

        batch_size = tf.shape(src)[0]
        edit_vector = edit_encoder.random_noise_encoder(batch_size, EDIT_DIM, 14.0)

        embedding = tf.get_variable(
            'embeddings',
            shape=embed_matrix.shape,
            initializer=tf.constant_initializer(embed_matrix))
        src_embd = tf.nn.embedding_lookup(embedding, src)

        src_sent_embeds, final_states = encoder.source_sent_encoder(src_embd, src_len, 20, 3, 0.8)

        agn = agenda.linear(final_states, edit_vector, 4)

        dec_out = decoder.train_decoder(
            agn, embedding, dec_inputs, src_sent_embeds,
            tf.nn.embedding_lookup(embedding, inw),
            tf.nn.embedding_lookup(embedding, dlw),
            dec_inputs_len, src_len,
            sequence.length_pre_embedding(inw),
            sequence.length_pre_embedding(dlw),
            5, 20, 3, False)

        # eval_dec_out = decoder.greedy_eval_decoder(
        #     agn, embedding,
        #     start_token_id, stop_token_id,
        #     src_sent_embeds,
        #     tf.nn.embedding_lookup(embedding, inw),
        #     tf.nn.embedding_lookup(embedding, dlw),
        #     src_len, sequence.length_pre_embedding(inw), sequence.length_pre_embedding(dlw),
        #     5, 20, 3, 40
        # )

        eval_dec_out = decoder.beam_eval_decoder(
            agn, embedding,
            start_token_id, stop_token_id,
            src_sent_embeds,
            tf.nn.embedding_lookup(embedding, inw),
            tf.nn.embedding_lookup(embedding, dlw),
            src_len, sequence.length_pre_embedding(inw), sequence.length_pre_embedding(dlw),
            5, 20, 3, 40)

        # saver = tf.train.Saver(write_version=tf.train.SaverDef.V1)
        # s = tf.summary.FileWriter('data/an')
        # s.add_graph(g)
        #
        # all_print = tf.get_collection('print')

        an, final_states, dec_len = dec_out
        stacked = decoder.attention_score(dec_out)

        with tf.Session() as sess:
            sess.run([
                tf.global_variables_initializer(),
                tf.local_variables_initializer(),
                tf.tables_initializer()
            ])
            sess.run(dataset_iter.initializer)

            print(sess.run([eval_dec_out]))
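# The test relies on decoder.prepare_decoder_inputs / prepare_decoder_output to
# build teacher-forcing inputs and shifted targets. The two functions below are
# only a sketch of what that preprocessing might look like, assuming the
# start/stop/pad ids are int64 scalars as produced by the vocab lookup above;
# the `_sketch` names are illustrative, not the repo's API.
def _prepare_decoder_inputs_sketch(target_ids, start_token_id):
    # Prepend the start token so the decoder input at step t is gold token t-1.
    batch_size = tf.shape(target_ids)[0]
    start_col = tf.fill([batch_size, 1], start_token_id)
    return tf.concat([start_col, target_ids], axis=1)


def _prepare_decoder_output_sketch(target_ids, target_len, stop_token_id, pad_token_id):
    # Append one padding column, then overwrite position `target_len` of each
    # row with the stop token so every sequence ends in STOP followed by PAD.
    batch_size = tf.shape(target_ids)[0]
    pad_col = tf.fill([batch_size, 1], pad_token_id)
    extended = tf.concat([target_ids, pad_col], axis=1)
    stop_mask = tf.one_hot(tf.cast(target_len, tf.int32),
                           depth=tf.shape(extended)[1], dtype=extended.dtype)
    return extended * (1 - stop_mask) + stop_mask * stop_token_id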
def editor_train(base_words, extended_base_words, output_words, extended_output_words,
                 source_words, target_words, insert_words, delete_words, oov,
                 vocab_size, hidden_dim, agenda_dim, edit_dim, micro_edit_ev_dim,
                 num_heads, num_encoder_layers, num_decoder_layers, attn_dim, beam_width,
                 ctx_hidden_dim, ctx_hidden_layer, wa_hidden_dim, wa_hidden_layer,
                 meve_hidden_dim, meve_hidden_layers, recons_dense_layers,
                 max_sent_length, dropout_keep, lamb_reg, norm_eps, norm_max,
                 kill_edit, draw_edit, swap_memory, use_beam_decoder=False,
                 use_dropout=False, no_insert_delete_attn=False, enable_vae=True):
    batch_size = tf.shape(source_words)[0]

    # [batch]
    base_len = seq.length_pre_embedding(base_words)
    output_len = seq.length_pre_embedding(extended_output_words)
    src_len = seq.length_pre_embedding(source_words)
    tgt_len = seq.length_pre_embedding(target_words)
    iw_len = seq.length_pre_embedding(insert_words)
    dw_len = seq.length_pre_embedding(delete_words)

    # variable of shape [vocab_size, embed_dim]
    embeddings = vocab.get_embeddings()

    # [batch x max_len x embed_dim]
    base_word_embeds = vocab.embed_tokens(base_words)
    output_word_embeds = vocab.embed_tokens(output_words)
    # src_word_embeds = vocab.embed_tokens(source_words)
    # tgt_word_embeds = vocab.embed_tokens(target_words)
    insert_word_embeds = vocab.embed_tokens(insert_words)
    delete_word_embeds = vocab.embed_tokens(delete_words)

    # [batch x max_len x rnn_out_dim], [batch x rnn_out_dim]
    base_sent_hidden_states, base_sent_embed = encoder.source_sent_encoder(
        base_word_embeds, base_len, hidden_dim, num_encoder_layers,
        use_dropout=use_dropout, dropout_keep=dropout_keep, swap_memory=swap_memory)

    # This variant always uses the accumulator edit encoder; the ablation
    # branches below are kept for interface compatibility but are unreachable.
    assert not kill_edit and not draw_edit

    # [batch x edit_dim]
    if kill_edit:
        edit_vector = tf.zeros(shape=(batch_size, edit_dim))
    else:
        if draw_edit:
            edit_vector = random_noise_encoder(batch_size, edit_dim, norm_max)
        else:
            edit_vector = accumulator_encoder(
                insert_word_embeds, delete_word_embeds, iw_len, dw_len,
                edit_dim, lamb_reg, norm_eps, norm_max, dropout_keep,
                enable_vae=enable_vae)

    # Placeholder tensors for the insert/delete word-attention inputs expected
    # by the decoder; the accumulator encoder does not produce them.
    wa_inserted, wa_deleted = (tf.constant([[0]]), tf.constant([[0]])), \
                              (tf.constant([[0]]), tf.constant([[0]]))

    # [batch x agenda_dim]
    base_agenda = agn.linear(base_sent_embed, edit_vector, agenda_dim)

    train_dec_inp, train_dec_inp_len, \
    train_dec_out, train_dec_out_len = prepare_decoder_input_output(
        output_words, extended_output_words, output_len)

    train_dec_inp_extended = prepare_decoder_inputs(extended_output_words, tf.cast(-1, tf.int64))

    train_decoder = decoder.train_decoder(
        base_agenda, embeddings, extended_base_words, oov,
        train_dec_inp, train_dec_inp_extended, base_sent_hidden_states,
        wa_inserted, wa_deleted,
        train_dec_inp_len, base_len, src_len, tgt_len,
        vocab_size, attn_dim, hidden_dim, num_decoder_layers, swap_memory,
        enable_dropout=use_dropout, dropout_keep=dropout_keep,
        no_insert_delete_attn=no_insert_delete_attn)

    if use_beam_decoder:
        infr_decoder = decoder.beam_eval_decoder(
            base_agenda, embeddings, extended_base_words, oov,
            vocab.get_token_id(vocab.START_TOKEN), vocab.get_token_id(vocab.STOP_TOKEN),
            base_sent_hidden_states, wa_inserted, wa_deleted,
            base_len, src_len, tgt_len,
            vocab_size, attn_dim, hidden_dim, num_decoder_layers,
            max_sent_length, beam_width, swap_memory,
            enable_dropout=use_dropout, dropout_keep=dropout_keep,
            no_insert_delete_attn=no_insert_delete_attn)
    else:
        infr_decoder = decoder.greedy_eval_decoder(
            base_agenda, embeddings, extended_base_words, oov,
            vocab.get_token_id(vocab.START_TOKEN), vocab.get_token_id(vocab.STOP_TOKEN),
            base_sent_hidden_states, wa_inserted, wa_deleted,
            base_len, src_len, tgt_len,
            vocab_size, attn_dim, hidden_dim, num_decoder_layers,
            max_sent_length, swap_memory,
            enable_dropout=use_dropout, dropout_keep=dropout_keep,
            no_insert_delete_attn=no_insert_delete_attn)

    # Reconstruct the edit vector from the output sentence and register the
    # auxiliary reconstruction loss.
    edit_vector_recons = output_words_to_edit_vector(
        output_word_embeds, output_len, edit_dim,
        ctx_hidden_dim, ctx_hidden_layer, recons_dense_layers, swap_memory)

    optimizer.add_reconst_loss(edit_vector, edit_vector_recons)

    return train_decoder, infr_decoder, train_dec_out, train_dec_out_len
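# The reconstruction branch above regresses the edit vector back from the
# output sentence and registers an auxiliary loss. `optimizer.add_reconst_loss`
# is project code; a minimal sketch, assuming it adds a squared-error term to a
# loss collection that the training op later sums over (the collection name and
# `_sketch` function name are assumptions):
def _add_reconst_loss_sketch(edit_vector, edit_vector_recons, weight=1.0):
    # Mean over the batch of the squared L2 distance between the encoded and
    # reconstructed edit vectors.
    recons_loss = tf.reduce_mean(
        tf.reduce_sum(tf.squared_difference(edit_vector, edit_vector_recons), axis=1))
    tf.add_to_collection('aux_losses', weight * recons_loss)
    return recons_loss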
def editor_train(base_words, output_words, source_words, target_words, insert_words,
                 delete_words, hidden_dim, agenda_dim, edit_dim, micro_edit_ev_dim,
                 num_heads, num_encoder_layers, num_decoder_layers, attn_dim, beam_width,
                 ctx_hidden_dim, ctx_hidden_layer, wa_hidden_dim, wa_hidden_layer,
                 meve_hidden_dim, meve_hidden_layers, max_sent_length, dropout_keep,
                 lamb_reg, norm_eps, norm_max, kill_edit, draw_edit, swap_memory,
                 use_beam_decoder=False, use_dropout=False, no_insert_delete_attn=False,
                 enable_vae=True):
    batch_size = tf.shape(source_words)[0]

    # [batch]
    base_len = seq.length_pre_embedding(base_words)
    output_len = seq.length_pre_embedding(output_words)
    src_len = seq.length_pre_embedding(source_words)
    tgt_len = seq.length_pre_embedding(target_words)
    iw_len = seq.length_pre_embedding(insert_words)
    dw_len = seq.length_pre_embedding(delete_words)

    # variable of shape [vocab_size, embed_dim]
    embeddings = vocab.get_embeddings()

    # [batch x max_len x embed_dim]
    base_word_embeds = vocab.embed_tokens(base_words)
    src_word_embeds = vocab.embed_tokens(source_words)
    tgt_word_embeds = vocab.embed_tokens(target_words)
    insert_word_embeds = vocab.embed_tokens(insert_words)
    delete_word_embeds = vocab.embed_tokens(delete_words)

    # Share the sentence-encoder weights between the base encoder and the
    # attention edit encoder via a template.
    sent_encoder = tf.make_template('sent_encoder', encoder.source_sent_encoder,
                                    hidden_dim=hidden_dim,
                                    num_layer=num_encoder_layers,
                                    swap_memory=swap_memory,
                                    use_dropout=use_dropout,
                                    dropout_keep=dropout_keep)

    # [batch x max_len x rnn_out_dim], [batch x rnn_out_dim]
    base_sent_hidden_states, base_sent_embed = sent_encoder(base_word_embeds, base_len)

    # This variant relies on the attention edit encoder to produce wa_inserted
    # and wa_deleted, so the ablation branches below must not be taken.
    assert not kill_edit and not draw_edit

    # [batch x edit_dim]
    if kill_edit:
        edit_vector = tf.zeros(shape=(batch_size, edit_dim))
    else:
        if draw_edit:
            edit_vector = random_noise_encoder(batch_size, edit_dim, norm_max)
        else:
            edit_vector, wa_inserted, wa_deleted = attn_encoder(
                src_word_embeds, tgt_word_embeds, insert_word_embeds, delete_word_embeds,
                src_len, tgt_len, iw_len, dw_len,
                ctx_hidden_dim, ctx_hidden_layer,
                wa_hidden_dim, wa_hidden_layer,
                meve_hidden_dim, meve_hidden_layers,
                edit_dim, micro_edit_ev_dim, num_heads,
                lamb_reg, norm_eps, norm_max,
                sent_encoder,
                use_dropout=use_dropout, dropout_keep=dropout_keep,
                swap_memory=swap_memory, enable_vae=enable_vae)

    # [batch x agenda_dim]
    input_agenda = agn.linear(base_sent_embed, edit_vector, agenda_dim)

    train_dec_inp, train_dec_inp_len, \
    train_dec_out, train_dec_out_len = prepare_decoder_input_output(output_words, output_len, None)

    train_decoder = decoder.train_decoder(
        input_agenda, embeddings, train_dec_inp, base_sent_hidden_states,
        wa_inserted, wa_deleted,
        train_dec_inp_len, base_len, iw_len, dw_len,
        attn_dim, hidden_dim, num_decoder_layers, swap_memory,
        enable_dropout=use_dropout, dropout_keep=dropout_keep,
        no_insert_delete_attn=no_insert_delete_attn)

    if use_beam_decoder:
        infr_decoder = decoder.beam_eval_decoder(
            input_agenda, embeddings,
            vocab.get_token_id(vocab.START_TOKEN), vocab.get_token_id(vocab.STOP_TOKEN),
            base_sent_hidden_states, wa_inserted, wa_deleted,
            base_len, iw_len, dw_len,
            attn_dim, hidden_dim, num_decoder_layers, max_sent_length, beam_width,
            swap_memory, enable_dropout=use_dropout, dropout_keep=dropout_keep,
            no_insert_delete_attn=no_insert_delete_attn)
    else:
        infr_decoder = decoder.greedy_eval_decoder(
            input_agenda, embeddings,
            vocab.get_token_id(vocab.START_TOKEN), vocab.get_token_id(vocab.STOP_TOKEN),
            base_sent_hidden_states, wa_inserted, wa_deleted,
            base_len, iw_len, dw_len,
            attn_dim, hidden_dim, num_decoder_layers, max_sent_length,
            swap_memory, enable_dropout=use_dropout, dropout_keep=dropout_keep,
            no_insert_delete_attn=no_insert_delete_attn)

    return train_decoder, infr_decoder, train_dec_out, train_dec_out_len
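# editor_train only returns the decoder objects plus the gold outputs and their
# lengths; the loss is computed elsewhere. Below is a minimal sketch of the
# standard masked cross-entropy such outputs would feed, assuming the train
# decoder exposes per-step logits of shape [batch x time x vocab] (the `logits`
# argument and the `_sketch` name are illustrative, not the repo's API):
def _masked_xent_loss_sketch(logits, train_dec_out, train_dec_out_len):
    # Zero out contributions from padded positions past each sequence's length.
    mask = tf.sequence_mask(train_dec_out_len,
                            maxlen=tf.shape(train_dec_out)[1],
                            dtype=tf.float32)
    xent = tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=train_dec_out, logits=logits)
    return tf.reduce_sum(xent * mask) / tf.reduce_sum(mask)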