def call(self, enc_output, dec_hidden, enc_inp, enc_extended_inp, dec_inp, batch_oov_len, enc_padding_mask, use_coverage, prev_coverage):
    """Run the PGN decoder over the whole target sequence with teacher forcing.

    Computes an initial attention context, then, for each target position,
    feeds the ground-truth token to the decoder, re-attends, and computes the
    generation probability p_gen.  The per-step vocabulary distributions,
    attention distributions and p_gens are merged by
    ``decoding.calc_final_dist`` into the final extended-vocabulary
    distributions.

    Returns a dict with ``logits``, ``dec_hidden``, ``attentions``,
    ``coverages`` and ``p_gens``; in train mode the sequences are kept as
    Python lists, otherwise they are stacked along the time axis.
    """
    predictions, attentions, coverages, p_gens = [], [], [], []

    # Attention for the very first decoder step: context vector and the
    # initial coverage state.
    context_vector, attn_dist, coverage_next = self.attention(
        dec_hidden, enc_output, enc_padding_mask, use_coverage, prev_coverage)

    for step in range(dec_inp.shape[1]):
        # Teacher forcing: the ground-truth token at `step` is fed to the
        # decoder instead of the previous prediction.
        dec_x, pred, dec_hidden = self.decoder(
            tf.expand_dims(dec_inp[:, step], 1),
            dec_hidden, enc_output, context_vector)
        context_vector, attn_dist, coverage_next = self.attention(
            dec_hidden, enc_output, enc_padding_mask, use_coverage, coverage_next)
        p_gen = self.pointer(
            context_vector, dec_hidden, tf.squeeze(dec_x, axis=1))

        predictions.append(pred)
        coverages.append(coverage_next)
        attentions.append(attn_dist)
        p_gens.append(p_gen)

    # Blend the vocabulary distributions with the copy (attention)
    # distributions to obtain the final PGN output distributions.
    final_dists = decoding.calc_final_dist(
        enc_extended_inp, predictions, attentions, p_gens, batch_oov_len,
        self.params['vocab_size'], self.params['batch_size'])

    if self.params['mode'] == "train":
        return dict(logits=final_dists,
                    dec_hidden=dec_hidden,
                    attentions=attentions,
                    coverages=coverages,
                    p_gens=p_gens)
    return dict(logits=tf.stack(final_dists, 1),
                dec_hidden=dec_hidden,
                attentions=tf.stack(attentions, 1),
                coverages=tf.stack(coverages, 1),
                p_gens=tf.stack(p_gens, 1))
def call(self, enc_output, dec_hidden, enc_inp, enc_extended_inp, dec_inp, batch_oov_len, enc_padding_mask, use_coverage, prev_coverage):
    """Decode the full target sequence (teacher forcing) and produce PGN
    final distributions.

    An initial attention pass seeds the first context vector; each loop
    iteration then decodes one ground-truth token, refreshes attention and
    coverage, and records the step's vocab distribution, attention
    distribution and p_gen.  ``decoding.calc_final_dist`` merges them into
    extended-vocabulary distributions.

    Returns a dict of ``logits``/``dec_hidden``/``attentions``/``coverages``/
    ``p_gens`` — raw lists in train mode, time-stacked tensors otherwise.
    """
    step_preds = []
    step_attns = []
    step_covs = []
    step_pgens = []

    # Context vector for decoder step 0 (the initial attention distribution
    # itself is not needed, only the context and coverage).
    context_vector, _, coverage_next = self.attention(
        dec_hidden,
        enc_output,
        enc_padding_mask,
        use_coverage,
        prev_coverage)

    for t in range(dec_inp.shape[1]):
        # Teacher forcing: feed the gold token for position t.
        token = tf.expand_dims(dec_inp[:, t], 1)
        dec_x, pred, dec_hidden = self.decoder(
            token, dec_hidden, enc_output, context_vector)
        context_vector, attn_dist, coverage_next = self.attention(
            dec_hidden,
            enc_output,
            enc_padding_mask,
            use_coverage,
            coverage_next)
        p_gen = self.pointer(
            context_vector, dec_hidden, tf.squeeze(dec_x, axis=1))

        step_preds.append(pred)  # per-step (batch_size, vocab) distributions
        step_covs.append(coverage_next)
        step_attns.append(attn_dist)
        step_pgens.append(p_gen)

    # Final PGN distributions over the extended vocabulary (in-article OOVs
    # included via the copy mechanism).
    final_dists = decoding.calc_final_dist(
        _enc_batch_extend_vocab=enc_extended_inp,  # encoder ids with OOVs mapped into the extended vocab
        vocab_dists=step_preds,
        attn_dists=step_attns,
        p_gens=step_pgens,
        batch_oov_len=batch_oov_len,
        vocab_size=self.params["vocab_size"],
        batch_size=self.params["batch_size"])

    if self.params['mode'] == "train":
        outputs = dict(logits=final_dists,
                       dec_hidden=dec_hidden,
                       attentions=step_attns,
                       coverages=step_covs,
                       p_gens=step_pgens)
    else:
        outputs = dict(logits=tf.stack(final_dists, 1),
                       dec_hidden=dec_hidden,
                       attentions=tf.stack(step_attns, 1),
                       coverages=tf.stack(step_covs, 1),
                       p_gens=tf.stack(step_pgens, 1))
    return outputs
def call(self, enc_output, dec_hidden, enc_inp, enc_extended_inp, dec_inp, batch_oov_len, enc_padding_mask, use_coverage, prev_coverage):
    """Teacher-forced decode of the target sequence through the PGN head.

    Seeds attention once for the first step, then per target position:
    decode the gold token, re-attend (updating coverage), and compute p_gen.
    The collected per-step distributions are combined by
    ``decoding.calc_final_dist`` into final extended-vocabulary outputs.

    Returns a dict keyed ``logits``, ``dec_hidden``, ``attentions``,
    ``coverages``, ``p_gens``; sequences stay as lists in train mode and are
    stacked on axis 1 otherwise.
    """
    preds, attns, covs, gens = [], [], [], []

    # First attention pass gives the context vector for decoder step 0.
    context_vector, attn_dist, coverage_next = self.attention(
        dec_hidden, enc_output, enc_padding_mask,
        use_coverage=use_coverage, prev_coverage=prev_coverage)

    # Teacher forcing: each step consumes the ground-truth token rather than
    # the model's previous prediction.
    for t in range(dec_inp.shape[1]):
        dec_x, pred, dec_hidden = self.decoder(
            tf.expand_dims(dec_inp[:, t], 1),
            dec_hidden, enc_output, context_vector)
        context_vector, attn_dist, coverage_next = self.attention(
            dec_hidden, enc_output, enc_padding_mask,
            use_coverage, coverage_next)
        p_gen = self.pointer(
            context_vector, dec_hidden, tf.squeeze(dec_x, axis=1))

        preds.append(pred)
        covs.append(coverage_next)
        attns.append(attn_dist)
        gens.append(p_gen)

    # Merge generation and copy distributions into the final PGN output.
    final_dists = decoding.calc_final_dist(
        _enc_batch_extend_vocab=enc_extended_inp,
        vocab_dists=preds,
        attn_dists=attns,
        p_gens=gens,
        batch_oov_len=batch_oov_len,
        vocab_size=self.params["vocab_size"],
        batch_size=self.params["batch_size"])

    if self.params['mode'] == "train":
        return dict(logits=final_dists,
                    dec_hidden=dec_hidden,
                    attentions=attns,
                    coverages=covs,
                    p_gens=gens)
    return dict(logits=tf.stack(final_dists, 1),
                dec_hidden=dec_hidden,
                attentions=tf.stack(attns, 1),
                coverages=tf.stack(covs, 1),
                p_gens=tf.stack(gens, 1))
def call(self, inp, tar, training, enc_padding_mask, look_ahead_mask, dec_padding_mask): enc_output = self.encoder(inp, training, enc_padding_mask) # (batch_size, inp_seq_len, d_model) # dec_output.shape == (batch_size, tar_seq_len, d_model) dec_output, attention_weights = self.decoder(tar, enc_output, training, look_ahead_mask, dec_padding_mask) final_output = self.final_layer(dec_output) # (batch_size, tar_seq_len, target_vocab_size) if self.params["pointer_gen"]: final_dists = calc_final_dist(enc_extended_inp, predictions, attentions, p_gens, batch_oov_len, self.params["vocab_size"], self.params["batch_size"]) outputs = dict(logits=tf.stack(final_dists, 1), attentions=attention_weights) return outputs
def _decode_target(self, enc_output, dec_hidden, enc_extended_inp, dec_inp, batch_oov_len, enc_padding_mask, use_coverage, prev_coverage):
    """Decode the given target sequence with teacher forcing and return
    stacked PGN outputs.

    One initial attention pass seeds the context vector; each subsequent step
    decodes a gold token, refreshes attention/coverage and computes p_gen.
    ``decoding.calc_final_dist`` then merges the per-step vocab and attention
    distributions into extended-vocabulary distributions.

    Returns a dict with time-stacked ``logits`` plus the final ``dec_hidden``
    and the per-step ``attentions`` and ``coverages`` lists.
    """
    # Seed attention for step 0.  (prev_coverage may be None on the first
    # call — presumably coverage starts empty; confirm against the caller.)
    context_vector, attn_dist, coverage_next = self.attention(
        dec_hidden,        # decoder state
        enc_output,        # encoder outputs
        enc_padding_mask,  # mask over encoder positions
        use_coverage,
        prev_coverage)

    vocab_dists, attn_dists, coverage_hist, gen_probs = [], [], [], []

    for t in range(dec_inp.shape[1]):
        # Teacher forcing: decode the ground-truth token at position t.
        dec_x, pred, dec_hidden = self.decoder(
            tf.expand_dims(dec_inp[:, t], 1),
            dec_hidden, enc_output, context_vector)
        context_vector, attn_dist, coverage_next = self.attention(
            dec_hidden, enc_output, enc_padding_mask,
            use_coverage, coverage_next)
        p_gen = self.pointer(
            context_vector, dec_hidden, tf.squeeze(dec_x, axis=1))

        vocab_dists.append(pred)
        coverage_hist.append(coverage_next)
        attn_dists.append(attn_dist)
        gen_probs.append(p_gen)

    # Combine generate vs. copy distributions into the final PGN output.
    final_dists = decoding.calc_final_dist(
        enc_extended_inp, vocab_dists, attn_dists, gen_probs, batch_oov_len,
        self.params["vocab_size"], self.params["batch_size"])

    return dict(logits=tf.stack(final_dists, 1),
                dec_hidden=dec_hidden,
                attentions=attn_dists,
                coverages=coverage_hist)