def pair_wise_decode(self):
    """Decode pairwise batches until the dataset is exhausted.

    Writes one "<article>\t<prediction>" line per decoded instance to
    output.txt under FLAGS.data_path, then prints corpus BLEU and accuracy
    computed by the project's `matrix` helpers. Stops when the batcher
    returns None (single-pass mode finished). Returns None.
    """
    out_path = os.path.join(FLAGS.data_path, "output.txt")
    outputfile = codecs.open(out_path, "w", "utf8")
    output_result = []
    list_of_reference = []
    try:
        while True:
            batch = self._batcher.next_pairwised_decode_batch()  # 1 example repeated across batch
            if batch is None:  # finished decoding dataset in single_pass mode
                logging.info("eval_finished")
                break
            print(self._batcher.c_index)
            # Evaluate the whole batch in one step.
            result = self.eval_one_batch(self._sess, self._model, self._vocab, batch)
            for i, instance in enumerate(result):
                # Only the first real_length entries are genuine examples;
                # the rest of the batch is padding.
                if i == len(batch.art_oovs):
                    break
                if i >= batch.real_length:
                    print("eval done with {} instances".format(len(output_result)))
                    break
                out_words = data.outputids2words(instance, self._model._vocab_out,
                                                 batch.art_oovs[i])
                # Truncate at the first stop token, if one was produced.
                if data.STOP_DECODING in out_words:
                    out_words = out_words[:out_words.index(data.STOP_DECODING)]
                output_now = " ".join(out_words)
                output_result.append(output_now)
                refer = batch.original_abstracts[i].strip()
                list_of_reference.append([refer])
                outputfile.write(batch.original_articles[i] + '\t' + output_now + '\n')
    finally:
        # Close even if decoding raises; the original only closed on the
        # clean "batch is None" path and leaked the handle on errors.
        outputfile.close()
    bleu = matrix.bleu_score(list_of_reference, output_result)
    acc = matrix.compute_acc(list_of_reference, output_result)
    print("bleu : {} acc : {}".format(bleu, acc))
    return
def validation_acc(dev_model, FLAGS):
    """Run one evaluation pass over the dev set and return (bleu, acc, dev_loss).

    Loads the most recent checkpoint, decodes every dev batch, writes
    "<article>\t<abstract>\t<prediction>" lines to train_model/<step>.test,
    and logs BLEU/accuracy plus two random (reference, prediction) samples.

    NOTE(review): dev_loss is always 0 as written; per-batch losses are
    accumulated in totalLoss/numBatches but only used by the commented-out
    diagnostic print — preserved for parity with the original.
    """
    dev_model.create_or_load_recent_model()
    dev_loss = 0
    valid_batcher = dev_model.batcher
    numBatches = 0
    totalLoss = 0
    output_result = []
    list_of_reference = []
    step = dev_model.get_specific_variable(dev_model.global_step)
    out_f = open(r"train_model/{}.test".format(step), "w", encoding='utf-8')
    #gate_f = open(os.path.join(FLAGS.log_root, "gate.txt"), 'w', encoding="utf-8")

    def write_pgens(pgen_label, gate_prob, gate_f):
        # Dump pointer-gen labels then gate probabilities as space-joined
        # columns (kept for the commented-out gate diagnostics below).
        tmp_pgen_label = []
        for i in range(len(pgen_label)):
            tmp_pgen_label.append(str(pgen_label[i]))
        gate_f.write(' '.join(tmp_pgen_label) + '\t')
        gate_f.flush()
        tmp_gate = []
        for i in range(len(gate_prob)):
            tmp_gate.append(str(gate_prob[i]))
            gate_prob[i] = str(gate_prob[i])
        gate_f.write(' '.join(tmp_gate) + '\n')
        gate_f.flush()

    try:
        with dev_model.graph.as_default():
            while True:
                valid_batch = valid_batcher.next_batch()
                if valid_batch is None:
                    break
                # Skip batches whose oov list does not cover every encoder row.
                if len(valid_batch.art_oovs) < len(valid_batch.enc_batch):
                    continue
                results = dev_model.run_eval_step(valid_batch)
                loss = results['loss']
                ids = np.array(results['final_ids']).T
                if np.isnan(loss):
                    logging.debug("Nan")
                '''gate_p = np.array(results['gate_prob'])
                gate_p = np.argmax(gate_p,axis=-1)
                gate_p = gate_p.T'''
                for i, instance in enumerate(ids):
                    # Only real_length rows are genuine; the rest is padding.
                    if i >= valid_batch.real_length:
                        print("eval done with {} instances".format(len(output_result)))
                        break
                    if i == len(valid_batch.art_oovs):
                        break
                    out_words = data.outputids2words(instance, dev_model._vocab_out,
                                                     valid_batch.art_oovs[i])
                    # Truncate at the first stop token, if produced.
                    if data.STOP_DECODING in out_words:
                        out_words = out_words[:out_words.index(data.STOP_DECODING)]
                    output_now = " ".join(out_words)
                    output_result.append(output_now)
                    refer = valid_batch.original_abstracts[i].strip()
                    list_of_reference.append([refer])
                    out_f.write(valid_batch.original_articles[i] + '\t' +
                                valid_batch.original_abstracts[i] + '\t' +
                                output_now + '\n')
                    #gate_f.write(str(valid_batch.enc_lens[i])+'\t')
                    #write_pgens(valid_batch.pgen_label[i], gate_p[i],gate_f)
                totalLoss += loss
                numBatches += 1
    finally:
        # The original never closed this handle (leak); ensure closure even
        # if an eval step raises.
        out_f.close()

    bleu = matrix.bleu_score(list_of_reference, output_result)
    acc = matrix.compute_acc(list_of_reference, output_result)
    logging.info("dev_bleu {}".format(bleu))
    logging.info("right acc {}".format(acc))
    import random
    # Log two random (reference, prediction) pairs for quick eyeballing.
    # Guard: randint(0, -1) raises ValueError when no output was produced.
    if output_result:
        for _ in range(2):
            idx_sample = random.randint(0, len(output_result) - 1)
            logging.info("real {}".format(list_of_reference[idx_sample][0]))
            logging.info("fake {}\n\n".format(output_result[idx_sample]))
    # print("totalLoss{}".format(float(totalLoss) / float(numBatches)))
    return bleu, acc, dev_loss
def decode(self):
    """Decode examples until data is exhausted (if FLAGS.single_pass) and
    return, or decode indefinitely, loading latest checkpoint at regular
    intervals.

    Writes one "<article>\t<abstract>\t<prediction>\t<candidates>" line per
    instance to output.txt under FLAGS.log_root, then prints corpus BLEU and
    accuracy. Candidate predicates are joined with the "_||_" separator.
    """
    out_path = os.path.join(FLAGS.log_root, "output.txt")
    outputfile = codecs.open(out_path, "w", "utf8")
    output_result = []
    list_of_reference = []
    try:
        while True:
            batch = self._batcher.next_batch()  # 1 example repeated across batch
            if batch is None:  # finished decoding dataset in single_pass mode
                logging.info("eval_finished")
                break
            print(self._batcher.c_index)
            # Run beam search to get the best hypothesis plus its candidate set.
            result, all_candidate = self.eval_one_batch_with_candidate(
                self._sess, self._model, self._vocab, batch)
            for i, instance in enumerate(result):
                # Only the first real_length entries are genuine examples;
                # the rest of the batch is padding.
                if i == len(batch.art_oovs):
                    break
                if i >= batch.real_length:
                    print("eval done with {} instances".format(len(output_result)))
                    break
                out_words = data.outputids2words(instance, self._model._vocab_out,
                                                 batch.art_oovs[i])
                # Truncate at the first stop token, if one was produced.
                if data.STOP_DECODING in out_words:
                    out_words = out_words[:out_words.index(data.STOP_DECODING)]
                candidates_value = self.get_condidate_predicate(
                    out_words, all_candidate[i], batch.art_oovs[i])
                candidates_value = "_||_".join(candidates_value)
                output_now = " ".join(out_words)
                output_result.append(output_now)
                refer = batch.original_abstracts[i].strip()
                list_of_reference.append([refer])
                outputfile.write(batch.original_articles[i] + '\t' +
                                 batch.original_abstracts[i] + '\t' +
                                 output_now + '\t' + candidates_value + '\n')
    finally:
        # Close even on error paths; the original only closed on the clean
        # "batch is None" path and leaked the handle otherwise.
        outputfile.close()
    bleu = matrix.bleu_score(list_of_reference, output_result)
    acc = matrix.compute_acc(list_of_reference, output_result)
    print("bleu : {} acc : {}".format(bleu, acc))
    return