Exemplo n.º 1
0
    def pair_wise_decode(self):
        """Decode all pair-wise batches, dump the predictions and print scores.

        Batches are pulled from ``self._batcher.next_pairwised_decode_batch()``
        until it returns ``None``. Each batch is decoded with
        ``self.eval_one_batch``; every decoded instance is converted to words,
        cut at ``data.STOP_DECODING`` and written to
        ``<FLAGS.data_path>/output.txt`` as ``article<TAB>prediction``.
        Once the data is exhausted, BLEU and accuracy over all collected
        predictions are printed.

        Returns:
            None. The scores are only printed, not returned.
        """
        output_path = os.path.join(FLAGS.data_path, "output.txt")
        output_result = []
        list_of_reference = []

        # The context manager guarantees the file is closed even when
        # decoding raises part-way through the dataset.
        with codecs.open(output_path, "w", "utf8") as outputfile:
            while True:
                batch = self._batcher.next_pairwised_decode_batch()
                if batch is None:  # batcher has no more data
                    logging.info("eval_finished")
                    break
                print(self._batcher.c_index)

                result = self.eval_one_batch(self._sess, self._model,
                                             self._vocab, batch)

                for i, instance in enumerate(result):
                    # Never index past the per-instance OOV lists.
                    if i == len(batch.art_oovs):
                        break
                    # presumably real_length is the number of non-padding
                    # instances in the batch -- TODO confirm against the batcher
                    if i >= batch.real_length:
                        print("eval done with {} isntances".format(
                            len(output_result)))
                        break

                    out_words = data.outputids2words(instance,
                                                     self._model._vocab_out,
                                                     batch.art_oovs[i])
                    # Keep only the tokens before the stop marker.
                    if data.STOP_DECODING in out_words:
                        stop_at = out_words.index(data.STOP_DECODING)
                        out_words = out_words[:stop_at]

                    output_now = " ".join(out_words)
                    output_result.append(output_now)

                    # Each reference is wrapped in its own single-element list.
                    refer = batch.original_abstracts[i].strip()
                    list_of_reference.append([refer])

                    outputfile.write(batch.original_articles[i] + '\t' +
                                     output_now + '\n')

        bleu = matrix.bleu_score(list_of_reference, output_result)
        acc = matrix.compute_acc(list_of_reference, output_result)

        print("bleu : {}   acc : {}".format(bleu, acc))
        return
Exemplo n.º 2
0
def validation_acc(dev_model, FLAGS):
    """Run the dev model over its batcher and report BLEU and accuracy.

    Loads the most recent model, decodes every batch returned by
    ``dev_model.batcher.next_batch()`` until it yields ``None``, and writes
    one ``article<TAB>abstract<TAB>prediction`` line per decoded instance to
    ``train_model/<global_step>.test``. BLEU and accuracy are logged, along
    with two randomly picked reference/prediction pairs when any exist.

    Args:
        dev_model: model wrapper; this function uses its
            ``create_or_load_recent_model``, ``batcher``,
            ``get_specific_variable``, ``global_step``, ``graph``,
            ``run_eval_step`` and ``_vocab_out`` members.
        FLAGS: accepted for interface compatibility; not read here.

    Returns:
        Tuple ``(bleu, acc, dev_loss)``. ``dev_loss`` is always ``0``.
    """
    import random

    dev_model.create_or_load_recent_model()
    # NOTE(review): dev_loss is never updated, so callers always receive 0.
    # Kept as-is for backward compatibility; confirm whether the mean batch
    # loss was intended.
    dev_loss = 0
    valid_batcher = dev_model.batcher

    output_result = []
    list_of_reference = []

    step = dev_model.get_specific_variable(dev_model.global_step)

    # The context manager closes the dump file on every exit path.
    with open(r"train_model/{}.test".format(step), "w", encoding='utf-8') as out_f:
        with dev_model.graph.as_default():
            while True:
                valid_batch = valid_batcher.next_batch()
                if valid_batch is None:
                    break

                # Skip batches with fewer OOV lists than encoder rows --
                # presumably incomplete batches; verify against the batcher.
                if len(valid_batch.art_oovs) < len(valid_batch.enc_batch):
                    continue

                results = dev_model.run_eval_step(valid_batch)
                loss = results['loss']
                # Transposed so iteration yields one row per instance --
                # assumes final_ids is laid out step-major; TODO confirm.
                ids = np.array(results['final_ids']).T

                if np.isnan(loss):
                    logging.debug("Nan")

                for i, instance in enumerate(ids):
                    if i >= valid_batch.real_length:
                        print("eval done with {} isntances".format(len(output_result)))
                        break
                    # Never index past the per-instance OOV lists.
                    if i == len(valid_batch.art_oovs):
                        break

                    out_words = data.outputids2words(
                        instance, dev_model._vocab_out, valid_batch.art_oovs[i])
                    # Keep only the tokens before the stop marker.
                    if data.STOP_DECODING in out_words:
                        out_words = out_words[:out_words.index(data.STOP_DECODING)]

                    output_now = " ".join(out_words)
                    output_result.append(output_now)

                    # Each reference is wrapped in its own single-element list.
                    refer = valid_batch.original_abstracts[i].strip()
                    list_of_reference.append([refer])

                    out_f.write(valid_batch.original_articles[i] + '\t' +
                                valid_batch.original_abstracts[i] + '\t' +
                                output_now + '\n')

    bleu = matrix.bleu_score(list_of_reference, output_result)
    acc = matrix.compute_acc(list_of_reference, output_result)

    logging.info("dev_bleu {}".format(bleu))

    logging.info("right acc {}".format(acc))

    # random.randint(0, -1) raises ValueError, so only sample when at least
    # one instance was decoded.
    if output_result:
        for _ in range(2):
            idx_sample = random.randint(0, len(output_result) - 1)
            logging.info("real {}".format(list_of_reference[idx_sample][0]))
            logging.info("fake {}\n\n".format(output_result[idx_sample]))

    return bleu, acc, dev_loss
Exemplo n.º 3
0
    def decode(self):
        """Decode batches until the batcher is exhausted and print scores.

        Batches are pulled from ``self._batcher.next_batch()`` until it
        returns ``None``. Each batch is decoded with
        ``self.eval_one_batch_with_candidate``, which yields the best output
        per instance plus its candidates. For every decoded instance one line
        ``article<TAB>abstract<TAB>prediction<TAB>candidates`` is written to
        ``<FLAGS.log_root>/output.txt``, with the candidates joined by
        ``_||_``. Once the data is exhausted, BLEU and accuracy over all
        collected predictions are printed.

        Returns:
            None. The scores are only printed, not returned.
        """
        output_path = os.path.join(FLAGS.log_root, "output.txt")
        output_result = []
        list_of_reference = []

        # The context manager guarantees the file is closed even when
        # decoding raises part-way through the dataset.
        with codecs.open(output_path, "w", "utf8") as outputfile:
            while True:
                batch = self._batcher.next_batch()
                if batch is None:  # batcher has no more data
                    logging.info("eval_finished")
                    break
                print(self._batcher.c_index)

                result, all_candidate = self.eval_one_batch_with_candidate(
                    self._sess, self._model, self._vocab, batch)

                for i, instance in enumerate(result):
                    # Never index past the per-instance OOV lists.
                    if i == len(batch.art_oovs):
                        break
                    # presumably real_length is the number of non-padding
                    # instances in the batch -- TODO confirm against the batcher
                    if i >= batch.real_length:
                        print("eval done with {} isntances".format(
                            len(output_result)))
                        break

                    out_words = data.outputids2words(instance,
                                                     self._model._vocab_out,
                                                     batch.art_oovs[i])
                    # Keep only the tokens before the stop marker.
                    if data.STOP_DECODING in out_words:
                        stop_at = out_words.index(data.STOP_DECODING)
                        out_words = out_words[:stop_at]

                    candidates_value = self.get_condidate_predicate(
                        out_words, all_candidate[i], batch.art_oovs[i])
                    candidates_value = "_||_".join(candidates_value)

                    output_now = " ".join(out_words)
                    output_result.append(output_now)

                    # Each reference is wrapped in its own single-element list.
                    refer = batch.original_abstracts[i].strip()
                    list_of_reference.append([refer])

                    outputfile.write(batch.original_articles[i] + '\t' +
                                     batch.original_abstracts[i] + '\t' +
                                     output_now + '\t' + candidates_value +
                                     '\n')

        bleu = matrix.bleu_score(list_of_reference, output_result)
        acc = matrix.compute_acc(list_of_reference, output_result)

        print("bleu : {}   acc : {}".format(bleu, acc))
        return