Exemplo n.º 1
0
def compute_novelty(sentences, corpus_file, opt, idx_to_word):
    """Compute the novelty of a batch of sentences with respect to a corpus.

    For every sampled sentence the corpus sentence with the lowest ``ter``
    score is located; that lowest score is the sentence's novelty.

    Args:
        sentences: Indexable pair of newline-separated strings.
            ``sentences[0]`` holds the sentences echoed back in the result,
            ``sentences[1]`` the space-separated token sequences that are
            actually scored.
        corpus_file: Path of a text file with one space-separated corpus
            sentence per line. Its tokens are converted with ``int`` and
            looked up in ``idx_to_word``.
        opt: Options object; only ``opt.sample_len`` is read.
        idx_to_word: Lookup from integer token index to word.

    Returns:
        ``(mean_novelty, ranking)`` where ``ranking`` is the sorted list of
        ``(novelty, reference, closest)`` triples.
    """
    # Prepare sampled sentences and corpus to compare to
    ref = sentences[0].split("\n")
    sentences = [s.split(" ") for s in sentences[1].split("\n")]
    with open(corpus_file, 'r') as f:
        corpus = [line.rstrip().split(" ") for line in f]

    # Remove sentences much longer than the sampled sentences length
    corpus = [s for s in corpus if len(s) < opt.sample_len + 5]

    # Compute the novelty for each sentence
    novelty = []
    closest = []
    for i, sen in enumerate(sentences):
        print("Computing novelty for sentence {}/{}.\n".format(
            i, len(sentences)))
        # Score against the whole corpus once and reuse the minimum instead
        # of calling ter() a second time for the winning corpus sentence.
        scores = [ter(sen, s) for s in corpus]
        mindex = int(np.argmin(np.array(scores)))
        novelty.append(scores[mindex])
        closest.append(" ".join(
            [idx_to_word[int(idx)] for idx in corpus[mindex]]))
        print("Novelty: {}, Sentence: {}, Closest: {}\n".format(
            novelty[i], ref[i], closest[i]))
    return sum(novelty) / float(len(novelty)), sorted(
        zip(novelty, ref, closest))
Exemplo n.º 2
0
def spl(mt_path, ht_path):
    """Compute per-line scores (BLEU and TER) for a translation file.

    ``sacrebleu -sl -b`` is run through the shell with *mt_path* on stdin and
    *ht_path* as its argument; its output goes to a temporary
    ``<mt_path>.bpl`` file that is read back and deleted. The two input files
    are then read in lockstep to add a TER value per row.

    Args:
        mt_path: Path of the file piped into sacrebleu.
        ht_path: Path of the file passed to sacrebleu as its argument.

    Returns:
        A list with one dict per line of sacrebleu output. Every dict has a
        ``"bleu"`` key; ``"ter"`` and ``"text"`` are added only when both
        corresponding input lines are non-empty after stripping.
    """
    import shlex  # local import: only needed to quote the shell arguments

    # Scores per line (bleu and ter)
    logger.info([mt_path, ht_path])
    bpl_path = "{}.bpl".format(mt_path)
    # The paths end up inside a shell pipeline, so quote them: unquoted, a
    # path with spaces or shell metacharacters breaks (or hijacks) the command.
    command = "cat {} | sacrebleu -sl -b {} > {}".format(
        shlex.quote(str(mt_path)), shlex.quote(str(ht_path)),
        shlex.quote(bpl_path))
    sacreBLEU = subprocess.Popen(
        command,
        cwd=app.config['TMP_FOLDER'],
        shell=True,
        stdout=subprocess.PIPE)
    sacreBLEU.wait()

    with open(bpl_path, 'r') as bl_file:
        rows = [{"bleu": line.strip()} for line in bl_file]

    os.remove(bpl_path)

    with open(ht_path) as ht_file, open(mt_path) as mt_file:
        for row in rows:
            ht_line = ht_file.readline().strip()
            mt_line = mt_file.readline().strip()
            if ht_line and mt_line:
                # NOTE(review): pyter.ter is normally called as
                # ter(hypothesis, reference); here the ht line is passed
                # first - confirm this order is intended.
                ter = round(pyter.ter(ht_line.split(), mt_line.split()), 2)
                # Anything above 1 is capped at 100.
                row['ter'] = 100 if ter > 1 else utils.parse_number(
                    ter * 100, 2)
                row['text'] = mt_line

    return rows
Exemplo n.º 3
0
def ter(rw, hw):
    """Return ``pyter.ter(hw, rw)`` formatted as a string with three decimals.

    Args:
        rw: Passed to pyter as the second (reference) argument.
        hw: Passed to pyter as the first (hypothesis) argument.

    Returns:
        The score rendered with ``'%.3f'``.
    """
    # The original had a progress print after the return statement; it could
    # never run and referenced an undefined counter, so it has been dropped.
    return '%.3f' % pyter.ter(hw, rw)
Exemplo n.º 4
0
 def score_instance(self,
                    hypothesis: List[str],
                    reference: List[str]) -> float:
     """Score a single hypothesis/reference pair with TER.

     Two empty sequences score 0.0, exactly one empty sequence scores 1.0,
     and otherwise the value of ``pyter.ter(hypothesis, reference)`` is
     returned.
     """
     if not reference and not hypothesis:
         return 0.0
     if not reference or not hypothesis:
         return 1.0
     return pyter.ter(hypothesis, reference)
def ter(ref, gen):
    '''
    Args:
        ref - reference sentences - in a list
        gen - generated sentences - in a list
    Returns:
        TER averaged over all sentence pairs; when ``ref`` holds exactly one
        sentence only the first pair is scored
    '''
    def pair_score(idx):
        # Sentences are whitespace-tokenised before scoring.
        return pyter.ter(gen[idx].split(), ref[idx].split())

    if len(ref) == 1:
        return pair_score(0)

    accumulated = sum(pair_score(idx) for idx in range(len(gen)))
    return accumulated / len(gen)
Exemplo n.º 6
0
    def get_value(self, mt_path, ht_path):
        """Return the mean line-level TER of two files as a percentage.

        Both files are read in lockstep; reading stops at the end of the
        shorter file.

        Args:
            mt_path: Path of the file whose lines are passed to pyter second.
            ht_path: Path of the file whose lines are passed to pyter first.

        Returns:
            The tuple ``(100.0, ter, 0.0)`` where ``ter`` is the mean score
            times 100, rounded to two decimals. With no line pairs at all,
            ``ter`` is ``0.0``.
        """
        total = 0.0
        pairs = 0
        with open(mt_path, 'r') as mt_file, open(ht_path, 'r') as ht_file:
            for mt_line, ht_line in zip(mt_file, ht_file):
                total += pyter.ter(ht_line.split(), mt_line.split())
                pairs += 1

        # Count pairs explicitly: the original divided by the loop index,
        # which is unbound (and crashes) when either file is empty.
        ter = round((total / pairs) * 100, 2) if pairs else 0.0

        return 100.0, float(ter), 0.0
Exemplo n.º 7
0
 def __call__(self, decoded, references):
     ter_sum = 0
     for hyp, ref in zip(decoded, references):
         if ref and hyp:
             ter_sum += pyter.ter(hyp, ref)
         elif not ref and not hyp:
             ter_sum += 0.
         else:
             ter_sum += 1.
     return ter_sum / len(decoded)
Exemplo n.º 8
0
 def __call__(self, decoded, references) -> float:
     ter_sum = 0.
     count = 0
     for hyp, ref in zip(decoded, references):
         count += 1
         if ref and hyp:
             ter_sum += pyter.ter(hyp, ref)
         elif not ref and not hyp:
             ter_sum += 0.
         else:
             ter_sum += 1.
     return ter_sum / count
Exemplo n.º 9
0
def get_ter_score(hypothesis: List[List[str]], reference: List[str]) -> list:
    """Return the mean TER of each hypothesis group against its reference.

    Args:
        hypothesis: One group of hypotheses per reference.
        reference: References, paired positionally with ``hypothesis``.

    Returns:
        One mean score per group that could be scored. Groups that fail to
        score (for example an empty group, which would divide by zero) are
        skipped, so the result can be shorter than the input.
    """
    ter_score_list = []
    for hyps, ref in zip(hypothesis, reference):
        try:
            ter_score = sum(pyter.ter(hyp_n, ref) for hyp_n in hyps) / len(hyps)
        except Exception:
            # Best effort, as before - but a bare ``except:`` also swallowed
            # KeyboardInterrupt and SystemExit, which must propagate.
            continue
        ter_score_list.append(ter_score)

    return ter_score_list
Exemplo n.º 10
0
def compute_ter(pred, data, pad_idx):
    """Average the per-sentence ``ter`` score of predictions against gold data.

    Args:
        pred(list): [num_sentences, max_len]. Predictions in index form.
        data(list): [num_sentences, max_len]. Gold standard indices.
        pad_idx: Forwarded to ``remove_padding`` for every sentence.

    Return:
        float: sum of the pairwise ``ter`` scores divided by the number of
        predictions.
    """
    stripped_pred = [remove_padding(sentence, pad_idx) for sentence in pred]
    stripped_gold = [remove_padding(sentence, pad_idx) for sentence in data]

    total = 0
    for hyp, gold in zip(stripped_pred, stripped_gold):
        total += ter(hyp, gold)
    return total / float(len(stripped_pred))
Exemplo n.º 11
0
    def cal_seq(data: pd.DataFrame, selected_list: list, remaining_list: list,
                n: int, memo_ter: list, alpha: float, beta: float):
        """Recursively select ``n`` rows, trading quality against diversity.

        The first pick is the row with the highest ``data["Scores"]``. Every
        later pick maximises ``alpha * score + beta * mean TER`` against the
        predictions selected so far, over the rows still marked as remaining.

        Args:
            data: Frame with a ``"Scores"`` and a ``"Predictions"`` column.
                Rows are addressed by position ``0..len(data.index) - 1``.
            selected_list: Receives the chosen row indices (mutated in place).
            remaining_list: Per-row flags, set to ``False`` once a row is
                picked (mutated in place). It is used as a boolean mask on
                NumPy arrays - presumably a NumPy bool array; verify against
                the caller.
            n: Number of picks to make.
            memo_ter: Pre-sized list; slot ``k`` receives the TER of every row
                against the ``(k + 1)``-th selected prediction.
            alpha: Weight of the quality score.
            beta: Weight of the diversity (mean TER) term.

        Returns:
            ``(selected_list, remaining_list, memo_ter)``. For ``n < 1``
            nothing is selected and ``None`` is returned.
        """
        # Base case: the top score in data["Scores"] is the first selection.
        if n == 1:
            selected_idx = np.argmax(data["Scores"].to_numpy())
            selected_list.append(selected_idx)
            remaining_list[selected_idx] = False
            return selected_list, remaining_list, memo_ter

        if n > 1:
            selected_list, remaining_list, memo_ter = cal_seq(
                data, selected_list, remaining_list, n - 1, memo_ter, alpha,
                beta)

            num_rows = len(data.index)
            # TER of every row against the most recently selected prediction.
            ter_list = [[] for _ in range(num_rows)]
            ref = data["Predictions"][selected_list[n - 2]]
            for iter_i in range(num_rows):
                if remaining_list[iter_i] == 0:
                    # Already selected rows get 0.0 instead of a TER score.
                    ter_list[iter_i] = 0.0
                else:
                    ter_list[iter_i] = pyter.ter(data["Predictions"][iter_i],
                                                 ref)

            memo_ter[n - 2] = ter_list  # Save TER scores to memo_ter

            sum_ter = np.zeros((num_rows, 1))
            # Rows that are not remaining keep -inf and can never win argmax.
            z_scores = np.ones(sum_ter.shape) * (-np.inf)
            for j in range(n - 1):
                # Use the frame's row count; the original hard-coded 50 and
                # therefore failed for any other number of rows.
                ter = np.array(memo_ter[j],
                               dtype=np.float64).reshape(num_rows, 1)
                sum_ter = sum_ter + ter
            # Diversity score: TER averaged over the n - 1 selections so far.
            sum_ter = sum_ter / (n - 1)
            z_scores[remaining_list] = alpha * np.array(data["Scores"][
                remaining_list]).reshape(-1, 1) + beta * sum_ter[
                    remaining_list]
            selected_idx = np.argmax(z_scores)
            selected_list.append(selected_idx)
            remaining_list[selected_idx] = False

            return selected_list, remaining_list, memo_ter
Exemplo n.º 12
0
def escolha_ref_ter(references, candidate):
	"""
	TER is measured against the reference closest to the translator output.
	This function computes the TER value for every reference and keeps the lowest.
	:param references: List with the translations from the test corpus.
	:param candidate: List with the translations from the translation system.
	:return: minimum TER, the reference translation used to obtain that minimum
	"""
	scores = [pyter.ter(candidate, r) for r in references]
	lowest = min(scores)
	# index() returns the first reference that reaches the minimum.
	return lowest, references[scores.index(lowest)]
Exemplo n.º 13
0
def ter(ref_path, hyp_path):
  """ Compute the mean line-level Translation Edit Rate between two files.

  Lines are paired positionally until either file runs out. When no pair is
  available at all, 1.0 is returned.
  """
  total = 0.0
  pairs = 0.0
  with open(ref_path) as ref_fp, open(hyp_path) as hyp_fp:
    for ref_line, hyp_line in zip(ref_fp, hyp_fp):
      total += pyter.ter(hyp_line.strip().split(), ref_line.strip().split())
      pairs += 1
  if pairs > 0:
    return total / pairs
  return 1.0
Exemplo n.º 14
0
    def ter_score(self, ref, hyp):
        """
            Compute TER with pyter: https://pypi.python.org/pypi/pyter/0.2.2.1

            Alternative (not used here), Java tercom.jar:
                  http://www.cs.umd.edu/~snover/tercom/
                  Tercom github: https://github.com/jhclark/tercom

                  os.system('java -jar {dir}dependencies/tercom.7.25.jar -r {ref_file} -h {hyp_file} -n {dir}{output_file}'.
                  format(hyp_file=hyp, ref_file=ref, dir=utils.project_dir_name(), output_file="assets/test_ter.txt"))


        :param ref: reference text (separated into words)
        :param hyp: hypotheses text (separated into words)
        :return: TER score as returned by ``pyter.ter(hyp, ref)``
        """
        score = pyter.ter(hyp, ref)
        return score
Exemplo n.º 15
0
def ter_score(references, hypothesis, num_refs):
    """Return the mean best-reference TER over all hypotheses.

    Each hypothesis is scored against the first ``num_refs`` of its
    references and the lowest score is kept. An empty reference, or a
    reference for which pyter fails, scores 1.

    Args:
        references: One sequence of reference strings per hypothesis.
        hypothesis: Hypothesis strings, paired positionally with
            ``references``.
        num_refs: Maximum number of references considered per hypothesis.

    Returns:
        The average of the per-hypothesis minimum scores.
    """
    logging.info('STARTING TO COMPUTE TER...')
    print('STARTING TO COMPUTE TER...')
    ter_scores = []
    for hyp, refs in zip(hypothesis, references):
        candidates = []
        for ref in refs[:num_refs]:
            if len(ref) == 0:
                score = 1
            else:
                try:
                    score = pyter.ter(hyp.split(), ref.split())
                except Exception:
                    # Keep the fallback score, but no longer swallow
                    # KeyboardInterrupt/SystemExit as the bare except did.
                    score = 1
            candidates.append(score)

        ter_scores.append(min(candidates))

    logging.info('FINISHING TO COMPUTE TER...')
    print('FINISHING TO COMPUTE TER...')
    return sum(ter_scores) / len(ter_scores)
Exemplo n.º 16
0
Arquivo: test.py Projeto: aflc/pyter
def test_paper():
    """Pin pyter's score for a fixed hypothesis/reference pair.

    The expected value equals 4/13; the reference has 13 tokens.
    """
    reference_tokens = 'SAUDI ARABIA denied THIS WEEK information published in the AMERICAN new york times'.split()
    hypothesis_tokens = 'THIS WEEK THE SAUDIS denied information published in the new york times'.split()
    score = pyter.ter(hypothesis_tokens, reference_tokens)
    assert 0.3076923076923077 == score
Exemplo n.º 17
0
    def _add_cache(self, iwords, mat):
        node = self._cache
        skipnum = len(iwords) - len(mat)
        for i in range(skipnum):
            node = node[iwords[i]][0]
        assert len(iwords[skipnum:]) == len(mat)
        for word, row in zip(iwords[skipnum:], mat):
            if word not in node:
                node[word] = [{}, None]
            value = node[word]
            if value[1] is None:
                value[1] = tuple(row)
            node = value[0]

    def _find_cache(self, iwords):
        node = self._cache
        start_position, row = 0, None
        for idx, word in enumerate(iwords):
            if word in node:
                start_position = idx + 1
                node, row = node[word]
            else:
                break

        return start_position, row

# Ad-hoc demo: whitespace-tokenise a reference and a hypothesis sentence and
# print the TER of the hypothesis against the reference with three decimals.
ref = ' hello how are you '.split()
hyp = 'bonjour toi hellojdioro how '.split()
print('%.3f' % pyter.ter(hyp, ref))
Exemplo n.º 18
0
 def ter_score(self, src_x, src_y):
     """Return ``pyter.ter`` of two strings after whitespace tokenisation.

     ``src_x`` is passed as pyter's first argument and ``src_y`` as its
     second.
     """
     x_tokens = src_x.split()
     y_tokens = src_y.split()
     return pyter.ter(x_tokens, y_tokens)
Exemplo n.º 19
0
def metrics(fname):
    """Print mean BLEU, embedding cosine similarity, WER and TER scores.

    References are read from ``poc_english.txt`` (one sentence per line) and
    candidates from *fname*, whose i-th line holds the candidates for the
    i-th reference. Each candidate line is lower-cased, its first and last
    characters are sliced off (presumably enclosing brackets - confirm
    against the file format) and the rest is split on ``", "``. Every
    candidate is scored against its reference and the mean of each metric is
    printed.

    The BLEU section scores every reference; the other three sections skip
    references that consist of a single token.

    Args:
        fname: Path of the candidate file.

    Raises:
        IndexError: If *fname* has more lines than ``poc_english.txt``.
        ZeroDivisionError: If a metric ends up with no scored candidate.
    """
    # BLEU
    from nltk.translate.bleu_score import sentence_bleu, corpus_bleu

    def read_pairs(skip_single_token):
        """Parse both files into ``(reference_tokens, candidates)`` pairs."""
        # Context managers close the handles; the original opened eight
        # files per call and never closed any of them.
        with open("poc_english.txt", "r") as ref_file:
            lines = ref_file.readlines()
        with open(fname, "r") as cand_file:
            cand = cand_file.readlines()

        pairs = []
        for idx, raw in enumerate(cand):
            line = lines[idx]
            inner = raw.lower().strip('\n')[1:len(raw) - 2]
            candidates = [
                item.strip('.').split(" ") for item in inner.split(", ")
            ]
            reference = line.strip('.\n').split(" ")
            if skip_single_token and len(reference) == 1:
                continue
            pairs.append(([word.lower() for word in reference], candidates))
        return pairs

    def score_all(pairs, score_fn):
        """Apply ``score_fn(reference, candidate)`` to every candidate."""
        scores = []
        for reference, candidates in pairs:
            for c in candidates:
                scores.append(score_fn(reference, c))
        return scores

    def mean(values):
        """Arithmetic mean using float division."""
        return sum(values) / (1.0 * len(values))

    scores = score_all(
        read_pairs(skip_single_token=False),
        lambda reference, c: sentence_bleu([reference], c, weights=(1, 0)))
    print("BLEU: " + str(mean(scores)))

    # Word2Vec Cosine Similarity
    import torch
    import torch.nn.functional as F
    from sentence_transformers import SentenceTransformer
    import nltk
    from nltk import tokenize

    pairs = read_pairs(skip_single_token=True)

    # Load the model once; the original re-created it for every candidate.
    transformer = SentenceTransformer('roberta-base-nli-stsb-mean-tokens')
    transformer.eval()

    def similarity(par1, par2):
        """Cosine similarity of the mean sentence embeddings of two texts."""
        par1 = tokenize.sent_tokenize(par1)
        vec1 = torch.Tensor(transformer.encode(par1))
        vec1 = vec1.mean(0)
        par2 = tokenize.sent_tokenize(par2)
        vec2 = torch.Tensor(transformer.encode(par2))
        vec2 = vec2.mean(0)
        cos_sim = F.cosine_similarity(vec1, vec2, dim=0)
        return cos_sim.item()

    scores = score_all(
        pairs,
        lambda reference, c: similarity(" ".join(reference), " ".join(c)))
    print("Word2Vec Cosine Similarity: " + str(mean(scores)))

    # WER
    scores = score_all(
        read_pairs(skip_single_token=True),
        lambda reference, c: wer_score(c, reference))
    print("WER: " + str(mean(scores)))

    # TER
    import pyter

    # pyter.ter expects (hypothesis, reference). The original passed the
    # reference first, which normalises by the candidate length instead of
    # the reference length.
    scores = score_all(
        read_pairs(skip_single_token=True),
        lambda reference, c: pyter.ter(c, reference))
    print("TER: " + str(mean(scores)))
Exemplo n.º 20
0
def ter_sim(text, hypo):
	"""Return ``ter(text, hypo)`` unchanged (thin alias)."""
	score = ter(text, hypo)
	return score
Exemplo n.º 21
0
def test_same():
    """A sentence compared with itself must have a TER of zero."""
    sentence = '''Since the visigoth period, the term Hispania, up until then used geographically, began to be also used with a political connotation, as an example the use of the expression Laus Hispaniae  to describe the history of the towns of the peninsula in the chronicles of Isodoro de Sevilla.'''
    hypothesis_tokens = sentence.split()
    reference_tokens = sentence.split()
    assert pyter.ter(hypothesis_tokens, reference_tokens) == 0
Exemplo n.º 22
0
def test_paper():
    """Pin pyter's score for a fixed hypothesis/reference pair (4/13)."""
    reference_text = 'SAUDI ARABIA denied THIS WEEK information published in the AMERICAN new york times'
    hypothesis_text = 'THIS WEEK THE SAUDIS denied information published in the new york times'
    ref = reference_text.split()
    hyp = hypothesis_text.split()
    expected = 0.3076923076923077
    assert expected == pyter.ter(hyp, ref)
Exemplo n.º 23
0
def test(corpus,
         test_pairs,
         max_length,
         enable_cuda,
         epoch,
         transformer=False):
    """Translate the test pairs with greedy and beam search and log BLEU/TER.

    Each source sentence in ``test_pairs[0]`` is decoded twice - once with
    ``greedy`` and once with ``beam`` - and compared with the matching entry
    of ``test_pairs[1]``. Mean sentence-level BLEU and TER are logged for
    both decoders.

    Side effects: writes ``greedy.ref``/``greedy.hyp``,
    ``beam.ref``/``beam.hyp``, ``lengths.txt``, ``bleu.txt`` and ``ter.txt``
    (the last three hold beam-search values only), plus
    ``weights_<epoch>.png`` and ``weights_<epoch>.txt`` for sentence 35.

    ``encoder`` and ``decoder`` are not parameters; they are read from the
    enclosing scope.

    Args:
        corpus: Provides ``word_positions``, ``to_indices``,
            ``bpe_to_sentence`` and the ``dict_f`` vocabulary.
        test_pairs: Indexable pair ``(sources, targets)``.
        max_length: Forwarded to ``greedy`` and ``beam``.
        enable_cuda: Forwarded to ``greedy`` and ``beam``.
        epoch: Used only in the names of the attention dump files.
        transformer: When true, the three attention matrices of
            ``encoder.layer1.attention`` are dumped for sentence 35 instead
            of the decoder attention.

    Raises:
        ZeroDivisionError: If ``test_pairs`` yields no pairs.
    """

    scores_bleu = []
    scores_ter = []
    chencherry = SmoothingFunction()
    # NOTE(review): these handles are closed manually below and leak if
    # decoding raises.
    greedy_ref = open("greedy.ref", 'w', encoding='utf8')
    greedy_hyp = open("greedy.hyp", 'w', encoding='utf8')
    # ---- Pass 1: greedy decoding ----
    for i, (english,
            french) in tqdm(enumerate(list(zip(test_pairs[0],
                                               test_pairs[1])))):
        positions = corpus.word_positions(english)
        indices = corpus.to_indices(english)
        translation, attention = greedy(encoder, decoder, indices, positions,
                                        corpus.dict_f.word2index,
                                        corpus.dict_f.index2word, max_length,
                                        enable_cuda)

        # Sentence 35 is the fixed sample whose attention is visualised.
        if i == 35 and transformer:
            data = [
                go.Heatmap(z=attention,
                           x=english,
                           y=translation,
                           colorscale='Viridis')
            ]
            layout = go.Layout(width=800, height=600)
            fig = go.Figure(data=data, layout=layout)
            py.image.save_as(fig, filename='weights_{}.png'.format(epoch))
            attention1 = encoder.layer1.attention.last_weights1
            attention2 = encoder.layer1.attention.last_weights2
            attention3 = encoder.layer1.attention.last_weights3
            with open("weights_{}.txt".format(epoch), 'w') as f:
                f.write("\n".join([
                    "\t".join([str(num) for num in line])
                    for line in attention1
                ]))
                f.write("\n")
                f.write("\n".join([
                    "\t".join([str(num) for num in line])
                    for line in attention2
                ]))
                f.write("\n")
                f.write("\n".join([
                    "\t".join([str(num) for num in line])
                    for line in attention3
                ]))
                f.write("\n")
                # NOTE(review): no separator is written between the source
                # tokens and the translation tokens.
                f.write("\t".join(english))
                f.write("\t".join(translation))
        elif i == 35:
            data = [
                go.Heatmap(z=attention,
                           x=english,
                           y=translation,
                           colorscale='Viridis')
            ]
            layout = go.Layout(width=800, height=600)
            fig = go.Figure(data=data, layout=layout)
            py.image.save_as(fig, filename='weights_{}.png'.format(epoch))
            with open("weights_{}.txt".format(epoch), 'w') as f:
                f.write("\n".join([
                    "\t".join([str(num) for num in line]) for line in attention
                ]))
                f.write("\n")
                f.write("\t".join(english))
                f.write("\t".join(translation))

        # Undo BPE and clean both sides before scoring.
        french = clean(corpus.bpe_to_sentence(french))
        translation = clean(corpus.bpe_to_sentence(translation))
        scores_bleu.append(
            sentence_bleu([french],
                          translation,
                          smoothing_function=chencherry.method1))
        scores_ter.append(pyter.ter(translation, french))
        greedy_ref.write(" ".join(french) + "\n")
        greedy_hyp.write(" ".join(translation) + "\n")
    greedy_ref.close()
    greedy_hyp.close()
    score_bleu = sum(scores_bleu) / len(scores_bleu)
    score_ter = sum(scores_ter) / len(scores_ter)
    # NOTE(review): the message ends with "METEOR" but no METEOR value is
    # computed or passed.
    logging.info("Greedy, BLEU: {}, TER: {}, METEOR".format(
        score_bleu, score_ter))
    # Reset the accumulators for the beam-search pass.
    scores_bleu = []
    scores_ter = []

    # ---- Pass 2: beam-search decoding ----
    beam_ref = open("beam.ref", 'w', encoding='utf8')
    beam_hyp = open("beam.hyp", 'w', encoding='utf8')
    lengths = []
    for english, french in tqdm(list(zip(test_pairs[0], test_pairs[1]))):
        positions = corpus.word_positions(english)
        indices = corpus.to_indices(english)
        translation, attention = beam(encoder, decoder, indices, positions,
                                      corpus.dict_f.word2index,
                                      corpus.dict_f.index2word, max_length,
                                      enable_cuda)

        # NOTE(review): this loop does not rebind ``i``; it still holds the
        # last index of the greedy loop, so this branch runs either for every
        # pair (exactly 36 pairs) or never. When it runs it overwrites the
        # files written by the greedy pass.
        if i == 35:
            # Attention visualization
            data = [
                go.Heatmap(z=attention,
                           x=english,
                           y=translation,
                           colorscale='Viridis')
            ]
            layout = go.Layout(width=800, height=600)
            fig = go.Figure(data=data, layout=layout)
            py.image.save_as(fig, filename='weights_{}.png'.format(epoch))
            with open("weights_{}.txt".format(epoch), 'w') as f:
                f.write("\n".join([
                    "\t".join([str(num) for num in line]) for line in attention
                ]))
                f.write("\n")
                f.write("\t".join(english))
                f.write("\t".join(translation))

        french = clean(corpus.bpe_to_sentence(french))
        translation = clean(corpus.bpe_to_sentence(translation))
        scores_bleu.append(
            sentence_bleu([french],
                          translation,
                          smoothing_function=chencherry.method1))
        scores_ter.append(pyter.ter(translation, french))
        beam_ref.write(" ".join(french) + "\n")
        beam_hyp.write(" ".join(translation) + "\n")
        # Reference length after cleaning, dumped to lengths.txt below.
        lengths.append(len(french))
    beam_ref.close()
    beam_hyp.close()

    score_bleu = sum(scores_bleu) / len(scores_bleu)
    score_ter = sum(scores_ter) / len(scores_ter)
    logging.info("Beam, BLEU: {}, TER: {}, METEOR".format(
        score_bleu, score_ter))
    # Per-sentence dumps of the beam-search pass, one value per line.
    with open("lengths.txt", 'w') as f:
        f.write("\n".join([str(l) for l in lengths]))
    with open("bleu.txt", 'w') as f:
        f.write("\n".join([str(l) for l in scores_bleu]))
    with open("ter.txt", 'w') as f:
        f.write("\n".join([str(l) for l in scores_ter]))
Exemplo n.º 24
0
Arquivo: test.py Projeto: aflc/pyter
def test_same():
    """Scoring a token list against an identical copy must give zero."""
    text = '''Since the visigoth period, the term Hispania, up until then used geographically, began to be also used with a political connotation, as an example the use of the expression Laus Hispaniae  to describe the history of the towns of the peninsula in the chronicles of Isodoro de Sevilla.'''
    score = pyter.ter(text.split(), text.split())
    assert score == 0
Exemplo n.º 25
0
def ter(rw, hw):
    """Return ``pyter.ter(hw, rw)`` as a percentage.

    The raw score is first rendered with two decimals, parsed back to a
    float and then multiplied by 100.
    """
    two_decimals = "{:.2f}".format(pyter.ter(hw, rw))
    return 100 * float(two_decimals)
Exemplo n.º 26
0
def get_ter_score(candidate, reference):
    """Return the TER of two strings after whitespace tokenisation.

    ``candidate`` is passed as pyter's first argument and ``reference`` as
    its second.
    """
    candidate_tokens = candidate.split()
    reference_tokens = reference.split()
    return pyter.ter(candidate_tokens, reference_tokens)
Exemplo n.º 27
0
# Script: score every prediction with TER against the first prediction and
# combine that with the quality score into a single "Z_scores" column.
n_best = 50
num_pred = 1000  # default = 100

predictions = readcsv_to_df(file_pth, num_pred)
hyps_data = clear_pad(predictions)  # a pandas DataFrame

# TODO:
# Step 1: For one sequence, take the top prediction as the reference and the
# remaining predictions as hypotheses to be compared against it.
# Calculating TER scores

ter_scores = [[] for _ in range(len(hyps_data.index))]
list_of_hyps = hyps_data["Predictions"].to_numpy(dtype=str)
ref = list_of_hyps[0]

# NOTE(review): the predictions are converted to str and passed to pyter
# without tokenisation, so TER is presumably computed per character - confirm.
for i, hyp in enumerate(list_of_hyps):
    ter_scores[i] = pyter.ter(hyp, ref)

hyps_data["TER scores"] = ter_scores

# TODO:
#  Step 2: Use both the quality scores and the TER scores as selection criteria
#  Scores: the higher, the better the quality
#  TER scores: the higher, the larger the difference from the reference
#  Function (simple version): z_scores = beta * Scores + alpha * (TER scores),
#  beta can possibly be 0 when quality scores do not play a role in selections

# alpha = 1, beta = 1
hyps_data["Z_scores"] = hyps_data["Scores"] + hyps_data["TER scores"]

# Empty frame with the same columns, to receive the data sorted per group of
# n_best sequences.
sorted_data = pd.DataFrame(columns=hyps_data.columns)
Exemplo n.º 28
0
        arr[i] = arr[i].lower()
    reference = arr
    for c in candidate:
        scores.append(wer_score(c, reference))
print("WER: " + str(sum(scores) / (1.0 * len(scores))))

# TER
import pyter

# Mean TER of every candidate against its reference sentence.
scores = []
# NOTE(review): neither file handle is ever closed.
f = open("poc_english.txt", "r")
f2 = open(fname, "r")
lines = f.readlines()
cand = f2.readlines()
for i in range(len(cand)):
    line = lines[i]
    candidate = []
    # Lower-case the line, drop the newline, slice off the first and last
    # characters and split the rest into candidates on ", ".
    l = cand[i].lower().strip('\n')[1:len(cand[i]) - 2].split(", ")
    for item in l:
        item = item.strip('.').split(" ")
        candidate.append(item)
    arr = line.strip('.\n').split(" ")
    # References that consist of a single token are skipped.
    if (len(arr) == 1):
        continue
    # This inner loop reuses ``i``; the outer loop rebinds it on its next
    # iteration, so the outer iteration is unaffected.
    for i in range(len(arr)):
        arr[i] = arr[i].lower()
    reference = arr
    for c in candidate:
        # NOTE(review): pyter.ter is normally called as ter(hyp, ref); here
        # the reference is passed first - confirm the order is intended.
        scores.append(pyter.ter(reference, c))
print("TER: " + str(sum(scores) / (1.0 * len(scores))))
Exemplo n.º 29
0
	def compute_ter_score(hyp, ref):
		"""Return ``pyter.ter(hyp, ref)`` unchanged (thin wrapper)."""
		score = pyter.ter(hyp, ref)
		return score
Exemplo n.º 30
0
    def train(self, savepoint=None):
        """Train for up to 1000 epochs with TER-based early stopping.

        Written for Python 2 (print statements). From the third epoch on
        (``epoch > 1``) every validation item is captioned and scored with
        TER against one randomly chosen reference caption. Training stops
        once the validation TER has failed to improve more than
        ``self.patience`` times in a row; otherwise a checkpoint is saved.

        :param savepoint: optional checkpoint path restored before training
        """
        print 'Start training...'

        with tf.Session() as sess:
            init = tf.global_variables_initializer()
            sess.run(init)
            if savepoint != None:
                tf.train.Saver().restore(sess, savepoint)
            # Initial "best" TER that the first validation is compared with.
            last_val_ter = 100.0
            patience_counter = 0
            for epoch in range(1000):
                print '[Epoch #' + str(epoch) + ']'

                train_pair_list = self._prepareTrainPairList()

                percent = 0
                # NOTE(review): the range stops before
                # len(train_pair_list) - batch_size, so the trailing pairs
                # are not visited - confirm this is intended.
                for i in range(0,
                               len(train_pair_list) - self.batch_size,
                               self.batch_size):
                    feed_dict = self._prepareTrainFeedDictList(
                        train_pair_list, i)
                    #_, caption, wp_loss, seq_loss, prior_factor = sess.run([self.train_step, self.caption, self.word_predict_loss, self.seq_loss, self.prior_factor], feed_dict=feed_dict)
                    _, caption, seq_loss = sess.run(
                        [self.train_step, self.caption, self.seq_loss],
                        feed_dict=feed_dict)
                    # Only the first caption of the batch is rendered.
                    caption_str = self.data.tokenListToCaption([
                        self.data.word_list[word]
                        for word in caption[0].tolist()
                    ])
                    #print 'Caption: "{}", WP loss: {}, Seq loss: {}'.format(caption_str, wp_loss, seq_loss)
                    print 'Caption: "{}", Seq loss: {}'.format(
                        caption_str, seq_loss)
                    # Progress report, advanced by at most one percent per batch.
                    if i * 100 / len(train_pair_list) > percent:
                        percent += 1
                        print '{}%'.format(percent)

                # Validation is skipped for the first two epochs.
                if epoch > 1:
                    # The string block below is disabled BLEU bookkeeping.
                    '''
					mean_bleu_list = []
					max_bleu_list = []
					'''
                    ter_score_list = []
                    for i in range(len(self.data.val_feat_list)):
                        feed_dict = self._prepareTestFeedDictList(
                            self.data.val_feat_list, i)
                        caption = sess.run(self.caption, feed_dict=feed_dict)
                        caption_str = self.data.tokenListToCaption([
                            self.data.word_list[word]
                            for word in caption[0].tolist()
                        ])
                        # NOTE(review): bleu_list is only used inside the
                        # disabled string block below.
                        bleu_list = []
                        '''
						for ref_caption in self.data.val_caption_str_list[i]:
							if caption_str != '':
								#bleu = bleu_eval.BLEU_fn(caption_str, ref_caption)
								bleu = pyter.ter(caption_str, ref_caption)
							else:
								bleu = 0.0
							bleu_list.append(bleu)
						mean_bleu = np.mean(bleu_list)
						max_bleu = max(bleu_list)
						print 'Caption: "{}", Correct: {}, Average BLEU: {}, Best BLEU: {}'.format(caption_str, random.choice(self.data.val_caption_str_list[i]), mean_bleu, max_bleu)
						mean_bleu_list.append(mean_bleu)
						max_bleu_list.append(max_bleu)
						'''
                        # NOTE(review): caption_str is passed to pyter without
                        # tokenisation; if it is a plain string, TER is
                        # presumably computed per character - confirm.
                        ter_score = pyter.ter(
                            caption_str,
                            random.choice(self.data.val_caption_str_list[i]))
                        ter_score_list.append(ter_score)
                        # random.choice is called again here, so the printed
                        # "Correct" caption may differ from the one scored.
                        print 'Caption: "{}", Correct: {}, TER: {}'.format(
                            caption_str,
                            random.choice(self.data.val_caption_str_list[i]),
                            ter_score)
                    '''
					val_bleu = np.mean(max_bleu_list)
					print 'Validation BLEU: {}'.format(val_bleu)
					'''
                    val_ter = np.mean(ter_score_list)
                    print 'Validation TER: {}'.format(val_ter)

                    # Early stopping: a higher TER than the best so far
                    # counts against the patience budget.
                    if val_ter > last_val_ter:
                        patience_counter += 1
                        print 'Patience Counter: {}'.format(patience_counter)
                        if patience_counter > self.patience:
                            break
                    else:
                        patience_counter = 0
                        last_val_ter = val_ter
                    # Reached for every validated epoch that did not break,
                    # including epochs whose TER got worse.
                    tf.train.Saver().save(sess,
                                          self.save_path,
                                          global_step=epoch)
Exemplo n.º 31
0
def ter(hyp, ref):
    """Return ``pyter.ter(hyp, ref)`` unchanged (thin wrapper)."""
    score = pyter.ter(hyp, ref)
    return score
Exemplo n.º 32
0
#  You should have received a copy of the GNU General Public License
#  along with this program; if not, write to the Free Software
#  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
#  MA 02110-1301, USA.
#
import sys
import pyter
from nltk.tokenize import word_tokenize
import argparse

# Python 2 only: reload(sys) re-exposes setdefaultencoding so that the
# default string encoding can be switched to UTF-8.
reload(sys)
sys.setdefaultencoding("utf-8")

# Command line: <hyptextfile> <reftextfile> <resultfile>
parser = argparse.ArgumentParser()

parser.add_argument("hyptextfile", help="Hypothesis sentences")
parser.add_argument("reftextfile", help="Reference sentences")
parser.add_argument("resultfile", help="Result file")
args = parser.parse_args()

# NOTE(review): none of the three file handles below is closed explicitly.
reftext = open(args.reftextfile).readlines()
hyptext = open(args.hyptextfile).readlines()

result = open(args.resultfile, "w")

# One output line per sentence pair: the TER multiplied by the length of the
# second token list, i.e. the score with its length normalisation undone.
for pair in zip(reftext, hyptext):
    # NOTE(review): pair[0] comes from reftext but is bound to tokenizedhyp,
    # and pair[1] comes from hyptext but is bound to tokenizedref - the two
    # roles look swapped; confirm.
    tokenizedhyp = word_tokenize(pair[0])
    tokenizedref = word_tokenize(pair[1])
    result.write("{0}\n".format(
        pyter.ter(tokenizedhyp, tokenizedref) * len(tokenizedref)))