Example #1
0
def evaluate_pretain(searcher, voc, test_x, test_y):
    """Evaluate a pretrained searcher on a single (test_x, test_y) pair.

    Encodes ``test_x`` as a one-sentence batch, decodes it greedily with
    ``searcher`` (max 100 tokens), and compares the decoded token sequence
    against the reference fingerprint tokens derived from ``test_y``.

    Args:
        searcher: decoding module; called as ``searcher(input, lengths, max_len)``
            and expected to return ``(tokens, scores)``.
        voc: vocabulary with ``index2word`` mapping.
        test_x: input sentence (string).
        test_y: reference fingerprint(s) passed to ``indexesFromFPs``.

    Returns:
        tuple: ``(bleu_score, hit)`` where ``hit`` is 1 if the normalized
        Levenshtein similarity is >= 0.6, else 0.
    """
    # words -> indexes: input as a batch of one; reference as a flat index list
    x_indexes_batch = [indexesFromSentence(voc, test_x)]
    y_indexes_batch = indexesFromFPs(voc, test_y)
    # Lengths tensor for the (single-element) batch
    lengths = torch.Tensor([len(indexes) for indexes in x_indexes_batch])
    # Transpose to (seq_len, batch) to match the model's expected layout
    input_batch = torch.LongTensor(x_indexes_batch).transpose(0, 1)
    # Move everything to the configured device
    input_batch = input_batch.to(device)
    lengths = lengths.to(device)
    # Decode the sentence with the searcher (max output length 100)
    tokens, scores = searcher(input_batch, lengths, 100)
    tokens = tokens[:-1]  # drop final token (presumably EOS — confirm)
    # indexes -> words
    decoded_words = [voc.index2word[token.item()] for token in tokens]
    # NOTE(review): the *prediction* is passed as the BLEU reference and the
    # ground truth as the candidate — looks swapped vs. the usual convention;
    # kept as-is to preserve reported scores. Verify against the caller.
    reference = [decoded_words]
    candidate = [voc.index2word[token] for token in y_indexes_batch][:-1]
    print(test_x)
    print(''.join(decoded_words))
    print(''.join(candidate))
    score = sentence_bleu(reference, candidate)
    # Normalized Levenshtein similarity: 1.0 means identical sequences
    dis = 1 - distance.nlevenshtein(decoded_words, candidate)
    print(dis)
    print('-' * 80)
    return score, 1 if dis >= 0.6 else 0
Example #2
0
def evaluate_beam(encoder, decoder, voc, test1_x, test2_x, test3_x, test_y):
    """Beam-decode a prediction from three input sentences and BLEU-score it.

    Each of the three inputs is encoded as a one-sentence batch; all three
    are fed to ``beam_decode`` together, and the decoded token sequence is
    compared against the reference fingerprint tokens from ``test_y``.

    Args:
        encoder, decoder: model modules passed through to ``beam_decode``.
        voc: vocabulary with ``index2word`` mapping.
        test1_x, test2_x, test3_x: the three input sentences (strings).
        test_y: reference fingerprint(s) passed to ``indexesFromFPs``.

    Returns:
        float: sentence-level BLEU score.
    """
    def _prepare(sentence):
        # words -> indexes as a batch of one, transposed to (seq_len, batch)
        # and moved to the configured device; returns (input_batch, lengths).
        idx_batch = [indexesFromSentence(voc, sentence)]
        lengths = torch.Tensor([len(indexes) for indexes in idx_batch]).to(device)
        batch = torch.LongTensor(idx_batch).transpose(0, 1).to(device)
        return batch, lengths

    # Prepare the three inputs identically (was triplicated inline code)
    input_batch1, lengths1 = _prepare(test1_x)
    input_batch2, lengths2 = _prepare(test2_x)
    input_batch3, lengths3 = _prepare(test3_x)
    # Reference fingerprint indexes
    y_indexes_batch = indexesFromFPs(voc, test_y)
    # Decode with beam search over all three encoded inputs
    tokens = beam_decode(encoder, decoder, input_batch1, lengths1,
                         input_batch2, lengths2, input_batch3, lengths3)
    # indexes -> words
    decoded_words = [voc.index2word[token.item()] for token in tokens]
    # NOTE(review): prediction used as BLEU reference and ground truth as
    # candidate — same (possibly swapped) convention as evaluate elsewhere.
    reference = [decoded_words]
    candidate = [voc.index2word[token] for token in y_indexes_batch][:-1]
    print(''.join(decoded_words))
    print(''.join(candidate))
    score = sentence_bleu(reference, candidate)
    return score
Example #3
0
def evaluate_1tm(searcher, voc, test_x, test_y, k):
    """One-to-many evaluation: decode up to ``k`` fingerprint candidates for
    ``test_x`` and score them against the ``k`` references in ``test_y``.

    Empty references and empty predictions among the first ``k`` are dropped
    before computing threshold-based precision/recall via ``cal_metrics_t``.

    Args:
        searcher: decoding module; called as ``searcher(input, lengths, max_len)``
            and expected to return ``(tokens, scores)`` where ``tokens`` is an
            iterable of per-candidate token sequences.
        voc: vocabulary with ``index2word`` mapping.
        test_x: input sentence (string).
        test_y: sequence of ``k`` reference fingerprint strings ('' = absent).
        k: number of candidates/references to consider.

    Returns:
        tuple: ``(precision, recall)`` at similarity threshold 0.5.
    """
    # words -> indexes: input as a batch of one
    x_indexes_batch = [indexesFromSentence(voc, test_x)]
    # Lengths tensor for the (single-element) batch
    lengths = torch.Tensor([len(indexes) for indexes in x_indexes_batch])
    # Transpose to (seq_len, batch) to match the model's expected layout
    input_batch = torch.LongTensor(x_indexes_batch).transpose(0, 1)
    # Move everything to the configured device
    input_batch = input_batch.to(device)
    lengths = lengths.to(device)
    # Decode with the searcher (max output length 100); scores are unused here
    tokens, _ = searcher(input_batch, lengths, 100)
    # Drop the final token of each candidate (presumably EOS — confirm)
    tokens_list = [seq[:-1] for seq in tokens]
    # indexes -> words, per candidate
    decoded_words = [[voc.index2word[tok.item()] for tok in seq]
                     for seq in tokens_list]

    predicted_fps = []
    actual_fps = []
    sim_threshold = 0.5
    # Keep only non-empty references and non-empty predictions among the first k
    for i in range(k):
        if test_y[i] != '':
            actual_fps.append(test_y[i])
        if ''.join(decoded_words[i]) != '':
            predicted_fps.append(''.join(decoded_words[i]))

    # Threshold-based precision/recall (third return value unused)
    p, r, _ = cal_metrics_t(predicted_fps, actual_fps, sim_threshold)
    return p, r