Пример #1
0
def test_hmm_count_correct():
    """Tag a fixed sentence with the HMM tagger and print its correctness counts.

    Loads the annotated corpus at ``trainTestData/en-ud-train.upos.tsv``,
    learns the parameters from it, tags the sample sentence with
    ``tagger.hmm_tag_sentence`` and prints the three values returned by
    ``tagger.count_correct`` for the gold/predicted pair.

    Nothing is asserted; the printed triple is meant for manual inspection.
    """
    sentence = "Tamir the AP comes this story :"
    tags = "PROPN DET PROPN VERB DET NOUN PUNCT"
    # Tokenise each string once and pair every word with its gold tag,
    # instead of re-splitting both strings on every loop iteration.
    words = sentence.split()
    gold_sentence = list(zip(words, tags.split()))

    sentences = tagger.load_annotated_corpus(
        'trainTestData/en-ud-train.upos.tsv')
    # learn_params yields six values; only the last two (A, B) are consumed
    # by the HMM tagger in this test, so the rest are discarded.
    _, _, _, _, A, B = tagger.learn_params(sentences)

    pred_sentence = tagger.hmm_tag_sentence(words, A, B)
    correct, correctOOV, OOV = tagger.count_correct(gold_sentence,
                                                    pred_sentence)
    print(correct, correctOOV, OOV)
Пример #2
0
def test_tag_sentence():
    """Print the result of ``tagger.tag_sentence`` for each model spec.

    Parameters are learned from ``trainTestData/en-ud-train.upos.tsv`` and
    the same sample sentence is then tagged once per model dictionary
    (baseline, HMM, vanilla BiLSTM, case-aware BiLSTM), printing each result.
    Nothing is asserted.
    """
    sentence = "Tamir the AP comes this story :"
    corpus = tagger.load_annotated_corpus(
        'trainTestData/en-ud-train.upos.tsv')
    learned = tagger.learn_params(corpus)
    # Six values are expected; the two count tables in the middle are unused.
    tag_totals, word_tag_totals, _transitions, _emissions, A, B = learned

    #  Models that must be supported (you can add more):
    model_specs = (
        {'baseline': [word_tag_totals, tag_totals]},
        {'hmm': [A, B]},
        {'blstm': [{}]},
        {'cblstm': [{}]},
    )

    # Each call receives its own freshly split token list.
    for spec in model_specs:
        print(tagger.tag_sentence(sentence.split(), spec))
Пример #3
0
import tagger

# Corpus locations (absolute paths; adjust for the local machine).
train_path = r"C:\src\MastersCourses\NLP\Assign_4\data\en-ud-train.upos.tsv"
dev_path = r"C:\src\MastersCourses\NLP\Assign_4\data\en-ud-dev.upos.tsv"

# Load both annotated corpora through the tagger module.
train_data = tagger.load_annotated_corpus(train_path)
dev_data = tagger.load_annotated_corpus(dev_path)

# Learn all parameters from the training data (six values are returned).
(allTagCounts, perWordTagCounts, transitionCounts, emissionCounts,
 A, B) = tagger.learn_params(train_data)

# --- Single-sentence check on the first dev sentence ---------------------
gold_sentence = dev_data[0]
# Keep only the first element of every item (the token to be tagged).
pred_sentence = [item[0] for item in gold_sentence]
print(f"tested sentence is {gold_sentence} of length {len(pred_sentence)}")

tagged_sentence = tagger.baseline_tag_sentence(
    pred_sentence, perWordTagCounts, allTagCounts)
correct, correctOOV, OOV = tagger.count_correct(gold_sentence, tagged_sentence)

print(f"correct: {correct}, correctOOV: {correctOOV}, OOV: {OOV}")

# --- Accumulate counts over the whole dev set ----------------------------
# score_nom sums the `correct` counts, score_denom sums the sentence lengths.
score_nom = 0
score_denom = 0
for gold_sentence in dev_data:
    pred_sentence = [item[0] for item in gold_sentence]
    tagged_sentence = tagger.baseline_tag_sentence(
        pred_sentence, perWordTagCounts, allTagCounts)
    correct, correctOOV, OOV = tagger.count_correct(
        gold_sentence, tagged_sentence)
    score_nom += correct
    score_denom += len(pred_sentence)
Пример #4
0
def test_learn_params(sentences):
    """Run ``tagger.learn_params`` on *sentences* and return its six results.

    The result is unpacked into exactly six values and handed back as a
    tuple in the same order: all-tag counts, per-word tag counts,
    transition counts, emission counts, A and B.
    """
    learned = tagger.learn_params(sentences)
    # Unpacking enforces that exactly six values were produced.
    tag_totals, word_tag_totals, transitions, emissions, A, B = learned
    return (tag_totals, word_tag_totals, transitions, emissions, A, B)
Пример #5
0
def check_sampled_sentence(gold_sentence, model_dict):
    """Tag one gold sentence with the given model and print the counts.

    Args:
        gold_sentence: sequence of items whose first element is the token
            to tag; it is also passed unchanged to ``count_correct``.
        model_dict: model specification forwarded to ``tag_sentence``.

    Prints the three values returned by ``count_correct``; returns None.
    """
    tokens = [item[0] for item in gold_sentence]
    prediction = tag_sentence(tokens, model_dict)
    counts = count_correct(gold_sentence, prediction)
    correct, correctOOV, OOV = counts
    print(f"correct: {correct}, correctOOV: {correctOOV}, OOV: {OOV}\n")


# Corpus file names, resolved relative to the working directory.
train_path = r"en-ud-train.upos.tsv"
dev_path = r"en-ud-dev.upos.tsv"

# Load the annotated training and development corpora.
train_data = load_annotated_corpus(train_path)
dev_data = load_annotated_corpus(dev_path)

# Learn all parameters from the training data (six values are returned).
(allTagCounts, perWordTagCounts, transitionCounts, emissionCounts,
 A, B) = learn_params(train_data)

# Pick one dev sentence at a random index for a spot check.
gold_sentence = dev_data[randrange(len(dev_data))]
print(f"tested random sentence is {gold_sentence} of length {len(gold_sentence)}\n")

# Baseline model: score the whole dev set, then the sampled sentence.
# A fresh model dict is built for every call.
calc_score(dev_data, {'baseline': [perWordTagCounts, allTagCounts]})
check_sampled_sentence(
    gold_sentence, {'baseline': [perWordTagCounts, allTagCounts]})

# HMM model: same two steps.
calc_score(dev_data, {'hmm': [A, B]})
check_sampled_sentence(gold_sentence, {'hmm': [A, B]})