Пример #1
0
from corpus import get_data, get_lexicon
from hmm import get_HMM
from viterbi import decode
from evaluation import eval_model
import random
import csv

# Build a model from a random 3000-entry split of the corpus and print the
# confusion table for the ten worst tags / codes reported by eval_model.
#
# random.shuffle() reorders its argument in place and returns None, so the
# corpus must be bound to a name first and shuffled afterwards. The splits are
# then sliced from that single shuffled list rather than from fresh,
# unshuffled get_data() calls.
# NOTE(review): assumes get_data() returns a mutable sequence (the original
# shuffle call already required that) - confirm.
data = get_data("corpus.xml")
random.shuffle(data)
train_set = data[:3000]
test_set = data[3000:]
lexicon = get_lexicon("lexicon.txt")
model = get_HMM(train_set, lexicon)

# Evaluation here runs on the first half of the training split only.
results = eval_model(train_set[:1500], model)

# Restrict the table to the ten leading entries of each "worst" list.
conditions = results["worst_tags"][:10]
samples = results["worst_codes"][:10]
print(results["confusion"].tabulate(conditions, samples))

# Disabled alternative: show the confusion data as a pandas DataFrame.
# import pandas as pd
# tmp = pd.DataFrame(results["confusion"]).fillna(0)
# #print(results["confusion"].items())
# print(tmp)
"""
print("train set: 3000")
print("sent acc: ",results["sent_accuracy"])
print("     acc: ",results["accuracy"])

results = eval_model(test_set, model)
Пример #2
0
from corpus import get_data, get_lexicon
from hmm import get_HMM
from viterbi import decode
from evaluation import eval_model
import random

# Build the model from a full-slice copy of the corpus and evaluate it on that
# same data, so the figures printed below describe the training data itself.
train_set = get_data("corpus.xml")[:]
lexicon = get_lexicon("lexicon.txt")
model = get_HMM(train_set, lexicon)
results = eval_model(train_set, model)

# Each report line is a heading followed by the matching entry of `results`.
for _heading, _key in (
    ("\nworst_tags\n", "worst_tags"),
    ("\nworst_words:\n", "worst_words"),
    ("\nacc: ", "accuracy"),
):
    print(_heading, results[_key])

# Confusion-matrix output is switched off in this run:
# print("confusion matrix:")
# print(results["confusion"].tabulate())
Пример #3
0
from corpus import get_data, get_lexicon
from hmm import get_HMM
from viterbi import decode
from evaluation import eval_model
import random

# Build the model from the first 3000 corpus entries, then report evaluation
# results for both that training portion and the remaining entries.
#
# get_data() is called once and the result sliced twice, instead of parsing
# "corpus.xml" separately for each split.
corpus = get_data("corpus.xml")
train_set = corpus[:3000]
test_set = corpus[3000:]
lexicon = get_lexicon("lexicon.txt")
model = get_HMM(train_set, lexicon)

# The same report is produced for each split, training portion first.
# After the loop `results` holds the test-set evaluation, exactly as the
# original straight-line version left it.
for _label, _dataset in (
    ("train set: corpus[:3000]", train_set),
    ("test set: corpus[3000:] ", test_set),
):
    results = eval_model(_dataset, model)
    print(_label)
    print("\nworst_tags\n", results["worst_tags"])
    print("\nworst_words:\n", results["worst_words"])
    print("\nacc: ", results["accuracy"])
    # Confusion-matrix output is switched off:
    # print("confusion matrix:")
    # print(results["confusion"].tabulate())