예제 #1
0
# -*- coding:utf-8 -*-
from nltk.util import ngrams
from util.kneser_ney import KneserNeyLM
from util.reader import Reader
import time

# Training script: read a segmented corpus, train a KneserNeyLM on its
# trigrams, and save the result to `lm_arpa`.

# Input corpus path and output model path.
train_seg = 'LM_data/train.seg'
lm_arpa = 'train.arpa'

# NOTE(review): presumably read_seg returns an iterable with one token
# sequence per sentence -- confirm against util.reader.
reader = Reader()
sents = reader.read_seg(train_seg)
print('Read sentences done!')

# Lazy stream of every trigram of every sentence. Padding is explicitly
# disabled on both sides, so no boundary symbols are injected.
ngrams3 = (
    trigram
    for sentence in sents
    for trigram in ngrams(sentence, 3, pad_left=False, pad_right=False)
)

# NOTE(review): the argument 3 presumably is the model order, matching the
# trigram stream above -- confirm against util.kneser_ney.
lm = KneserNeyLM(3)

# Only the train() call is timed; `ngrams3` is lazy, so any n-gram extraction
# it triggers happens inside this window as well.
print('Start train...')
train_started = time.time()
lm.train(ngrams3)
train_elapsed = time.time() - train_started
print('Train cost {:.2f}s'.format(train_elapsed))

# Persist the trained model, then report where it went.
lm.save_lm(lm_arpa)
print('Save trained lm to {:s}'.format(lm_arpa))
예제 #2
0
# -*- coding:utf-8 -*-
from util.reader import Reader
from util.kneser_ney import KneserNeyLM
import time

# Evaluation script: load a previously saved model and run it over the
# held-out segmented corpus, reporting how long each phase takes.

# Held-out corpus path and the model file to load.
test_seg = 'LM_data/test.seg'
lm_arpa = 'train.arpa'

# NOTE(review): presumably read_seg returns an iterable with one token
# sequence per sentence -- confirm against util.reader.
reader = Reader()
sents = reader.read_seg(test_seg)
print('Read sentences done!')

lm = KneserNeyLM(3)

# Phase 1: time loading the model from disk.
load_started = time.time()
lm.read_lm(lm_arpa)
load_elapsed = time.time() - load_started
print('Load lm done! Cost {:.2f}s'.format(load_elapsed))

# Phase 2: time the evaluation itself.
# NOTE(review): test_pp's return value is discarded here; presumably it
# reports its result (perplexity?) on its own -- confirm.
print('Test lm begin...')
test_started = time.time()
lm.test_pp(sents)
test_elapsed = time.time() - test_started
print('Test end. Cost {:.2f}s'.format(test_elapsed))