def test_ppl():
    """Check perplexity of the uniform and unigram LMs on the dev set.

    Expects the module-level fixtures ``x_tr`` (7 training sequences) and
    ``x_dev`` (1 dev sequence) to be populated before this test runs.
    """
    global x_tr, x_dev
    eq_(len(x_tr), 7)
    eq_(len(x_dev), 1)
    vocab = preproc.create_vocab(x_tr)
    uniformLM = lm.UniformLM(vocab)
    # Uniform LM over a 15-word vocab -> perplexity equals |V| on any text.
    eq_(15, lm.perplexity(uniformLM, x_dev))
    # Reuse the vocabulary built above: the original rebuilt it from the
    # identical input, which was redundant work with the same result.
    unigramLM = lm.UnigramLM(vocab, x_tr)
    assert_almost_equals(8.25, lm.perplexity(unigramLM, x_dev), places=3)
def test_bigram():
    """A bigram LM trained on the corpus gives deterministic dev-set scores."""
    global x_tr, x_dev
    eq_(len(x_tr), 7)
    eq_(len(x_dev), 1)
    vocabulary = preproc.create_vocab(x_tr)
    model = lm.NgramLM(vocabulary, x_tr, 2)
    # "dog" is the only continuation of "the" seen in training.
    eq_(1.0, model.probability("dog", "the"))
    eq_(2, lm.perplexity(model, x_dev))
def test_unigram():
    """Unigram LM probabilities are relative frequencies over 33 tokens."""
    global x_tr
    eq_(len(x_tr), 7)
    vocabulary = preproc.create_vocab(x_tr)
    eq_(len(vocabulary), 15)
    model = lm.UnigramLM(vocabulary, x_tr)
    eq_(2 / 33, model.probability("another"))
    eq_(1 / 33, model.probability("?"))
    # The empty-context normalizer is the total training token count.
    eq_(33, model._norm[()])
def test_uniform():
    """Uniform LM: every in-vocab word gets 1/|V|, OOV words get 0."""
    global x_tr
    eq_(len(x_tr), 7)
    vocabulary = preproc.create_vocab(x_tr)
    eq_(len(vocabulary), 15)
    model = lm.UniformLM(vocabulary)
    # 1/15 is the exact same double as the literal 0.06666666666666667.
    eq_(1 / 15, model.probability("the"))
    eq_(1 / 15, model.probability("?"))
    eq_(0.0, model.probability("notttt"))  # make sure to return 0 if word not in vocab
    # The probabilities must form a valid distribution over the vocabulary.
    assert_almost_equals(1.0, sum(model.probability(w) for w in model.vocab))
# NOTE(review): this is an IPython/Jupyter cell, not plain Python — the
# "!" lines are shell escapes and `reload` presumably comes from
# importlib/imp via the notebook setup. TODO confirm execution environment.
from snlp import preproc
reload(preproc);
# 1.1: read the corpus, tokenizing on whitespace.
x_train = preproc.read_data('data/corpus.csv',preprocessor=preproc.space_tokenizer)
! nosetests tests/test_preproc.py:test_space_tok
# ----------------------------------
# 1.2
reload(preproc);
! nosetests tests/test_preproc.py:test_create_vocab
print(preproc.create_vocab(x_train))
# ----------------------------------
# 2.1
from snlp import lm
reload(lm);
# Re-read the corpus after reloading the modules.
x_train = preproc.read_data('data/corpus.csv',preprocessor=preproc.space_tokenizer)
# ----------------------------------
# 2.2
# instantiate a uniform LM
vocab = preproc.create_vocab(x_train)
uniformLM = lm.UniformLM(vocab)
def test_create_vocab():
    """The training vocabulary should hold exactly 15 distinct symbols."""
    global x_tr
    eq_(len(preproc.create_vocab(x_tr)), 15)