示例#1
0
def test_ppl():
    """Check the perplexity of the uniform and unigram models on ``x_dev``.

    Uses the module-level fixtures ``x_tr`` (training data) and ``x_dev``
    (held-out data); both are only read here, so no ``global`` declaration
    is required.
    """
    # Guard the fixture sizes that the expected values below were derived for.
    eq_(len(x_tr), 7)
    eq_(len(x_dev), 1)

    vocab = preproc.create_vocab(x_tr)
    uniformLM = lm.UniformLM(vocab)
    # Perplexity is presumably computed in floating point, so compare with a
    # tolerance (as the unigram check below does) instead of exact equality.
    assert_almost_equals(15, lm.perplexity(uniformLM, x_dev))

    # A separate vocabulary object is built for the unigram model, mirroring
    # the original test, so the two models never share one vocab instance.
    vocab_uni = preproc.create_vocab(x_tr)

    unigramLM = lm.UnigramLM(vocab_uni, x_tr)
    assert_almost_equals(8.25, lm.perplexity(unigramLM, x_dev), places=3)
示例#2
0
def test_bigram():
    """Check a bigram probability and the bigram model's perplexity on ``x_dev``.

    Uses the module-level fixtures ``x_tr`` and ``x_dev``; both are only
    read here, so no ``global`` declaration is required.
    """
    # Guard the fixture sizes that the expected values below were derived for.
    eq_(len(x_tr), 7)
    eq_(len(x_dev), 1)

    vocab = preproc.create_vocab(x_tr)
    bigramLM = lm.NgramLM(vocab, x_tr, 2)  # 2 is passed as the n-gram order

    # Probabilities and perplexities are presumably floats; compare with a
    # tolerance so rounding in the implementation cannot cause a spurious
    # failure.
    assert_almost_equals(1.0, bigramLM.probability("dog", "the"))
    assert_almost_equals(2, lm.perplexity(bigramLM, x_dev))
示例#3
0
def test_unigram():
    """Check unigram probabilities and the normaliser of ``UnigramLM``.

    Uses the module-level fixture ``x_tr``; it is only read here, so no
    ``global`` declaration is required.
    """
    # Guard the fixture and vocabulary sizes the expected values depend on.
    eq_(len(x_tr), 7)
    vocab = preproc.create_vocab(x_tr)
    eq_(len(vocab), 15)

    unigramLM = lm.UnigramLM(vocab, x_tr)

    # Float literals keep the expected values correct even where ``/`` is
    # integer division (Python 2 without ``from __future__ import division``),
    # and the tolerant comparison avoids brittle exact float equality.
    assert_almost_equals(2.0 / 33, unigramLM.probability("another"))
    assert_almost_equals(1.0 / 33, unigramLM.probability("?"))

    # The normaliser stored under the empty-tuple key is compared exactly.
    eq_(33, unigramLM._norm[()])
示例#4
0
def test_uniform():
    """Check that ``UniformLM`` assigns equal mass to every vocabulary word.

    Uses the module-level fixture ``x_tr``; it is only read here, so no
    ``global`` declaration is required.
    """
    # Guard the fixture and vocabulary sizes the expected values depend on.
    eq_(len(x_tr), 7)
    vocab = preproc.create_vocab(x_tr)
    eq_(len(vocab), 15)

    uniformLM = lm.UniformLM(vocab)

    # One fifteenth, matching the vocabulary size asserted above. Compared
    # with a tolerance rather than against a hard-coded float repr.
    expected = 1.0 / 15
    assert_almost_equals(expected, uniformLM.probability("the"))
    assert_almost_equals(expected, uniformLM.probability("?"))

    # make sure to return 0 if word not in vocab (exact zero is intended)
    eq_(0.0, uniformLM.probability("notttt"))

    # The probabilities over the whole vocabulary must form a distribution.
    assert_almost_equals(
        1.0, sum(uniformLM.probability(w) for w in uniformLM.vocab)
    )
示例#5
0
# Walkthrough script: loads the corpus, runs the preprocessing tests and
# builds a uniform language model.
# NOTE(review): the `!` lines look like IPython/Jupyter shell escapes, so this
# is presumably a notebook export and not runnable as plain Python — confirm.
from snlp import preproc

# Re-import the module so that edits made to it since the first import apply.
reload(preproc);
# Read the corpus, tokenising with preproc.space_tokenizer.
x_train = preproc.read_data('data/corpus.csv',preprocessor=preproc.space_tokenizer)

# Run only the space-tokenizer test.
! nosetests tests/test_preproc.py:test_space_tok


# ----------------------------------
# Section 1.2

# Pick up the latest preproc code, then run only the create_vocab test.
reload(preproc);
! nosetests tests/test_preproc.py:test_create_vocab

# Show the vocabulary built from the training data.
print(preproc.create_vocab(x_train))

# ----------------------------------
# Section 2.1

from snlp import lm
reload(lm);
# The corpus is read again here, replacing the earlier x_train.
x_train = preproc.read_data('data/corpus.csv',preprocessor=preproc.space_tokenizer)

# ----------------------------------
# Section 2.2

# Instantiate a uniform LM over the training vocabulary.
vocab = preproc.create_vocab(x_train)
uniformLM = lm.UniformLM(vocab)
示例#6
0
def test_create_vocab():
    """The vocabulary built from the ``x_tr`` fixture must hold 15 entries."""
    # ``x_tr`` is a module-level fixture that is only read here.
    vocabulary = preproc.create_vocab(x_tr)
    eq_(len(vocabulary), 15)