def test_ppl():
    """Check perplexity of the uniform and unigram LMs on the dev set.

    Expects the module-level fixtures ``x_tr`` (7 training sequences) and
    ``x_dev`` (1 dev sequence) to be populated before this test runs.
    """
    global x_tr, x_dev
    eq_(len(x_tr), 7)
    eq_(len(x_dev), 1)
    vocab = preproc.create_vocab(x_tr)
    uniformLM = lm.UniformLM(vocab)
    # Uniform LM over a 15-word vocab -> perplexity equals |V| on any text.
    eq_(15, lm.perplexity(uniformLM, x_dev))
    # Reuse the vocabulary built above: the original rebuilt it from the
    # identical input, which was redundant work with the same result.
    unigramLM = lm.UnigramLM(vocab, x_tr)
    assert_almost_equals(8.25, lm.perplexity(unigramLM, x_dev), places=3)
def test_bigram():
    """A bigram LM trained on the corpus gives deterministic dev-set scores."""
    global x_tr, x_dev
    eq_(len(x_tr), 7)
    eq_(len(x_dev), 1)
    vocabulary = preproc.create_vocab(x_tr)
    model = lm.NgramLM(vocabulary, x_tr, 2)
    # "dog" is the only continuation of "the" seen in training.
    eq_(1.0, model.probability("dog", "the"))
    eq_(2, lm.perplexity(model, x_dev))
def test_unigram():
    """Unigram LM probabilities are relative frequencies over 33 tokens."""
    global x_tr
    eq_(len(x_tr), 7)
    vocabulary = preproc.create_vocab(x_tr)
    eq_(len(vocabulary), 15)
    model = lm.UnigramLM(vocabulary, x_tr)
    eq_(2 / 33, model.probability("another"))
    eq_(1 / 33, model.probability("?"))
    # The empty-context normalizer is the total training token count.
    eq_(33, model._norm[()])
def test_uniform():
    """Uniform LM: every in-vocab word gets 1/|V|, OOV words get 0."""
    global x_tr
    eq_(len(x_tr), 7)
    vocabulary = preproc.create_vocab(x_tr)
    eq_(len(vocabulary), 15)
    model = lm.UniformLM(vocabulary)
    # 1/15 is the exact same double as the literal 0.06666666666666667.
    eq_(1 / 15, model.probability("the"))
    eq_(1 / 15, model.probability("?"))
    eq_(0.0, model.probability("notttt"))  # make sure to return 0 if word not in vocab
    # The probabilities must form a valid distribution over the vocabulary.
    assert_almost_equals(1.0, sum(model.probability(w) for w in model.vocab))
# NOTE(review): this is an IPython/Jupyter cell, not plain Python — the
# "!" lines are shell escapes and `reload` presumably comes from
# importlib/imp via the notebook setup. TODO confirm execution environment.
from snlp import preproc
reload(preproc);
# 1.1: read the corpus, tokenizing on whitespace.
x_train = preproc.read_data('data/corpus.csv',preprocessor=preproc.space_tokenizer)
! nosetests tests/test_preproc.py:test_space_tok
# ----------------------------------
# 1.2
reload(preproc);
! nosetests tests/test_preproc.py:test_create_vocab
print(preproc.create_vocab(x_train))
# ----------------------------------
# 2.1
from snlp import lm
reload(lm);
# Re-read the corpus after reloading the modules.
x_train = preproc.read_data('data/corpus.csv',preprocessor=preproc.space_tokenizer)
# ----------------------------------
# 2.2
# instantiate a uniform LM
vocab = preproc.create_vocab(x_train)
uniformLM = lm.UniformLM(vocab)
def test_create_vocab():
    """The training vocabulary should hold exactly 15 distinct symbols."""
    global x_tr
    eq_(len(preproc.create_vocab(x_tr)), 15)