Python WordNGram示例

编程语言: Python

命名空间/包名称: wordngram

类/类型: WordNGram

hotexamples.com的示例: 7

Python WordNGram - 共找到 7 个示例。这些是从开源项目中提取的、评价最高的 wordngram.WordNGram 真实 Python 代码示例。您可以为示例评分，帮助我们提高示例质量。

常用方法

显示/隐藏

WordNGram(7)

cols(1)

empty_count(1)

generate_sentence(1)

示例#1

显示文件

def test_one_gram_count():
    """Verify ``WordNGram(1, ...).counts`` for ``sample[0]`` and ``sample[1]``.

    Each expected table maps a word to a ``{key: count}`` dict; the inner
    keys are presumably the word seen just before it (confirm against the
    ``WordNGram`` implementation).
    """
    # One expected table per entry of ``sample``, in index order.
    expected_tables = [
        {
            'the': {'over': 1},
            'quick': {'the': 1},
            'brown': {'quick': 1},
            'fox': {'brown': 1},
            'jumps': {'fox': 1},
            'over': {'jumps': 1},
            'lazy': {'the': 1},
            'dog': {'lazy': 1},
        },
        {
            'auni': {'auni': 1, 'the': 1},
            'the': {'auni': 1, 'the': 3},
        },
    ]

    for index, expected in enumerate(expected_tables):
        model = WordNGram(1, sample[index])
        assert model.counts == expected

示例#2

显示文件

def test_zero_gram_count():
    """Verify ``WordNGram(0, ...).counts`` for ``sample[0]`` and ``sample[1]``.

    With a first argument of 0 every expected inner dict has the single
    key ``''`` mapped to a count for that word.
    """
    # One expected table per entry of ``sample``, in index order.
    expected_tables = [
        {
            'the': {'': 2},
            'quick': {'': 1},
            'brown': {'': 1},
            'fox': {'': 1},
            'jumps': {'': 1},
            'over': {'': 1},
            'lazy': {'': 1},
            'dog': {'': 1},
        },
        {
            'auni': {'': 2},
            'the': {'': 5},
        },
    ]

    for index, expected in enumerate(expected_tables):
        model = WordNGram(0, sample[index])
        assert model.counts == expected

示例#3

显示文件

def test_prev_n():
    """Exercise ``WordNGram.prev_n`` on a short string and on ``sample[0]``."""

    def _prev_n(source, c, size):
        # Build a model whose first argument is ``size`` and query prev_n(c).
        return WordNGram(size, source).prev_n(c)

    # Single-token input.
    assert _prev_n('abc', 3, 1) == list('')
    assert _prev_n('abc', 3, 3) == 'abc'.split(' ')

    # Small windows over the first sample.
    assert _prev_n(sample[0], 3, 1) == 'brown'.split(' ')
    assert _prev_n(sample[0], 3, 2) == 'quick brown'.split(' ')

    # Windows of size 8 - x queried at 8 must equal the lower-cased,
    # space-split sample from index x up to (not including) the last token.
    for x in (0, 1, 5):
        assert _prev_n(sample[0], 8, 8 - x) == sample[0].lower().split(' ')[x:-1]

示例#4

显示文件

def test_three_gram_count():
    """Verify ``WordNGram(3, sample[1]).counts``.

    The expected inner keys are three words joined with ``^``.
    """
    model = WordNGram(3, sample[1])
    expected = {
        'auni': {'the^the^the': 1, 'the^the^auni': 1},
        'the': {'the^the^the': 1, 'the^auni^auni': 1},
    }
    assert model.counts == expected

示例#5

显示文件

def test_clean():
    """Check the token list exposed as ``WordNGram(0, text).text``.

    The cases expect lower-cased words with the punctuation characters
    used here removed, including punctuation inside a word.
    """

    def _clean(raw):
        return WordNGram(0, raw).text

    cases = (
        ('~~', []),
        ('abc~~', ['abc']),
        ('ABC~~', ['abc']),
        ('the quick brown fox ', ['the', 'quick', 'brown', 'fox']),
        ('the quick brown fox ~~@!', ['the', 'quick', 'brown', 'fox']),
        ('the quick@#% brown fox ', ['the', 'quick', 'brown', 'fox']),
        ('th@#%!e quick brown fox ', ['the', 'quick', 'brown', 'fox']),
    )

    for raw, tokens in cases:
        assert _clean(raw) == tokens

示例#6

显示文件

文件： process.py 项目： auni53/compulinguistics

def main():
    """Read ``FILENAME`` and print generated sentences from three models.

    For each of ``WordNGram(0, text)``, ``WordNGram(1, text)`` and
    ``WordNGram(2, text)`` a heading is printed, followed by the results of
    ``generate_sentence`` called with 6, 7, 8, 9 and 10, then a blank line.
    """
    with open(FILENAME) as f:
        text = f.read()

    # (first WordNGram argument, heading printed before its sentences)
    models = (
        (0, "sentences from monogram"),
        (1, "sentences from bigrams"),
        (2, "sentences from trigram"),
    )

    # Single-argument, parenthesised print calls produce the same output on
    # Python 2 (print statement) and Python 3 (print function); the bare
    # ``print`` statements used before were a SyntaxError on Python 3.
    print("")
    for order, heading in models:
        ngram = WordNGram(order, text)
        print(heading)
        # The argument is presumably the sentence length -- confirm against
        # WordNGram.generate_sentence.
        for size in range(6, 11):
            print(ngram.generate_sentence(size))
        print("")

示例#7

显示文件

def test_empty_count():
    """Check ``empty_count()`` on a model built from an empty string.

    Its keys must equal the items of ``cols()``, and its values must be
    distinct objects: there are as many unique ``id`` values as columns.
    """
    model = WordNGram(0, '')
    table = model.empty_count()

    assert set(table.keys()) == set(model.cols())

    # Distinct ids show that no two keys share the same value object.
    distinct_ids = {id(value) for value in table.values()}
    assert len(distinct_ids) == len(model.cols())