Python nGram示例

编程语言: Python

命名空间/包名称: ngram

方法/功能: nGram

hotexamples.com的示例: 6

Python nGram - 共找到 6 个示例。以下是从开源项目中提取的、评价最高的 ngram.nGram 真实 Python 示例。您可以为示例评分，以帮助我们提高示例质量。

示例#1

显示文件

文件： spellcorrect.py 项目： DarkJ24/spellCheckPLN

 def __init__(self):
     """Set up the n-gram helper, word list, confusion matrix and dictionary.

     Builds an ``nGram`` instance, keeps the sorted unique entries of its
     ``words`` attribute from index 3246 onward, then calls
     ``loadConfusionMatrix()`` and stores the result of ``loadDict()``.
     """
     self.ng = nGram(True, True, False, False, False)

     # De-duplicate first, then sort, then drop the leading entries.
     # NOTE(review): the reason for skipping exactly 3246 entries is not
     # visible here -- confirm against the word source.
     unique_words = set(self.ng.words)
     ordered_words = sorted(unique_words)
     self.words = ordered_words[3246:]

     self.loadConfusionMatrix()
     self.dict = self.loadDict()

示例#2

显示文件

文件： spellcorrect.py 项目： JasonZhao001/spellcorrect

 def __init__(self):
     """Initialise the n-gram model, vocabulary, confusion matrix and dictionary.

     The vocabulary is the sorted set of ``self.ng.words`` with the first
     3246 entries removed. ``loadConfusionMatrix()`` is then invoked and the
     value returned by ``loadDict()`` is kept in ``self.dict``.
     """
     self.ng = nGram(True, True, False, False, False)

     # NOTE(review): the 3246 cut-off is unexplained in this snippet;
     # verify it against the data the nGram instance loads.
     vocabulary = sorted(set(self.ng.words))
     del vocabulary[:3246]
     self.words = vocabulary

     self.loadConfusionMatrix()
     self.dict = self.loadDict()

示例#3

显示文件

文件： model.py 项目： Bonsior-yyc/WrongCharacters

 def __init__(self):
     """Load the 3-gram table and create the helper objects.

     Reads ``3gram.csv`` into ``self.grams``, instantiates ``pinyin`` and
     ``nGram`` helpers, stores a list of punctuation marks in
     ``self.segment`` and prints a message once everything is set up.
     """
     self.grams = pd.read_csv('3gram.csv')
     self.py = pinyin()
     self.ng = nGram()

     # Punctuation marks kept on the instance as ``segment``.
     punctuation = [
         '，', '。', '？', '！',
         '：', '；', '……',
         '【', '】', '（', '）',
         '“', '”', "《", '》',
         '、',
     ]
     self.segment = punctuation

     print('init over')

示例#4

显示文件

            for i, key in enumerate(self.dic.keys()):
                f_csv.writerow({
                    'one': key[0],
                    'two': key[1],
                    'three': key[2],
                    'num': self.dic[key]
                })
                bar.bar(i, length, "Preprocessed ")
        print("\nfinish write: " + self.path)


if __name__ == '__main__':
    # Script entry point: split each selected file under raw_data into
    # whitespace-separated tokens, pass every token through ng.ngram() and
    # feed the result to the NGramGenerator, then save the generator.
    from ngram import nGram
    from visualization import Progress_bar

    ng = nGram()
    ngg = NGramGenerator(3)

    file_dir = 'raw_data'

    # Only the last ten directory entries are processed.
    # NOTE(review): os.listdir() returns entries in arbitrary order, so
    # which ten files are picked may vary between systems -- confirm intent.
    selected = os.listdir(file_dir)[-10:]

    for filename in selected:
        progress = Progress_bar()
        source_path = os.path.join(file_dir, filename)
        label = "Preprocessed " + filename
        with open(source_path, 'r', encoding='utf-8') as handle:
            tokens = handle.read().split()
            total = len(tokens)
            for position, token in enumerate(tokens):
                ngg.generate(ng.ngram(token))
                progress.bar(position, total, label)
        print("\nfinish index: " + filename)

    ngg.save()

示例#5

显示文件

文件： createNGram.py 项目： jezzi23/n-gram-spellchecker

import ngram
import sys
import pickle


# Command-line script: expects two arguments, ModuleFilename and N.
# It constructs ngram.nGram(N), prints its info and saves the object to
# ModuleFilename via ngram.saveObject().
if len(sys.argv) < 3:
    print("Error: Expected arguments: ModuleFilename,  N")
    # Exit with a non-zero status so the caller can detect the usage error
    # (a bare sys.exit() would report success).
    sys.exit(1)

moduleFilename = sys.argv[1]
n = int(sys.argv[2])

model = ngram.nGram(n)

# Create/truncate the output file up front. The context manager closes the
# handle immediately instead of leaving it open for the rest of the run.
with open(moduleFilename, 'w'):
    pass

model.printInfo()

ngram.saveObject(model, moduleFilename)

示例#6

显示文件

文件： tests.py 项目： thientu/ngram

import unittest
from ngram import nGram
# Module-level nGram instance used by the test methods in this file; it is
# constructed once, when the module is imported.
ng = nGram(n=5, corpus_file=None, cache=False)


class TestNgram(unittest.TestCase):
    def test_uni_log(self):
        """Check the n=1, 'log'-form probability of 'hold your horses'."""
        expected = -24.9337710989
        actual = ng.sentence_probability(
            sentence='hold your horses', n=1, form='log')
        self.assertAlmostEqual(actual, expected)

    def test_uni_antilog(self):
        """Check the n=1, 'antilog'-form probability of 'hold your horses'."""
        expected = 1.48388689281e-11
        actual = ng.sentence_probability(
            sentence='hold your horses', n=1, form='antilog')
        self.assertAlmostEqual(actual, expected)

    def test_bi_log(self):
        """Check the n=2, 'log'-form probability of 'hold your horses'."""
        expected = -18.655540764
        actual = ng.sentence_probability(
            sentence='hold your horses', n=2, form='log')
        self.assertAlmostEqual(actual, expected)

    def test_bi_antilog(self):
        """Check the n=2, 'antilog'-form probability of 'hold your horses'."""
        expected = 7.90681521418e-09
        actual = ng.sentence_probability(
            sentence='hold your horses', n=2, form='antilog')
        self.assertAlmostEqual(actual, expected)

    def test_tri_log(self):