Python calc_up_to_ngram示例

编程语言: Python

命名空间/包名称: ngram

方法/功能: calc_up_to_ngram

hotexamples.com的示例: 4

Python calc_up_to_ngram - 共找到 4 个示例。这些是从开源项目中提取的、评价最高的 ngram.calc_up_to_ngram 真实 Python 示例。您可以为示例评分，以帮助我们提高示例质量。

示例#1

显示文件

文件： isaac_export.py 项目： panditanvita/ngram

# Build an n-gram table from donquixote.txt and pickle it to disk.
# Written for Python 2 (file contents are `str`); the single-argument
# print(...) calls behave the same as the print statement there.
import pickle
import re

from ngram import stochastic_walk, calc_up_to_ngram

# open don quixote to be used as the source; the context manager closes the
# file even if reading it raises
print("Opening sourcefile (donquixote.txt)")
with open("donquixote.txt", "rb") as source_file:
    dq = " ".join(source_file.readlines())

# clean up the source string: lowercase everything and drop every character
# that is not a letter, digit, underscore or space (newlines included)
print("Cleaning sourcefile (donquixote.txt)")
dq = re.sub("[^a-zA-Z0-9_ ]", "", dq.strip().lower())

# dq as a string is a sequence of characters; make it a list of words.
# str.split returns a new list, so the result must be assigned back --
# otherwise calc_up_to_ngram is handed the raw string instead of the words.
dq = dq.split(" ")

# calculate the n_grams at a given level and store it to a dict
n_grams = calc_up_to_ngram(dq, 1)  # 1 is the level, basically word counts

# save the ngram dictionary to a file using pickle; pickle data is a byte
# stream, so the file is opened in binary mode
with open("donquixote_ngram_dict.pk", "wb") as of:
    pickle.dump(n_grams, of)

for i in n_grams:
    print("%s %s" % (i, n_grams[i]))
# to load the ngram dictionary (only unpickle files you created yourself):
# with open("donquixote_ngram_dict.pk", "rb") as in_file:
#     n_grams = pickle.load(in_file)

示例#2

显示文件

# Build an n-gram table from donquixote.txt and pickle it to disk.
# Written for Python 2 (file contents are `str`); the single-argument
# print(...) calls behave the same as the print statement there.
import pickle
import re

from ngram import stochastic_walk, calc_up_to_ngram

SOURCE_PATH = "donquixote.txt"
PICKLE_PATH = "donquixote_ngram_dict.pk"

# open don quixote to be used as the source; `with` guarantees the handle is
# released even when the read fails
print("Opening sourcefile (donquixote.txt)")
with open(SOURCE_PATH, "rb") as source_file:
    dq = " ".join(source_file.readlines())

# clean up the source string: lowercase all the characters and remove
# anything that is not a letter, digit, underscore or space
print("Cleaning sourcefile (donquixote.txt)")
dq = re.sub("[^a-zA-Z0-9_ ]", "", dq.strip().lower())

# A bare `dq.split(" ")` throws its result away because strings are
# immutable; rebind dq so the n-gram step works on a list of words.
dq = dq.split(" ")

# calculate the n_grams at a given level and store it to a dict
n_grams = calc_up_to_ngram(dq, 1)  # 1 is the level, basically word counts

# save the ngram dictionary to a file using pickle ("wb": pickles are bytes)
with open(PICKLE_PATH, "wb") as of:
    pickle.dump(n_grams, of)

for i in n_grams:
    print("%s %s" % (i, n_grams[i]))
# to load the ngram dictionary (only unpickle files you created yourself):
# with open("donquixote_ngram_dict.pk", "rb") as in_file:
#     n_grams = pickle.load(in_file)

示例#3

显示文件

文件： do_test.py 项目： peterldowns/ngram

# NOTE: this excerpt starts mid-script; `f` must already be an open file
# object here (the open() call is not part of the snippet).
dq = " ".join(f.readlines())
f.close()
import re
print("Cleaning sourcefile (prideandprejudice.txt)")
# Lowercase the text, then strip every character other than letters, digits,
# underscores and spaces.
dq = re.sub(
    "[^a-zA-Z0-9_ ]",
    "",
    dq.strip().lower(),
)

sep = '\0'

# The two lines below are active: they switch the analysis to words by
# splitting the text on spaces.
sep = ' '
dq = dq.split(sep)

# Do the text generation. This takes a while.
from ngram import stochastic_walk, calc_up_to_ngram
n_grams = calc_up_to_ngram(dq, 5)

print("Walking (1-deep) ... ")
walk_depth_1 = stochastic_walk(dq, 50, 1, n_grams, sep)
joined_1 = sep.join(walk_depth_1)
print("\t%s" % joined_1)
print("")

print("Walking (3-deep) ... ")
walk_depth_3 = stochastic_walk(dq, 50, 3, n_grams, sep)
joined_3 = sep.join(walk_depth_3)
print("\t%s" % joined_3)
print("")

print("Walking (5-deep) ... ")
walk_depth_5 = stochastic_walk(dq, 50, 5, n_grams, sep)
joined_5 = sep.join(walk_depth_5)
print("\t%s" % joined_5)
print("")

示例#4

显示文件

文件： do_test.py 项目： panditanvita/ngram

# Generate sample text from prideandprejudice.txt with n-gram walks of
# increasing depth. Written for Python 2 (file contents are `str`); the
# single-argument print(...) calls behave the same as the print statement.
import re

from ngram import stochastic_walk, calc_up_to_ngram

# Read the whole source text; the context manager closes the file even if
# reading it raises.
with open("prideandprejudice.txt", "rb") as source_file:
    dq = " ".join(source_file.readlines())

print("Cleaning sourcefile (prideandprejudice.txt)")
# lowercase all the characters and drop everything that is not a letter,
# digit, underscore or space
dq = re.sub("[^a-zA-Z0-9_ ]", "", dq.strip().lower())

sep = '\0'

# The next two lines are active and switch the analysis to words. Comment
# them out to leave dq as one string with the '\0' separator above
# (presumably character-level analysis -- confirm against the ngram module).
sep = ' '
dq = dq.split(sep)

# do the text generation. this takes a while.
n_grams = calc_up_to_ngram(dq, 5)

# One walk per depth, in the original 1, 3, 5 order with identical output.
for depth in (1, 3, 5):
    print("Walking (%d-deep) ... " % depth)
    walk = stochastic_walk(dq, 50, depth, n_grams, sep)
    print("\t%s" % sep.join(walk))
    print("")