Пример #1
0
# Build word-level n-gram counts for Don Quixote, pickle them to disk and
# print every entry.
import pickle
import re

from ngram import stochastic_walk, calc_up_to_ngram

# open don quixote and read it in to be used as the source
# (single-argument print(...) emits the same text whether print is a
# statement or a function)
print("Opening sourcefile (donquixote.txt)")
# the with-block closes the file even if reading raises
with open("donquixote.txt", "rb") as f:
    dq = " ".join(f.readlines())

# clean up the source string: lowercase it, then drop every character that
# is not a letter, digit, underscore or space
print("Cleaning sourcefile (donquixote.txt)")
dq = re.sub("[^a-zA-Z0-9_ ]", "", dq.strip().lower())

# dq as a string is a sequence of characters. make it a list of words.
# split() returns a new list and leaves dq untouched, so the result has to
# be assigned back; otherwise the n-grams are computed over characters.
dq = dq.split(" ")

# calculate the n_grams at a given level and store it to a dict
n_grams = calc_up_to_ngram(dq, 1)  # 1 is the level, basically word counts

# save the ngram dictionary to a file using pickle.
# binary mode keeps the pickle byte-exact on every platform and protocol.
with open("donquixote_ngram_dict.pk", "wb") as of:
    pickle.dump(n_grams, of)

# one "<key> <value>" line per entry
for i in n_grams:
    print("%s %s" % (i, n_grams[i]))

# to load the ngram dictionary (only unpickle files you created yourself):
# with open("donquixote_ngram_dict.pk", "rb") as pf:
#     n_grams = pickle.load(pf)

Пример #2
0
# Compute level-1 n-grams (word counts) for Don Quixote, save the resulting
# dictionary with pickle and list its contents.
import pickle
import re

from ngram import stochastic_walk, calc_up_to_ngram

# open don quixote and read it in to be used as the source
# (print(...) with one argument produces identical output as a statement
# or as a function call)
print("Opening sourcefile (donquixote.txt)")
# using a context manager so the handle is released even on a read error
with open("donquixote.txt", "rb") as f:
    dq = " ".join(f.readlines())

# clean up the source string
print("Cleaning sourcefile (donquixote.txt)")
# lowercase all the characters, then strip everything except letters,
# digits, underscores and spaces
dq = re.sub("[^a-zA-Z0-9_ ]", "", dq.strip().lower())

# dq as a string is a sequence of characters. make it a list of words.
# the split result must be rebound to dq: strings are immutable, so calling
# split() on its own changes nothing.
dq = dq.split(" ")

# calculate the n_grams at a given level and store it to a dict
n_grams = calc_up_to_ngram(dq, 1)  # 1 is the level, basically word counts

# save the ngram dictionary to a file using pickle; pickle files are opened
# in binary mode so the bytes are written unmodified
with open("donquixote_ngram_dict.pk", "wb") as of:
    pickle.dump(n_grams, of)

# print each key followed by its stored value
for i in n_grams:
    print("%s %s" % (i, n_grams[i]))

# to load the ngram dictionary (only unpickle files you created yourself):
# with open("donquixote_ngram_dict.pk", "rb") as pf:
#     n_grams = pickle.load(pf)
Пример #3
0
# Generate random text from Pride and Prejudice using n-gram walks of
# depth 1, 3 and 5.
import re

from ngram import stochastic_walk, calc_up_to_ngram

# read the source text. the original snippet used f without ever opening
# it; the filename is the one named in the status message below.
with open("prideandprejudice.txt", "rb") as f:
    dq = " ".join(f.readlines())

print("Cleaning sourcefile (prideandprejudice.txt)")
# lowercase all the characters and drop everything except letters, digits,
# underscores and spaces
dq = re.sub("[^a-zA-Z0-9_ ]", "", dq.strip().lower())

# separator used when dq is left as a plain string
sep = '\0'

# word analysis is active: the next two lines switch the separator to a
# space and turn dq into a list of words. comment them out to keep
# sep = '\0' and leave dq as a string instead.
sep = ' '
dq = dq.split(sep)

# do the text generation. this takes a while.
# 5 is the deepest level requested by the walks below.
n_grams = calc_up_to_ngram(dq, 5)

# NOTE(review): 50 is passed as the second argument of every walk;
# presumably the length of the generated sequence. confirm against
# ngram.stochastic_walk.
print("Walking (1-deep) ... ")
a = stochastic_walk(dq, 50, 1, n_grams, sep)
print("\t%s" % sep.join(a))
print("")

print("Walking (3-deep) ... ")
b = stochastic_walk(dq, 50, 3, n_grams, sep)
print("\t%s" % sep.join(b))
print("")

print("Walking (5-deep) ... ")
c = stochastic_walk(dq, 50, 5, n_grams, sep)
print("\t%s" % sep.join(c))
print("")
Пример #4
0
# Random text generation from Pride and Prejudice: clean the book, build
# n-grams up to level 5, then print one walk each at depth 1, 3 and 5.
import re

# slurp the whole book, joining its lines with single spaces
with open("prideandprejudice.txt", "rb") as f:
    dq = " ".join(f.readlines())

print("Cleaning sourcefile (prideandprejudice.txt)")
# lowercase all the characters, then keep only letters, digits,
# underscores and spaces
lowered = dq.strip().lower()
dq = re.sub("[^a-zA-Z0-9_ ]", "", lowered)

sep = '\0'

# the next two lines select word analysis: a space separator and dq as a
# list of words
sep = ' '
dq = dq.split(sep)

# do the text generation. this takes a while.
from ngram import stochastic_walk, calc_up_to_ngram
n_grams = calc_up_to_ngram(dq, 5)


def _report_walk(depth, banner):
    # Print the banner, run one walk at the given depth, print the joined
    # result followed by an empty line, and hand the walk back.
    print(banner)
    walk = stochastic_walk(dq, 50, depth, n_grams, sep)
    print("\t%s" % sep.join(walk))
    print("")
    return walk


a = _report_walk(1, "Walking (1-deep) ... ")
b = _report_walk(3, "Walking (3-deep) ... ")
c = _report_walk(5, "Walking (5-deep) ... ")