# Christian Clark, [email protected], 29 September 2014
#
# Reads the King James Bible text, builds its token and type lists with the
# textstats helper module, writes the two counts to an output file, and
# tallies how often each word bigram occurs.

import pickle
import textstats as ts

# NOTE(review): outFile is deliberately left open here -- presumably later
# parts of the script keep writing to it and close it; confirm.
outFile = open('bigram_bible_austen_out.txt', 'w')

# Part 1: The King James Bible

# (A) and (B) Create token and type lists from the text file
# The context manager closes the input file even if read() raises.
with open('../Ling 1330/gutenberg/gutenberg/bible-kjv.txt') as bInfile:
    bTxt = bInfile.read()
bToks = ts.getTokens(bTxt)
bTypes = ts.getTypes(bTxt)

# (C) Write out token and type counts to outFile
outFile.write('There are a total of ' + str(len(bToks)) + ' word tokens and ' +
              str(len(bTypes)) + ' word types in the King James Bible.' + '\n\n')

# (D) Create bigram frequency dictionary
# Kept as a plain dict (bigram -> count) so its type is unchanged for any
# later code that stores or inspects it.
bBigrFreq = {}
for bigr in ts.getWordNGrams(bToks, 2):
    bBigrFreq[bigr] = bBigrFreq.get(bigr, 0) + 1
# Christian ...
#
# Reads the text of Obama's speech (2009-Obama.txt), builds its token and
# type lists with the textstats helper module, writes the two counts to an
# output file, and tallies how often each word bigram occurs.

import pickle
import textstats as ts

# NOTE(review): outFile is deliberately left open here -- presumably later
# parts of the script keep writing to it and close it; confirm.
outFile = open('2009-Obama_out.txt', 'w')

# Part 1: Obama's speech (2009-Obama.txt)
# The "b" prefix on the names below is a leftover from the Bible version of
# this script; the names are kept because later code may refer to them.

# (A) and (B) Create token and type lists from the text file
# The context manager closes the input file even if read() raises.
with open('2009-Obama.txt') as bInfile:
    bTxt = bInfile.read()
bToks = ts.getTokens(bTxt)
bTypes = ts.getTypes(bTxt)

# (C) Write out token and type counts to outFile
outFile.write('There are a total of ' + str(len(bToks)) + ' word tokens and ' +
              str(len(bTypes)) + " word types in Obama's speech." + '\n\n')

# (D) Create bigram frequency dictionary
# Kept as a plain dict (bigram -> count) so its type is unchanged for any
# later code that stores or inspects it.
bBigrFreq = {}
for bigr in ts.getWordNGrams(bToks, 2):
    bBigrFreq[bigr] = bBigrFreq.get(bigr, 0) + 1