def testA():
    """
    Run the a.txt checks and print one result line per check.

    Reads 'a.txt' through wordData.readWordFile, compares
    wordData.totalOccurrences against the expected totals for three words,
    then compares the value returned by letterFreq.letterFreq against the
    expected letter-ordering string. Results are printed, not returned.

    :return None
    :rtype: NoneType
    """
    expectedTotals = {
        'airplane': 2487028,
        'alien': 5400198,
        'accept': 26299474,
    }
    expectedOrdering = 'andetsrliocupgmybhvfwkxqjz'

    print('Testing with a.txt...\n')
    wordTable = wordData.readWordFile('a.txt')

    # Check the per-word totals.
    for word, expected in expectedTotals.items():
        if wordData.totalOccurrences(word, wordTable) == expected:
            verdict = 'OK'
        else:
            verdict = ('GOT: ' + str(wordData.totalOccurrences(word, wordTable))
                       + ', EXPECTED: ' + str(expected))
        print('Total occurrences of', word + ':', verdict)

    # Check the letter ordering.
    ordering = letterFreq.letterFreq(wordTable)
    if ordering != expectedOrdering:
        print('Frequency ordering of letters incorrect.')
        print('GOT: ' + ordering)
        print('EXPECTED: ' + expectedOrdering)
    else:
        print('\nFrequency ordering of letters OK')
def letterFreq(words):
    """
    Compute the letter frequency values.

    Each word contributes its total occurrence count (as reported by
    wordData.totalOccurrences) once for every character it contains, so a
    letter repeated inside a word is counted once per repetition. The
    denominator is the sum of len(word) * occurrences over all words, i.e.
    it counts every character of every word, not only 'a'..'z'.

    :param words (dictionary): A dictionary mapping words to lists of
        YearCount objects
    :return: A list of 26 floats, one per letter of string.ascii_lowercase
        in alphabetical order. A letter that never appears gets 0.0. If the
        total character count is zero (no words, or every word has zero
        occurrences) the list is all 0.0.
    :rtype: list
    """
    letterCounts = {}
    totalChars = 0
    for word in words:
        # Look the count up once per word instead of once per character.
        occurrences = wordData.totalOccurrences(word, words)
        totalChars += len(word) * occurrences
        for ch in word:
            letterCounts[ch] = letterCounts.get(ch, 0) + occurrences

    # Guard the division: letters may have been seen with a zero total.
    if totalChars == 0:
        return [0.0] * len(string.ascii_lowercase)

    return [letterCounts.get(letter, 0) / totalChars
            for letter in string.ascii_lowercase]
def testVeryShort():
    """
    Run the very_short.csv checks and print one result line per check.

    Reads 'very_short.csv' through wordData.readWordFile, compares
    wordData.totalOccurrences for every word in the file against the
    expected totals, then compares each value returned by
    letterFreq.letterFreq against the expected per-letter frequency using
    exact equality. Results are printed, not returned.

    :return None
    :rtype: NoneType
    """
    expectedTotals = {
        'airport': 348996,
        'request': 2816909,
        'wandered': 451106,
    }
    expectedFreqs = [
        0.03104758705050717, 0.0, 0.0, 0.03500991824543893,
        0.2536276129665047, 0.0, 0.0, 0.0, 0.013542627927787708,
        0.0, 0.0, 0.0, 0.0, 0.017504959122719464,
        0.013542627927787708, 0.013542627927787708,
        0.10930884736053291, 0.15389906233882777,
        0.10930884736053291, 0.12285147528832062,
        0.10930884736053291, 0.0, 0.017504959122719464,
        0.0, 0.0, 0.0,
    ]

    print('Testing with very_short.csv...')
    wordTable = wordData.readWordFile('very_short.csv')

    # Check the per-word totals for every word found in the file.
    for word in wordTable:
        if wordData.totalOccurrences(word, wordTable) == expectedTotals[word]:
            verdict = 'OK'
        else:
            verdict = ('GOT: ' + str(wordData.totalOccurrences(word, wordTable))
                       + ', EXPECTED: ' + str(expectedTotals[word]))
        print('Total occurrences of', word + ':', verdict)

    # Check the per-letter frequencies, letter by letter.
    computed = letterFreq.letterFreq(wordTable)
    for ch, got, expected in zip(string.ascii_lowercase, computed, expectedFreqs):
        if got == expected:
            verdict = 'OK'
        else:
            verdict = 'GOT: ' + str(got) + ', EXPECTED: ' + str(expected)
        print('Frequency of', ch + ':', verdict)
def testVeryShort():
    """
    Print pass/fail lines for the very_short.csv data set.

    Loads 'very_short.csv' with wordData.readWordFile. For each word in the
    loaded data, the total from wordData.totalOccurrences is compared with
    the expected total; then each entry of the list returned by
    letterFreq.letterFreq is compared (exact equality) with the expected
    frequency for the matching lowercase letter.

    :return None
    :rtype: NoneType
    """
    totalsByWord = {'airport': 348996, 'request': 2816909, 'wandered': 451106}
    freqByLetter = [
        0.03104758705050717,   # a
        0.0,                   # b
        0.0,                   # c
        0.03500991824543893,   # d
        0.2536276129665047,    # e
        0.0,                   # f
        0.0,                   # g
        0.0,                   # h
        0.013542627927787708,  # i
        0.0,                   # j
        0.0,                   # k
        0.0,                   # l
        0.0,                   # m
        0.017504959122719464,  # n
        0.013542627927787708,  # o
        0.013542627927787708,  # p
        0.10930884736053291,   # q
        0.15389906233882777,   # r
        0.10930884736053291,   # s
        0.12285147528832062,   # t
        0.10930884736053291,   # u
        0.0,                   # v
        0.017504959122719464,  # w
        0.0,                   # x
        0.0,                   # y
        0.0,                   # z
    ]

    print('Testing with very_short.csv...')
    loaded = wordData.readWordFile('very_short.csv')

    # Word totals.
    for word in loaded:
        matches = wordData.totalOccurrences(word, loaded) == totalsByWord[word]
        if not matches:
            message = (f'GOT: {wordData.totalOccurrences(word, loaded)!s}'
                       f', EXPECTED: {totalsByWord[word]!s}')
        else:
            message = 'OK'
        print('Total occurrences of', word + ':', message)

    # Letter frequencies.
    frequencies = letterFreq.letterFreq(loaded)
    for ch, got, expected in zip(string.ascii_lowercase, frequencies, freqByLetter):
        message = 'OK' if got == expected else f'GOT: {got!s}, EXPECTED: {expected!s}'
        print('Frequency of', ch + ':', message)
def main():
    """
    Interactive driver for the letter-frequency program.

    Prompts for a word file and loads it with wordData.readWordFile, prompts
    for a word and prints its total from wordData.totalOccurrences, prints
    the list returned by letterFreq, passes that same list to
    letterHist.letterFreqPlot, and finally waits for Enter.

    :return None
    :rtype: NoneType
    """
    fileName = input("Enter word file: ")
    words = wordData.readWordFile(fileName)
    word = input("Enter word: ")
    occ = wordData.totalOccurrences(word, words)
    print("Total occurrences of ", word, ":", occ)

    # Compute the frequencies once and reuse them for both the printout and
    # the plot, instead of walking the whole word table twice.
    freqList = letterFreq(words)
    print("Letter frequencies: ", freqList)
    print("# plot is displayed")
    letterHist.letterFreqPlot(freqList)
    input("Enter to exit")
def main():
    """
    Program entry point.

    Asks for a word file and a word, loads the file with
    wordData.readWordFile, prints the word's total from
    wordData.totalOccurrences, prints the list returned by letterFreq,
    hands that list to letterHist.letterFreqPlot, then waits for Enter.

    :return None
    :rtype: NoneType
    """
    # Both prompts are issued before the file is read.
    path = input("Enter word file: ")
    target = input("Enter word: ")

    table = wordData.readWordFile(path)
    total = wordData.totalOccurrences(target, table)
    print("Total occurrences of", target, ":", total)

    frequencies = letterFreq(table)
    print("Letter frequencies:", frequencies)
    letterHist.letterFreqPlot(frequencies)

    input("Hit Enter to EXIT")
def wordFrequencies(words):
    """
    Build one word-count record per word and sort the resulting list.

    For every key in words, the total from wordData.totalOccurrences is
    passed with the word to wordData.createWordCount; the results are
    collected in a list, which is then handed to sort() and returned.

    NOTE(review): sort is not a Python builtin, so it must be provided
    elsewhere in this module. Its return value is ignored here, so it is
    presumably an in-place sort ordering from most to least frequent --
    confirm against its definition.

    :param words (dictionary): A dictionary mapping words to lists of
        YearCount objects
    :return The list of objects produced by wordData.createWordCount, after
        being passed through sort().
    :rtype: list
    """
    freqList = [
        wordData.createWordCount(word, wordData.totalOccurrences(word, words))
        for word in words
    ]
    sort(freqList)
    return freqList
def letterFreq(words):
    '''
    letterFreq(words)
    words (dic) -> dic mapping words to lists of YearCount objects

    Return a list containing the relative frequency of letters scaled by
    the total letter count in alphabetical order.

    A word's occurrence count is the sum of the count attribute over the
    entries in its list. Each character of the word is credited with that
    count (repeated letters are credited once per repetition), and the
    denominator is the sum of len(word) * count over all words, so it
    covers every character, not only 'a'..'z'.

    The result always has 26 floats, for 'a' through 'z'. A letter that
    never appears gets 0.0. If the total is zero (no words, or only zero
    counts) every entry is 0.0 rather than raising ZeroDivisionError.
    '''
    alphabet = 'abcdefghijklmnopqrstuvwxyz'
    letterCounts = {}
    total = 0
    for word, yearCounts in words.items():
        # Sum once per word; the same figure feeds both the per-letter
        # tallies and the denominator so the two cannot drift apart.
        occurrences = sum(yc.count for yc in yearCounts)
        total += len(word) * occurrences
        for ch in word:
            letterCounts[ch] = letterCounts.get(ch, 0) + occurrences

    if total == 0:
        return [0.0] * len(alphabet)

    # Direct lookup with a 0 default replaces the old placeholder-letter
    # list that was patched up afterwards with str(...).isalpha().
    return [letterCounts.get(letter, 0) / total for letter in alphabet]