Пример #1
0
def testA():
    """
    Print a pass/fail report for the word data loaded from 'a.txt'.

    The file is read with wordData.readWordFile. For three known words the
    value of wordData.totalOccurrences is compared against a hard-coded
    total, and the value returned by letterFreq.letterFreq (compared and
    concatenated as a string here) is checked against the expected letter
    ordering. Outcomes are only printed; a mismatch does not raise.
    """
    EXPECTED_TOTALS = {
        'airplane': 2487028,
        'alien': 5400198,
        'accept': 26299474,
    }
    EXPECTED_ORDERING = 'andetsrliocupgmybhvfwkxqjz'

    print('Testing with a.txt...\n')
    words = wordData.readWordFile('a.txt')

    # Check the total occurrences of each known word.
    for word, expected in EXPECTED_TOTALS.items():
        if wordData.totalOccurrences(word, words) == expected:
            status = 'OK'
        else:
            status = ('GOT: ' + str(wordData.totalOccurrences(word, words)) +
                      ', EXPECTED: ' + str(expected))
        print('Total occurrences of', word + ':', status)

    # Check the frequency ordering of the letters.
    ordering = letterFreq.letterFreq(words)
    if ordering == EXPECTED_ORDERING:
        print('\nFrequency ordering of letters OK')
        return
    print('Frequency ordering of letters incorrect.')
    print('GOT: ' + ordering)
    print('EXPECTED: ' + EXPECTED_ORDERING)
Пример #2
0
def letterFreq(words):
    """
    Compute the letter frequency values.

    Each character of a word contributes that word's total occurrence count
    (as reported by wordData.totalOccurrences) to the character's tally. The
    tallies are then divided by the overall character total, i.e. the sum of
    len(word) * occurrences over all words.

    :param words (dictionary): A dictionary mapping words to lists of YearCount
                               objects
    :return: A list containing the relative frequency of letters scaled by the
             total letter count in alphabetical order. It has one float per
             letter of string.ascii_lowercase; letters that never occur get
             0.0, and every entry is 0.0 when the overall total is zero.
    :rtype: list

    """
    letterDict = {}
    totalOccur = 0
    for word in words:
        # Look the word's total up once; it is the same for every character
        # of the word and for the overall total.
        occurrences = wordData.totalOccurrences(word, words)
        totalOccur += len(word) * occurrences
        for item in word:
            letterDict[item] = letterDict.get(item, 0) + occurrences

    # No counted characters at all: avoid dividing by zero.
    if not totalOccur:
        return [0.0] * len(string.ascii_lowercase)

    return [letterDict.get(letter, 0) / totalOccur
            for letter in string.ascii_lowercase]
Пример #3
0
def testVeryShort():
    """
    Print a pass/fail report for the word data loaded from 'very_short.csv'.

    For every word returned by wordData.readWordFile, the value of
    wordData.totalOccurrences is compared with a hard-coded total (a word
    missing from the expected table raises KeyError). The list returned by
    letterFreq.letterFreq is then compared, letter by letter and with exact
    equality, against the expected frequencies. Outcomes are only printed.
    """
    EXPECTED_TOTALS = {
        'airport': 348996,
        'request': 2816909,
        'wandered': 451106,
    }

    # Only letters with a non-zero expected frequency are listed; every other
    # letter of the alphabet is expected to be 0.0.
    NONZERO_FREQ = {
        'a': 0.03104758705050717,
        'd': 0.03500991824543893,
        'e': 0.2536276129665047,
        'i': 0.013542627927787708,
        'n': 0.017504959122719464,
        'o': 0.013542627927787708,
        'p': 0.013542627927787708,
        'q': 0.10930884736053291,
        'r': 0.15389906233882777,
        's': 0.10930884736053291,
        't': 0.12285147528832062,
        'u': 0.10930884736053291,
        'w': 0.017504959122719464,
    }
    EXPECTED_FREQ = [NONZERO_FREQ.get(ch, 0.0)
                     for ch in string.ascii_lowercase]

    print('Testing with very_short.csv...')
    words = wordData.readWordFile('very_short.csv')

    # Check the total occurrences of every word that was read.
    for word in words:
        if wordData.totalOccurrences(word, words) == EXPECTED_TOTALS[word]:
            status = 'OK'
        else:
            status = ('GOT: ' + str(wordData.totalOccurrences(word, words)) +
                      ', EXPECTED: ' + str(EXPECTED_TOTALS[word]))
        print('Total occurrences of', word + ':', status)

    # Check the frequency reported for each letter.
    freqList = letterFreq.letterFreq(words)
    for ch, got, expected in zip(string.ascii_lowercase, freqList,
                                 EXPECTED_FREQ):
        if got == expected:
            status = 'OK'
        else:
            status = 'GOT: ' + str(got) + ', EXPECTED: ' + str(expected)
        print('Frequency of', ch + ':', status)
Пример #4
0
def testVeryShort():
    """
    Report whether the 'very_short.csv' data matches the known answers.

    Every word read by wordData.readWordFile has its
    wordData.totalOccurrences value checked against a fixed table (looking up
    a word that is not in the table raises KeyError). Afterwards each entry of
    the list from letterFreq.letterFreq is checked, with exact equality,
    against the expected frequency of the corresponding lowercase letter.
    The function only prints its findings.
    """
    knownTotals = {'airport': 348996, 'request': 2816909, 'wandered': 451106}

    knownFreqs = [
        # a - e
        0.03104758705050717, 0.0, 0.0, 0.03500991824543893,
        0.2536276129665047,
        # f - j
        0.0, 0.0, 0.0, 0.013542627927787708, 0.0,
        # k - o
        0.0, 0.0, 0.0, 0.017504959122719464, 0.013542627927787708,
        # p - t
        0.013542627927787708, 0.10930884736053291, 0.15389906233882777,
        0.10930884736053291, 0.12285147528832062,
        # u - z
        0.10930884736053291, 0.0, 0.017504959122719464, 0.0, 0.0, 0.0,
    ]

    print('Testing with very_short.csv...')
    words = wordData.readWordFile('very_short.csv')

    # Verify totalOccurrences for each word in the file.
    for word in words:
        outcome = 'OK'
        if not wordData.totalOccurrences(word, words) == knownTotals[word]:
            outcome = ''.join([
                'GOT: ', str(wordData.totalOccurrences(word, words)),
                ', EXPECTED: ', str(knownTotals[word]),
            ])
        print('Total occurrences of', word + ':', outcome)

    # Verify the per-letter frequencies.
    freqList = letterFreq.letterFreq(words)
    triples = zip(string.ascii_lowercase, freqList, knownFreqs)
    for letter, actual, wanted in triples:
        outcome = 'OK'
        if not actual == wanted:
            outcome = ''.join(
                ['GOT: ', str(actual), ', EXPECTED: ', str(wanted)])
        print('Frequency of', letter + ':', outcome)
Пример #5
0
def main():
    """
    Interactively report word occurrences and plot letter frequencies.

    Prompts for a word file and a word, prints the word's total occurrences
    (wordData.totalOccurrences) and the letter frequencies (letterFreq), hands
    the frequencies to letterHist.letterFreqPlot, and finally waits for the
    user to press Enter. Returns None.
    """
    fileName = input("Enter word file: ")
    words = wordData.readWordFile(fileName)
    word = input("Enter word: ")
    occ = wordData.totalOccurrences(word, words)
    print("Total occurrences of ", word, ":", occ)
    # Compute the frequencies once and reuse the result for both the printed
    # report and the plot, rather than running letterFreq twice.
    freqList = letterFreq(words)
    print("Letter frequencies: ", freqList)
    print("# plot is displayed")
    letterHist.letterFreqPlot(freqList)
    input("Enter to exit")
Пример #6
0
def main():
    """
    Run the interactive letter-frequency report.

    Asks for a word file and a word, loads the file with
    wordData.readWordFile, prints the word's total occurrences and the list
    returned by letterFreq, passes that list to letterHist.letterFreqPlot,
    and then waits for Enter before returning.

    :return: None
    :rtype: NoneType

    """
    wordFile = input("Enter word file: ")
    query = input("Enter word: ")
    wordTable = wordData.readWordFile(wordFile)

    queryTotal = wordData.totalOccurrences(query, wordTable)
    print("Total occurrences of", query, ":", queryTotal)

    frequencies = letterFreq(wordTable)
    print("Letter frequencies:", frequencies)
    letterHist.letterFreqPlot(frequencies)

    input("Hit Enter to EXIT")
Пример #7
0
def wordFrequencies(words):
    """
    Build one WordCount per word and return them as a sorted list.

    Each entry is made by wordData.createWordCount from the word and its
    wordData.totalOccurrences value.

    :param words (dictionary): A dictionary mapping words to lists of YearCount
                               objects
    :return A list of WordCount objects; the original contract states they
            are in decreasing order from most to least frequent.
    :rtype: list

    """
    wordCounts = [
        wordData.createWordCount(word, wordData.totalOccurrences(word, words))
        for word in words
    ]
    # NOTE(review): `sort` is not a Python builtin, so it must come from
    # elsewhere in the module. Its return value is ignored, so the documented
    # ordering only holds if it sorts the list in place - confirm.
    sort(wordCounts)
    return wordCounts
Пример #8
0
def letterFreq(words):
    '''
    letterFreq(words)
    words (dic) -> dic mapping words to lists of YearCount objects; only the
    `count` attribute of each list entry is read.

    Every character of a word adds that word's total count (the sum of the
    `count` values in its list) to the character's tally. Each tally is then
    divided by the total number of characters counted, i.e. the sum of
    len(word) * word total over all words.

    Return a list of 26 floats containing the relative frequency of the
    letters 'a'..'z' in alphabetical order. Letters that never occur get 0.0,
    and every entry is 0.0 when the overall total is zero (including an empty
    dictionary). Characters outside 'a'..'z' still count towards the total but
    have no entry in the result.
    '''
    alphabet = 'abcdefghijklmnopqrstuvwxyz'
    tallies = {}
    total = 0
    for word, entries in words.items():
        # NOTE(review): this sum is assumed to equal
        # wordData.totalOccurrences(word, words), which the previous version
        # called separately for the overall total - confirm.
        occurrences = sum(entry.count for entry in entries)
        total += len(word) * occurrences
        for ch in word:
            tallies[ch] = tallies.get(ch, 0) + occurrences

    # Nothing was counted: avoid dividing by zero.
    if not total:
        return [0.0] * len(alphabet)

    return [tallies.get(ch, 0) / total for ch in alphabet]