示例#1
0
def TFIDFFreq(singleText, freqTextcollection, freqSingleText):
    """Compute TF-IDF scores for the words of one text via their frequency form.

    The term-frequency part comes from ``_TFCalculteHebrewFreq`` (full words
    scored by the frequency of their reduced "frequency word"); each score is
    then scaled by the IDF of that word's frequency form within
    ``freqTextcollection``.

    Args:
        singleText: tokenized full words of the text being scored.
        freqTextcollection: the collection handed to ``_IDFCalculate``
            (presumably the frequency-form texts of the corpus; verify
            against the caller).
        freqSingleText: the frequency-form tokens of ``singleText``.

    Returns:
        The mapping produced by ``_TFCalculteHebrewFreq`` with every value
        multiplied by the matching IDF.
    """
    scores = _TFCalculteHebrewFreq(singleText, freqSingleText)

    # The IDF is looked up for the frequency form of the word, not for the
    # full word itself.
    for word in scores:
        idf = _IDFCalculate(freqTextcollection, Parshiot.processWordByFrequency(word))
        scores[word] *= idf

    return scores
示例#2
0
def _TFCalculteHebrewFreq(full_text_array, freq_text_array):
    # hebrew text is already tokenized from Parshiot.py

    # count the frequencies in the 2 letter words
    TF = Counter(freq_text_array)

    # divide by total number of words
    textLength = len(freq_text_array)
    for each in TF:
        TF[each] /= textLength

    # create a counter for the full words. Set each TF to the TF of the 2 letter word calculated before
    TFFinal = Counter(full_text_array)

    for word in TFFinal:
        freqWord = Parshiot.processWordByFrequency(word)
        TFFinal[word] = TF[freqWord]

    # return the TF dictionary containing each word and its relative frequency
    return TFFinal
示例#3
0
def parshaFreqIDF(parshaName, parshiot, freqParshiot):
    """Map every word of a parsha to the TF-IDF score of its frequency form.

    Args:
        parshaName: key selecting the parsha in both mappings.
        parshiot: mapping from parsha name to its full-word tokens.
        freqParshiot: mapping from parsha name to its frequency-form tokens;
            also passed whole to ``TFIDF`` as the collection.

    Returns:
        A ``Counter`` keyed by each distinct full word of the parsha, holding
        the score that ``TFIDF`` assigned to the word's frequency form.
    """
    # Scores are computed over the frequency-form text of the parsha.
    freq_scores = TFIDF(freqParshiot[parshaName], freqParshiot, 'hebrew')

    # Replace each raw count with the score of the word's frequency form.
    return Counter({
        word: freq_scores[Parshiot.processWordByFrequency(word)]
        for word in Counter(parshiot[parshaName])
    })