def filterBazaakParshaReadTFIDF(parshaName, lang='heb', min_count=MIN_WORD_COUNT, splitParshiot=None, min_distance=MIN_DISTANCE):
    """Run a Bazaak read on one parsha, keeping only words whose TF-IDF
    score is in the top PERCENT% for that parsha.

    :param parshaName: key of the parsha within splitParshiot
    :param lang: language of the split parshiot ('heb' by default)
    :param min_count: minimum repetition count passed through to BazaakRead
    :param splitParshiot: optional pre-built split-parshiot mapping; built
        via Parshiot.createSplitParshiot(lang) when not supplied
    :param min_distance: maximum word distance passed through to BazaakRead
    :return: dict of BazaakRead results restricted to the top-TF-IDF words
    """
    if not splitParshiot:
        splitParshiot = Parshiot.createSplitParshiot(lang)
    topTFIDF = TFIDF.parshaIDF(parshaName, splitParshiot)
    totalWords = len(topTFIDF)
    # find the fraction of words to keep
    percent = PERCENT / 100
    # Keep only the top PERCENT% highest-scoring words. Using a set here
    # makes each membership test in the filter below O(1); the original
    # list made the final filter O(words * kept) overall.
    topWords = {word for word, _ in topTFIDF.most_common(int(totalWords * percent))}
    parsha = splitParshiot[parshaName]
    read = BazaakRead(parsha, min_count, min_distance)
    # keep only results whose key survived the TF-IDF cut
    return {k: v for k, v in read.items() if k in topWords}
def freqBazaakParshaRead(parshaName, freqParshiot=None, min_count=MIN_WORD_COUNT, min_distance=MIN_DISTANCE):
    """Bazaak read of a single parsha using the frequency-processed text.

    Builds the frequency parshiot via Parshiot.processParshiotByFrequency()
    when none is supplied, then delegates to BazaakRead.
    """
    if not freqParshiot:
        freqParshiot = Parshiot.processParshiotByFrequency()
    return BazaakRead(freqParshiot[parshaName], min_count, min_distance)
def BazaakParshaRead(parshaName, lang='heb', min_count=MIN_WORD_COUNT, splitParshiot=None, min_distance=MIN_DISTANCE):
    """Bazaak read of a single parsha from the split-parshiot text.

    Builds the split parshiot via Parshiot.createSplitParshiot(lang) when
    none is supplied, then delegates to BazaakRead.
    """
    if not splitParshiot:
        splitParshiot = Parshiot.createSplitParshiot(lang)
    return BazaakRead(splitParshiot[parshaName], min_count, min_distance)
def BazaakAll(lang='heb', min_count=5, min_distance=80, filtered=False, strippedDown=True):
    """Run a Bazaak read over every parsha and collect the results.

    :param lang: language key used when building split parshiot
    :param min_count: minimum repetition count for BazaakRead
    :param min_distance: maximum word distance for BazaakRead
    :param filtered: when True, restrict each parsha's result to its top
        TF-IDF words via filterBazaakParshaReadTFIDF
    :param strippedDown: when True, use the frequency-processed parshiot
        instead of the plain split parshiot
        (NOTE(review): the frequency mapping is then passed where the
        per-parsha readers expect split parshiot — confirm this is intended)
    :return: dict mapping each parsha name to its Bazaak read result
    """
    if strippedDown:
        parshiot = Parshiot.processParshiotByFrequency()
    else:
        parshiot = Parshiot.createSplitParshiot(lang)
    # choose the per-parsha reader once, then apply it uniformly
    if filtered:
        reader = BazaakRead.filterBazaakParshaReadTFIDF
    else:
        reader = BazaakRead.BazaakParshaRead
    return {parsha: reader(parsha, lang, min_count, parshiot, min_distance)
            for parsha in parshaNames}
def TFIDFFreq(singleText, freqTextcollection, freqSingleText):
    """TF-IDF where both factors come from the frequency-converted text
    (each word reduced to its 2-letter rarest-frequency form).

    TF is taken per full word from the frequency text; IDF is looked up
    for each word's frequency-word form across the frequency collection.
    (Local renamed from `TFIDF` so it no longer shadows the module-level
    `TFIDF` name used elsewhere in this file.)
    """
    scores = _TFCalculteHebrewFreq(singleText, freqSingleText)
    # scale each word's TF by the IDF of its frequency-word form
    for word in scores:
        scores[word] *= _IDFCalculate(freqTextcollection, Parshiot.processWordByFrequency(word))
    return scores
def BazaakOutput(lang='heb', min_count=5, parshaResults=None, fileName=None):
    """Write Bazaak results to a CSV file: column 1 is the parsha name,
    column 2 the list of repeated words found in it.

    :param lang: language key, used in the default file name and in BazaakAll
    :param min_count: minimum repetition count forwarded to BazaakAll
    :param parshaResults: optional pre-computed results (helpful when
        already generated, e.g. by a main driver); computed via
        BazaakAll(lang, min_count) when omitted
    :param fileName: optional output path; defaults to
        '<subDir><lang>BazaakOutput.csv'
    """
    if not fileName:
        fileName = subDir + lang + 'Bazaak' + 'Output' + '.csv'
    parshaNames = Parshiot.parshaNames()
    if not parshaResults:
        parshaResults = BazaakAll(lang, min_count)
    # newline='' is required by the csv module; without it each row gains
    # an extra blank line on Windows (csv docs, "csv files" note)
    with open(fileName, mode='w', encoding='utf-8', newline='') as csv_file:
        fieldnames = ['parsha', 'repeated words']
        writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
        writer.writeheader()
        for parsha in parshaNames:
            writer.writerow({
                'parsha': parsha,
                'repeated words': list(parshaResults[parsha].keys())
            })
def _TFCalculteHebrewFreq(full_text_array, freq_text_array):
    """Term frequency for Hebrew text via its frequency-converted form.

    Hebrew text arrives already tokenized from Parshiot.py. Relative
    frequencies are computed over the 2-letter frequency words, then each
    full word is assigned the TF of its frequency-word form.

    :return: Counter mapping each full word to that relative frequency
    """
    total = len(freq_text_array)
    # relative frequency of each 2-letter frequency word
    two_letter_tf = Counter(freq_text_array)
    for token in two_letter_tf:
        two_letter_tf[token] /= total
    # map every full word to the TF of its 2-letter form
    result = Counter(full_text_array)
    for word in result:
        result[word] = two_letter_tf[Parshiot.processWordByFrequency(word)]
    return result
def parshaFreqIDF(parshaName, parshiot, freqParshiot):
    """TF-IDF for a parsha's full words, scored via their frequency forms.

    Computes TF-IDF over the frequency-converted parshiot, then assigns
    each full word in the plain parsha the score of its frequency-word
    form.

    :return: Counter mapping each full word to its frequency-form TF-IDF
    """
    freqScores = TFIDF(freqParshiot[parshaName], freqParshiot, 'hebrew')
    result = Counter(parshiot[parshaName])
    # overwrite each word's count with the score of its frequency form
    for word in result:
        result[word] = freqScores[Parshiot.processWordByFrequency(word)]
    return result
import Parshiot, BazaakRead import csv import numpy as np import matplotlib.pyplot as plt hebResults = None engResults = None parshaNames = Parshiot.parshaNames() subDir = 'Results\\' # write bazaak results to a CSV file with parsha as column one and list of words as column 2 # option to pass in parshaResults (helpful if already generated, such as in the main here def BazaakOutput(lang='heb', min_count=5, parshaResults=None, fileName=None): if not fileName: fileName = subDir + lang + 'Bazaak' + 'Output' + '.csv' parshaNames = Parshiot.parshaNames() if not parshaResults: parshaResults = BazaakAll(lang, min_count) with open(fileName, mode='w', encoding='utf-8') as csv_file: fieldnames = ['parsha', 'repeated words'] writer = csv.DictWriter(csv_file, fieldnames=fieldnames) writer.writeheader() for parsha in parshaNames: writer.writerow({ 'parsha': parsha, 'repeated words': list(parshaResults[parsha].keys())