Примеры использования StopWord.getStopWord в Python

Язык программирования: Python

Класс/Тип: StopWord

Метод/Функция: getStopWord

Примеров на hotexamples.com: 4

Python StopWord.getStopWord — найдено 4 примера. Это лучшие примеры кода на Python для StopWord.getStopWord из пакета 100knock2019, взятые из проектов с открытым исходным кодом. Вы можете оценить каждый пример, чтобы помочь нам улучшить качество подборки.

Основные методы

Показать Скрыть

EnglishStopWord(2)

filter_words(2)

getStopWord(2)

StopWord(1)

createVocabDict(1)

dictToStopwordList(1)

stop_word(1)

Пример #1

Показать файл

Файл: Input.py Проект: nsknojj/data_mining

def parse(stat, path='mirror/', n_news=10000):
    """Accumulate term statistics from numbered news files into ``stat``.

    Reads ``path + '1.txt'`` through ``path + '<n_news>.txt'``.  Each file is
    consumed as three lines: title, body, category.  Files whose category is
    not a key of ``stat.cats`` contribute nothing.  For the others, every
    body token that is at least two letters long and is not contained in the
    collection returned by ``StopWord.getStopWord()`` updates the counters on
    ``stat``: ``terms``, ``termToInt``, ``termInDoc``, ``termAmount``,
    ``termInCat``, ``catTermAmount`` and ``totalTerm``.

    Args:
        stat: Statistics object; mutated in place.
        path: Prefix prepended to ``<number>.txt`` to build each file name.
        n_news: Number of the last file to read (numbering starts at 1).

    Raises:
        IOError/OSError: If one of the numbered files cannot be opened.
    """
    stopWord = StopWord.getStopWord()
    print(str(stopWord))
    # Set mirror of stat.terms so membership tests are O(1) instead of a
    # list scan for every token.
    known_terms = set(stat.terms)
    # lastDoc[i] is the number of the last document in which term i was seen.
    # Sized to the terms already present so a pre-populated stat does not
    # cause an IndexError below.
    lastDoc = [-1] * len(stat.terms)
    for number in range(1, n_news + 1):
        filename = path + str(number) + '.txt'
        with open(filename, 'rb') as fin:
            # The file is read as bytes; decode explicitly so the str regex
            # and the str keys of stat.cats can be used.  Undecodable bytes
            # become U+FFFD, which the tokenizer treats as a separator.
            title = fin.readline().decode('utf-8', 'replace')
            print(number, title)
            body = fin.readline().decode('utf-8', 'replace')
            # Strip the line terminator, otherwise the lookup in stat.cats
            # only succeeds when the file has no trailing newline.
            category = fin.readline().decode('utf-8', 'replace').strip()

        if category not in stat.cats:
            continue
        cat_no = stat.cats[category]

        for item in re.split('[^a-zA-Z]+', body):
            item = item.lower()
            # len <= 1 also drops the empty strings re.split yields at the
            # edges of the body (e.g. after the final newline).
            if len(item) <= 1 or item in stopWord:
                continue
            stat.catTermAmount[cat_no] += 1
            if item not in known_terms:
                # First occurrence of this term anywhere: register it.
                stat.termToInt[item] = len(stat.terms)
                stat.terms.append(item)
                stat.termInDoc.append(0)
                stat.termAmount.append(0)
                lastDoc.append(-1)
                known_terms.add(item)
            stat.totalTerm += 1
            no = stat.termToInt[item]
            if lastDoc[no] != number:
                # Count each document at most once per term.
                lastDoc[no] = number
                stat.termInDoc[no] += 1
            stat.termAmount[no] += 1
            # NOTE(review): termInCat rows are not extended when a new term
            # is registered above - presumably they accept any term index;
            # verify against the stat class.
            stat.termInCat[cat_no][no] += 1

Пример #2

Показать файл

def parse(stat, path='mirror/', n_news=10000):
    """Accumulate term statistics from numbered news files into ``stat``.

    Reads ``path + '1.txt'`` through ``path + '<n_news>.txt'``.  Each file is
    consumed as three lines: title, body, category.  Files whose category is
    not a key of ``stat.cats`` contribute nothing.  For the others, every
    body token that is at least two letters long and is not contained in the
    collection returned by ``StopWord.getStopWord()`` updates the counters on
    ``stat``: ``terms``, ``termToInt``, ``termInDoc``, ``termAmount``,
    ``termInCat``, ``catTermAmount`` and ``totalTerm``.

    Args:
        stat: Statistics object; mutated in place.
        path: Prefix prepended to ``<number>.txt`` to build each file name.
        n_news: Number of the last file to read (numbering starts at 1).

    Raises:
        IOError/OSError: If one of the numbered files cannot be opened.
    """
    stopWord = StopWord.getStopWord()
    print(str(stopWord))
    # Set mirror of stat.terms so membership tests are O(1) instead of a
    # list scan for every token.
    known_terms = set(stat.terms)
    # lastDoc[i] is the number of the last document in which term i was seen.
    # Sized to the terms already present so a pre-populated stat does not
    # cause an IndexError below.
    lastDoc = [-1] * len(stat.terms)
    for number in range(1, n_news + 1):
        filename = path + str(number) + '.txt'
        with open(filename, 'rb') as fin:
            # The file is read as bytes; decode explicitly so the str regex
            # and the str keys of stat.cats can be used.  Undecodable bytes
            # become U+FFFD, which the tokenizer treats as a separator.
            title = fin.readline().decode('utf-8', 'replace')
            print(number, title)
            body = fin.readline().decode('utf-8', 'replace')
            # Strip the line terminator, otherwise the lookup in stat.cats
            # only succeeds when the file has no trailing newline.
            category = fin.readline().decode('utf-8', 'replace').strip()

        if category not in stat.cats:
            continue
        cat_no = stat.cats[category]

        for item in re.split('[^a-zA-Z]+', body):
            item = item.lower()
            # len <= 1 also drops the empty strings re.split yields at the
            # edges of the body (e.g. after the final newline).
            if len(item) <= 1 or item in stopWord:
                continue
            stat.catTermAmount[cat_no] += 1
            if item not in known_terms:
                # First occurrence of this term anywhere: register it.
                stat.termToInt[item] = len(stat.terms)
                stat.terms.append(item)
                stat.termInDoc.append(0)
                stat.termAmount.append(0)
                lastDoc.append(-1)
                known_terms.add(item)
            stat.totalTerm += 1
            no = stat.termToInt[item]
            if lastDoc[no] != number:
                # Count each document at most once per term.
                lastDoc[no] = number
                stat.termInDoc[no] += 1
            stat.termAmount[no] += 1
            # NOTE(review): termInCat rows are not extended when a new term
            # is registered above - presumably they accept any term index;
            # verify against the stat class.
            stat.termInCat[cat_no][no] += 1

Пример #3

Показать файл

Файл: Test.py Проект: nsknojj/data_mining

def test(stat, path='', n_test=10):
    """Classify numbered test files with ``stat`` and print the results.

    Reads ``path + '1.txt'`` through ``path + '<n_test>.txt'``; each file is
    consumed as three lines: title, body, category.  For every category in
    ``stat.cats`` a score is built by summing
    ``log((termInCat + 1) / (catTermAmount + number_of_terms))`` over the
    body tokens known to ``stat`` and adding
    ``log((catTermAmount + 0.01) / totalTerm)``; the highest-scoring category
    is the prediction.  Files whose true category is not in ``stat.cats`` are
    not counted.  Per-file verdicts, the overall hit count and per-category
    precision / recall / F values are printed; nothing is returned.

    Args:
        stat: Trained statistics object (read only).
        path: Prefix prepended to ``<number>.txt`` to build each file name.
        n_test: Number of the last file to read (numbering starts at 1).

    Raises:
        IOError/OSError: If one of the numbered files cannot be opened.
    """
    allCat = {'Crime and law': 0, 'Culture and entertainment': 0, 'Disasters and accidents': 0,
              'Science and technology': 0, 'Health': 0}
    # Also track any category of stat.cats missing from the table above, so
    # the counters below cannot raise KeyError for it.
    for name in stat.cats:
        allCat.setdefault(name, 0)
    callBack = dict(allCat)   # correct predictions per category
    callAll = dict(allCat)    # predictions made per category
    stopWord = StopWord.getStopWord()
    termSum = len(stat.terms)
    # O(1) membership instead of scanning the stat.terms list per token.
    known_terms = set(stat.terms)
    correct = 0
    wrong = 0
    for n in range(1, n_test + 1):
        filename = path + str(n) + '.txt'

        with open(filename, 'rb') as fin:
            # The file is read as bytes; decode explicitly so the str regex,
            # the str category keys and the str concatenation in the report
            # lines all operate on text.
            title = fin.readline().decode('utf-8', 'replace').strip()
            body = fin.readline().decode('utf-8', 'replace')
            cat = fin.readline().decode('utf-8', 'replace').strip()

        # Token filtering does not depend on the category: do it once per
        # document and keep only the indices of known terms.
        term_ids = []
        for t in re.split('[^a-zA-Z]+', body):
            t = t.lower()
            if t in stopWord or len(t) == 1:
                continue
            if t in known_terms:
                term_ids.append(stat.termToInt[t])

        maxi = 0
        toCat = ''
        for name in stat.cats:
            noC = stat.cats[name]
            denom = stat.catTermAmount[noC] + termSum
            p = 0.0
            for noT in term_ids:
                p += math.log(1.0 * (stat.termInCat[noC][noT] + 1) / denom)
            p += math.log(1.0 * (stat.catTermAmount[noC] + 0.01) / stat.totalTerm)
            # Scores are sums of logs; the toCat == '' test makes the first
            # category always replace the initial maxi of 0.
            if p > maxi or toCat == '':
                maxi = p
                toCat = name

        if cat in stat.cats:
            allCat[cat] += 1
            callAll[toCat] += 1
            if toCat == cat:
                callBack[cat] += 1
                correct += 1
                print(title + '  :  ' + cat + '   toCat: ' + toCat + '  Yes')
            else:
                wrong += 1
                print(title + '  :  ' + cat + '   toCat: ' + toCat + '  No')

    print('\nTotal Precision:  correct / total = %d / %d' % (correct, correct + wrong))
    for cat in allCat:
        print('[' + cat + ']')
        # -1 marks a value that is undefined because its denominator is 0.
        if callAll[cat] > 0:
            p = callBack[cat] * 100.0 / callAll[cat]
        else:
            p = -1
        if allCat[cat] > 0:
            r = callBack[cat] * 100.0 / allCat[cat]
        else:
            r = -1
        print('Precision : %d / %d = %.3f%%' % (callBack[cat], callAll[cat], p))
        print('Recall : %d / %d = %.3f%%' % (callBack[cat], allCat[cat], r))
        # When precision and recall are both 0 the harmonic mean would
        # divide by zero; report 0 instead of crashing.
        if p + r != 0:
            f = 2.0 * p * r / (p + r)
        else:
            f = 0.0
        print('F = %.3f%%' % f)

Пример #4

Показать файл

Файл: Test.py Проект: nsknojj/data_mining

def test(stat, path='', n_test=10):
    """Classify numbered test files with ``stat`` and print the results.

    Reads ``path + '1.txt'`` through ``path + '<n_test>.txt'``; each file is
    consumed as three lines: title, body, category.  For every category in
    ``stat.cats`` a score is built by summing
    ``log((termInCat + 1) / (catTermAmount + number_of_terms))`` over the
    body tokens known to ``stat`` and adding
    ``log((catTermAmount + 0.01) / totalTerm)``; the highest-scoring category
    is the prediction.  Files whose true category is not in ``stat.cats`` are
    not counted.  Per-file verdicts, the overall hit count and per-category
    precision / recall / F values are printed; nothing is returned.

    Args:
        stat: Trained statistics object (read only).
        path: Prefix prepended to ``<number>.txt`` to build each file name.
        n_test: Number of the last file to read (numbering starts at 1).

    Raises:
        IOError/OSError: If one of the numbered files cannot be opened.
    """
    allCat = {
        'Crime and law': 0,
        'Culture and entertainment': 0,
        'Disasters and accidents': 0,
        'Science and technology': 0,
        'Health': 0
    }
    # Also track any category of stat.cats missing from the table above, so
    # the counters below cannot raise KeyError for it.
    for name in stat.cats:
        allCat.setdefault(name, 0)
    callBack = dict(allCat)  # correct predictions per category
    callAll = dict(allCat)  # predictions made per category
    stopWord = StopWord.getStopWord()
    termSum = len(stat.terms)
    # O(1) membership instead of scanning the stat.terms list per token.
    known_terms = set(stat.terms)
    correct = 0
    wrong = 0
    for n in range(1, n_test + 1):
        filename = path + str(n) + '.txt'

        with open(filename, 'rb') as fin:
            # The file is read as bytes; decode explicitly so the str regex,
            # the str category keys and the str concatenation in the report
            # lines all operate on text.
            title = fin.readline().decode('utf-8', 'replace').strip()
            body = fin.readline().decode('utf-8', 'replace')
            cat = fin.readline().decode('utf-8', 'replace').strip()

        # Token filtering does not depend on the category: do it once per
        # document and keep only the indices of known terms.
        term_ids = []
        for t in re.split('[^a-zA-Z]+', body):
            t = t.lower()
            if t in stopWord or len(t) == 1:
                continue
            if t in known_terms:
                term_ids.append(stat.termToInt[t])

        maxi = 0
        toCat = ''
        for name in stat.cats:
            noC = stat.cats[name]
            denom = stat.catTermAmount[noC] + termSum
            p = 0.0
            for noT in term_ids:
                p += math.log(1.0 * (stat.termInCat[noC][noT] + 1) / denom)
            p += math.log(1.0 * (stat.catTermAmount[noC] + 0.01) /
                          stat.totalTerm)
            # Scores are sums of logs; the toCat == '' test makes the first
            # category always replace the initial maxi of 0.
            if p > maxi or toCat == '':
                maxi = p
                toCat = name

        if cat in stat.cats:
            allCat[cat] += 1
            callAll[toCat] += 1
            if toCat == cat:
                callBack[cat] += 1
                correct += 1
                print(title + '  :  ' + cat + '   toCat: ' + toCat +
                      '  Yes')
            else:
                wrong += 1
                print(title + '  :  ' + cat + '   toCat: ' + toCat +
                      '  No')

    print('\nTotal Precision:  correct / total = %d / %d' %
          (correct, correct + wrong))
    for cat in allCat:
        print('[' + cat + ']')
        # -1 marks a value that is undefined because its denominator is 0.
        if callAll[cat] > 0:
            p = callBack[cat] * 100.0 / callAll[cat]
        else:
            p = -1
        if allCat[cat] > 0:
            r = callBack[cat] * 100.0 / allCat[cat]
        else:
            r = -1
        print('Precision : %d / %d = %.3f%%' %
              (callBack[cat], callAll[cat], p))
        print('Recall : %d / %d = %.3f%%' % (callBack[cat], allCat[cat], r))
        # When precision and recall are both 0 the harmonic mean would
        # divide by zero; report 0 instead of crashing.
        if p + r != 0:
            f = 2.0 * p * r / (p + r)
        else:
            f = 0.0
        print('F = %.3f%%' % f)