Python Cleaners примеры использования

Язык программирования: Python

Пространство имен/Пакет: TextClass.Cleaners

Класс/Тип: Cleaners

Примеров на hotexamples.com: 19

Python Cleaners - 19 примеров найдено. Это лучшие примеры Python кода для TextClass.Cleaners.Cleaners, полученные из open source проектов. Вы можете ставить оценку каждому примеру, чтобы помочь нам улучшить качество примеров.

Основные методы

Показать Скрыть

Cleaners(19)

remove_stopwords(3)

replace_regex_pattern(3)

remove_punctuation(2)

stem(2)

tokenize(2)

_load_stopwords(1)

lower_text(1)

remove_excess_spaces(1)

replace_string_from_list(1)

Пример #1

Показать файл

    def test_initialization_with_stopwords_from_file(self):
        """Check that stopwords loaded from a file match the file contents.

        The expected words are rebuilt from the same file: every line is
        split on commas, duplicates and empty entries are dropped, and the
        remaining entries are stripped of surrounding whitespace.
        """
        stopwords_filename = 'tests/stopwords/stopwords.txt'
        cl = Cleaners(stopwords_filename)

        with open(stopwords_filename) as file:
            unique_entries = {
                entry
                for line in file
                for entry in line.strip().split(',')
            }

        # discard() tolerates a file without empty entries, whereas
        # list.remove('') would raise ValueError in that case.
        unique_entries.discard('')
        expected = [entry.strip() for entry in unique_entries]

        # Set iteration order is arbitrary, so compare contents only.
        self.assertCountEqual(cl.stopwords, expected)

Пример #2

Показать файл

 def test_remove_punctuation_no_custom(self):
     """Check remove_punctuation with no custom punctuation supplied.

     The expected text has a space in place of every punctuation mark.
     """
     raw_text = 'palju:;" on siin!'
     expected = 'palju    on siin '
     cleaner = Cleaners()
     self.assertEqual(cleaner.remove_punctuation(raw_text), expected)

Пример #3

Показать файл

 def test_lower(self):
     """Check that lower_text converts mixed-case text to lower case."""
     cleaner = Cleaners()
     lowered = cleaner.lower_text('Suur TEXT')
     expected = 'suur text'
     self.assertEqual(lowered, expected)

Пример #4

Показать файл

 def test_remove_stopwords_default(self):
     """Check stopword removal using the stopwords given at construction.

     With the stopwords file loaded, only 'very' is expected to remain.
     """
     cleaner = Cleaners('tests/stopwords/stopwords.txt')
     tokens = ['yes', 'very', 'no']
     remaining = cleaner.remove_stopwords(tokens, return_string=True)
     self.assertEqual(remaining, 'very')

Пример #5

Показать файл

 def test_remove_stopwords_return_string(self):
     """Check that return_string=True yields the kept tokens as one string."""
     cleaner = Cleaners()
     tokens = ['mina', 'olen', 'pikk', 'poiss']
     remaining = cleaner.remove_stopwords(
         tokens, stopwords=['olen'], return_string=True)
     self.assertEqual(remaining, 'mina pikk poiss')

Пример #6

Показать файл

 def test_initalization_with_stopwords_from_list(self):
     """Check that a stopword list passed to the constructor is stored."""
     given_stopwords = ['a', 'the']
     cleaner = Cleaners(given_stopwords)
     self.assertEqual(cleaner.stopwords, given_stopwords)

Пример #7

Показать файл

 def test_replace_regex_pattern_literally(self):
     """Check that escape_regex=True treats the pattern as literal text.

     The two characters backslash + 'd' in the text are expected to be
     replaced by a single space, leaving three spaces in a row.
     """
     cl = Cleaners()
     # Raw strings: '\d' in a normal literal is an invalid escape sequence
     # (DeprecationWarning, SyntaxWarning since Python 3.12). The runtime
     # value is unchanged.
     text = r'verygood \d it is'
     clean = cl.replace_regex_pattern(r'\d', text, escape_regex=True)
     self.assertEqual(clean, 'verygood   it is')

Пример #8

Показать файл

 def test_replace_regex_pattern(self):
     """Check that a regex match is replaced by the given replacement.

     The digit is replaced by an empty string, so the spaces on both sides
     of it end up adjacent in the expected text.
     """
     cl = Cleaners()
     # Raw string: '\d' in a normal literal is an invalid escape sequence
     # (DeprecationWarning, SyntaxWarning since Python 3.12). The runtime
     # value is unchanged.
     clean = cl.replace_regex_pattern(pattern=r'\d',
                                      text='minu tekst 5 on siin',
                                      replace='')
     self.assertEqual(clean, 'minu tekst  on siin')

Пример #9

Показать файл

 def test_tokenize(self):
     """Check that tokenize splits a sentence into its words."""
     sentence = 'mina olen pikk poiss'
     expected_tokens = ['mina', 'olen', 'pikk', 'poiss']
     cleaner = Cleaners()
     self.assertEqual(cleaner.tokenize(sentence), expected_tokens)

Пример #10

Показать файл

 def test_load_stopwords(self):
     """Check that _load_stopwords fills cl.stopwords from a file.

     After loading the file by absolute path, the stopwords attribute is
     expected to be a list with two entries.
     """
     cl = Cleaners(None)
     cl._load_stopwords(
         os.path.join(os.getcwd(), 'tests/stopwords/stopwords.txt'))
     # assertIsInstance is the idiomatic type check and reports a clearer
     # failure message than comparing type objects for equality.
     self.assertIsInstance(cl.stopwords, list)
     self.assertEqual(len(cl.stopwords), 2)

Пример #11

Показать файл

 def test_replace_string_from_list(self):
     """Check that each listed string is replaced inside the text.

     The expected text has a single space where 'movie' used to be.
     """
     cleaner = Cleaners()
     original = "very bad movie it was"
     replaced = cleaner.replace_string_from_list(original, ['movie'])
     expected = "very bad   it was"
     self.assertEqual(replaced, expected)

Пример #12

Показать файл

 def test_stem_as_str(self):
     """Check that stem with as_list=False returns the stems as one string."""
     sentence = 'I was flying around heavily'
     expected = 'i was fli around heavili'
     stemmed = Cleaners().stem(sentence, 'english', as_list=False)
     self.assertEqual(stemmed, expected)

Пример #13

Показать файл

 def test_stem_as_list(self):
     """Check that stem without as_list returns the stems as a list."""
     sentence = 'I was flying around heavily'
     expected_stems = ['i', 'was', 'fli', 'around', 'heavili']
     stems = Cleaners().stem(sentence, 'english')
     self.assertEqual(stems, expected_stems)

Пример #14

Показать файл

 def test_remove_punctuation_custom(self):
     """Check remove_punctuation with a custom set of characters.

     The characters '4' and '5' are passed as custom punctuation; the
     expected text has a space in place of each of them.
     """
     cleaner = Cleaners()
     raw_text = 'suured 4 ja 5 pikad laused'
     stripped = cleaner.remove_punctuation(raw_text,
                                           custom_punctutation='45')
     self.assertEqual(stripped, 'suured   ja   pikad laused')

Пример #15

Показать файл

 def test_tokenize_list(self):
     """Check that tokenize handles a list of single-sentence lists."""
     nested_sentences = [['mina olen'], ['pikk poiss']]
     expected_tokens = [['mina', 'olen'], ['pikk', 'poiss']]
     cleaner = Cleaners()
     self.assertEqual(cleaner.tokenize(nested_sentences), expected_tokens)

Пример #16

Показать файл

 def test_remove_excess_space(self):
     """Check that runs of spaces are collapsed into single spaces."""
     cleaner = Cleaners()
     spaced_text = 'many   spaces    here'
     collapsed = cleaner.remove_excess_spaces(spaced_text)
     self.assertEqual(collapsed, 'many spaces here')

Пример #17

Показать файл

 def test_remove_stopwords_return_list(self):
     """Check that remove_stopwords returns the kept tokens as a list."""
     cleaner = Cleaners()
     tokens = ['mina', 'olen', 'pikk', 'poiss']
     remaining = cleaner.remove_stopwords(tokens, stopwords=['olen'])
     expected = ['mina', 'pikk', 'poiss']
     self.assertEqual(remaining, expected)

Пример #18

Показать файл

 def test_replace_regex_pattern_list(self):
     """Check that replace_regex_pattern is applied to each list element.

     The element consisting only of a digit is expected to become an empty
     string; the other elements are expected to be unchanged.
     """
     cl = Cleaners()
     # Raw string: '\d' in a normal literal is an invalid escape sequence
     # (DeprecationWarning, SyntaxWarning since Python 3.12). The runtime
     # value is unchanged.
     clean = cl.replace_regex_pattern(
         pattern=r'\d', text=['minu', 'tekst', '5', 'on siin'], replace='')
     self.assertEqual(clean, ['minu', 'tekst', '', 'on siin'])

Пример #19

Показать файл

 def test_initalization_without_stopwords(self):
     """Check that stopwords=None leaves the stopwords attribute as None."""
     cl = Cleaners(stopwords=None)
     # The original repeated this assertion twice; one check is sufficient.
     self.assertIsNone(cl.stopwords)