Exemplo n.º 1
0
 def test_sanity(self):
     # Table of (keyword arguments, expected output) for
     # preprocess.separate: each expected string has the separator
     # characters set apart from the neighbouring words by spaces.
     cases = (
         ('how are you?', {'sep': '?'}, 'how are you ?'),
         ('how are you,man?',
          {'sep': ('?', ','), 'between_char': True},
          'how are you , man ?'),
         # No keyword arguments: relies on the function's defaults.
         ('how are! you?', {}, 'how are ! you ?'),
     )
     for text, options, expected in cases:
         self.assertEqual(preprocess.separate(text, **options), expected)
Exemplo n.º 2
0
def separate(text: AnyStr,
             sep: Union[str, Sequence[str]] = ('!', '?', '.'),
             between_char=False) -> str:
    """Forward to ``_preprocess.separate`` after emitting a deprecation warning.

    Args:
        text: Passed through unchanged as ``text``.
        sep: Passed through unchanged as ``sep``.
        between_char: Passed through unchanged as ``between_char``.

    Returns:
        Whatever ``_preprocess.separate`` returns for the same arguments.
    """
    message = ("This function will be deprecated in future versions. "
               "preprocess.separate")
    # stacklevel=2 attributes the warning to the caller of this wrapper.
    warnings.warn(message, category=DeprecationWarning, stacklevel=2)
    return _preprocess.separate(text=text, sep=sep, between_char=between_char)
Exemplo n.º 3
0
    def test_sanity(self):
        # --- preprocess.separate -------------------------------------
        # (text, keyword arguments, expected output)
        separate_cases = (
            ('how are you?', {'sep': '?'}, 'how are you ?'),
            ('how are you,man?',
             {'sep': ('?', ','), 'between_char': True},
             'how are you , man ?'),
            # No keyword arguments: relies on the function's defaults.
            ('how are! you?', {}, 'how are ! you ?'),
        )
        for text, options, expected in separate_cases:
            self.assertEqual(preprocess.separate(text, **options), expected)

        # --- Freq.sample ---------------------------------------------
        freq = Freq([1, 1, 2, 2, 3, 3, 4, 5, 6, 7, 8, 'hi', 'o', 'a'])

        # Items that occur exactly once in the input above.
        expected_singles = {4: 1, 5: 1, 6: 1, 7: 1, 8: 1,
                            'hi': 1, 'o': 1, 'a': 1}
        self.assertEqual(freq.sample(max_freq=1), expected_singles)

        # Items that occur exactly twice in the input above.
        expected_pairs = {1: 2, 3: 2, 2: 2}
        self.assertEqual(freq.sample(freq=2), expected_pairs)

        # Passing ``freq`` together with a min/max bound must be rejected.
        with self.assertRaises(AssertionError):
            freq.sample(freq=1, max_freq=2)
        with self.assertRaises(AssertionError):
            freq.sample(freq=1, min_freq=2)

        # --- Freq.least_freq -----------------------------------------
        freq = Freq([1, 2, 3, 3, 4, 5, 6, 7, 6, 7, 12, 31, 123, 5, 3])
        expected_least = {
            123: 1, 31: 1, 12: 1, 4: 1, 2: 1, 1: 1,
            7: 2, 6: 2, 5: 2,
            3: 3,
        }
        self.assertEqual(freq.least_freq(), expected_least)
Exemplo n.º 4
0
 def _preprocess(self, sentence, is_destructive: bool):
     """Run ``sentence`` through the cleaning steps and return the result.

     Lower-casing, accent removal, punctuation removal and stop-word
     filtering each run when ``is_destructive`` is truthy or when the
     matching ``self.config`` flag is set. Extra-character removal,
     non-language-element removal and separation always run, and
     contraction replacement runs only when ``self.config.name == 'en'``.
     """
     config = self.config
     text = sentence

     if is_destructive or config.to_lower:
         text = text.lower()

     text = _preprocess.remove_extra_chars(text)
     text = _preprocess.remove_non_language_elements(text)

     if config.name == 'en':
         text = _preprocess.replace_english_contractions(text)

     if is_destructive or config.is_remove_accent:
         text = _preprocess.accent_remove(text)

     text = _preprocess.separate(text)

     if is_destructive or config.is_remove_punctuation:
         text = _preprocess.remove_punctuation(text, config.punctuation)

     if is_destructive or config.is_remove_stop_words:
         # Keep only whitespace-delimited tokens that are not stop words.
         text = ' '.join(
             word for word in text.split()
             if word not in config.stop_words
         )

     # Final tidy-up pass, same helper as the one applied near the start.
     return _preprocess.remove_extra_chars(text)