def feed(self, character: str) -> None:
    """Consume one character and update the per-word statistics.

    Alphabetic characters are appended to the current word buffer. A
    whitespace, punctuation or separator character closes the buffered
    word and folds it into the running counters. Any other symbol seen
    while a word is buffered (except ``<``, ``>``, ``-``, ``=`` and
    digits) flags the current word as bad and is appended to the buffer.

    :param character: the next character of the text being analysed.
    """
    if character.isalpha():
        self._buffer = self._buffer + character

        if is_accentuated(character):
            self._buffer_accent_count += 1

        # Raise the "foreign" watch flag the first time a letter matches
        # none of the scripts tested below.
        if (
            self._foreign_long_watch is False
            and is_latin(character) is False
            and is_cjk(character) is False
            and is_hangul(character) is False
            and is_katakana(character) is False
            and is_hiragana(character) is False
            and is_thai(character) is False
        ):
            self._foreign_long_watch = True
        return

    # A non-letter with no word in progress carries no information.
    if not self._buffer:
        return

    # The buffer is known to be non-empty from here on.
    closes_word = (
        character.isspace()
        or is_punctuation(character)
        or is_separator(character)
    )

    if closes_word:
        word_length = len(self._buffer)

        self._word_count += 1
        self._character_count += word_length

        # A word is bad when it has 4+ letters of which at least 30% are
        # accentuated, or 24+ letters with the foreign watch flag raised.
        heavily_accentuated = (
            word_length >= 4
            and self._buffer_accent_count / word_length >= 0.3
        )
        long_and_foreign = word_length >= 24 and self._foreign_long_watch
        if heavily_accentuated or long_and_foreign:
            self._is_current_word_bad = True

        if self._is_current_word_bad:
            self._bad_word_count += 1
            self._bad_character_count += word_length
            self._is_current_word_bad = False

        # Reset the per-word state for the next word.
        self._foreign_long_watch = False
        self._buffer = ""
        self._buffer_accent_count = 0
        return

    if (
        character not in {"<", ">", "-", "="}
        and not character.isdigit()
        and is_symbol(character)
    ):
        self._is_current_word_bad = True
        self._buffer += character
def eligible(self, character: str) -> bool:
    """Tell whether *character* should be processed.

    The decision is delegated entirely to ``is_latin``.

    :param character: the character to evaluate.
    :return: whatever ``is_latin(character)`` returns.
    """
    latin = is_latin(character)
    return latin
def eligible(self, character: str) -> bool:
    """Tell whether *character* should be processed.

    A character qualifies only when it is alphabetic and ``is_latin``
    accepts it.

    :param character: the character to evaluate.
    :return: ``False`` for a non-alphabetic character, otherwise the
        result of ``is_latin(character)``.
    """
    # Non-letters are rejected without consulting is_latin.
    if not character.isalpha():
        return False
    return is_latin(character)