def __init__(self, data, pred, ratio=0.85):
    """Keep the constructor arguments and set up empty split slots.

    Args:
        data: Stored unchanged on ``self.data``.
        pred: Stored unchanged on ``self.pred``.
        ratio: Stored unchanged on ``self.ratio`` (default ``0.85``);
            presumably the training share of a train/test split --
            TODO confirm against the code that consumes it.
    """
    self.data, self.pred, self.ratio = data, pred, ratio

    # Both slots start out as None; nothing in this constructor fills them.
    self._training_data = self._test_data = None

    # The logger is keyed by the lower-cased name of the concrete class.
    logger_name = self.__class__.__name__.lower()
    self.logger = get_logger(logger_name)
from string import punctuation
from itertools import filterfalse
from functools import partial
from nltk.tokenize import sent_tokenize, word_tokenize
from nltk.corpus import stopwords
from buttworld.logger import get_logger

# Module-level logger, named after this module.
logger = get_logger(__name__)

# Typographic characters (guillemets, em dash, ellipsis).
# NOTE(review): presumably used alongside string.punctuation; the code that
# consumes this constant is not visible here -- confirm.
EXTRA_PUNCTUATION = '«»—…'
# Characters that _is_sent_terminator() accepts as a sentence terminator.
TERMINATION_CHARS = '.»…!?)"\''

# Filter flags: each occupies its own bit so they can be combined with `|`.
# NOTE(review): the names suggest lowercase / punctuation / stopword / number
# filtering; the code that interprets the bits is not visible here.
F_LOWERCASE = 0b00001
F_PUNCTUATION = 0b00010
F_STOPWORD = 0b00100
F_NUMBER = 0b01000

# No flag bits set.
FILTER_NONE = 0b00000
# All four flag bits set.
FILTER_ALL = F_LOWERCASE | F_PUNCTUATION | F_STOPWORD | F_NUMBER


def _is_sent_terminator(token):
    """Return True when *token* occurs in ``TERMINATION_CHARS``.

    The check is ``str`` containment, i.e. a substring test: besides the
    single characters listed in the constant, the empty string and any
    contiguous run of the constant (for example ``'!?'``) also match.
    """
    return token in TERMINATION_CHARS


def _is_number(token):
    try:
def __init__(self):
    """Attach a logger keyed by the lower-cased concrete class name."""
    logger_name = self.__class__.__name__.lower()
    self.logger = get_logger(logger_name)
def __init__(self, max_retires=None):
    """Start with empty task/result lists and the given retry limit.

    Args:
        max_retires: Value stored on ``self.max_retries`` (default
            ``None``). The parameter name is spelled ``max_retires``
            while the attribute is ``max_retries``; the spelling is kept
            as-is because keyword callers depend on it.
    """
    self.max_retries = max_retires

    # Two separate list objects -- the attributes must not alias each other.
    self.tasks, self.results = [], []

    # The logger is keyed by the lower-cased name of the concrete class.
    logger_name = self.__class__.__name__.lower()
    self.logger = get_logger(logger_name)
def __init__(self, classifiers: Iterable):
    """Remember the classifiers and leave both data slots unset.

    Args:
        classifiers: Iterable stored unchanged on ``self.classifiers``.
    """
    # Neither data set is filled by this constructor.
    self.train_data = self.test_data = None

    self.classifiers = classifiers

    # The logger is keyed by the lower-cased name of the concrete class.
    logger_name = self.__class__.__name__.lower()
    self.logger = get_logger(logger_name)
def __init__(self, check_url: str = DEFAULT_CHECK_URL):
    """Record the check URL and reset the internal proxy state.

    Args:
        check_url: Stored unchanged on ``self.check_url``; defaults to
            the module-level ``DEFAULT_CHECK_URL``.
    """
    self.check_url = check_url

    # Both private slots start as None; this constructor does not fill them.
    self._instance_proxies = self._aiter = None

    # The logger is keyed by the lower-cased name of the concrete class.
    logger_name = self.__class__.__name__.lower()
    self.logger = get_logger(logger_name)