Пример #1
0
 def __init__(self, data, pred, ratio=0.85):
     """Keep the constructor arguments, reset the cached splits, add a logger.

     Args:
         data: Stored unchanged as ``self.data``.
         pred: Stored unchanged as ``self.pred``.
         ratio: Stored unchanged as ``self.ratio``; defaults to 0.85.
             NOTE(review): presumably the training share of a train/test
             split -- confirm against the code that reads it.
     """
     self.data, self.pred, self.ratio = data, pred, ratio

     # Both start unset; nothing in this method populates them.
     self._training_data = self._test_data = None

     # get_logger receives the lower-cased name of the instance's class.
     logger_name = self.__class__.__name__.lower()
     self.logger = get_logger(logger_name)
Пример #2
0
from string import punctuation
from itertools import filterfalse
from functools import partial

from nltk.tokenize import sent_tokenize, word_tokenize
from nltk.corpus import stopwords

from buttworld.logger import get_logger


# Module-level logger, obtained from the project's get_logger helper using
# this module's dotted name.
logger = get_logger(__name__)

# Non-ASCII punctuation characters: guillemets, em dash and ellipsis.
EXTRA_PUNCTUATION = '«»—…'
# Characters that _is_sent_terminator treats as ending a sentence.
TERMINATION_CHARS = '.»…!?)"\''


# Individual filter flags: each occupies its own bit so they can be OR-ed.
F_LOWERCASE = 1 << 0
F_PUNCTUATION = 1 << 1
F_STOPWORD = 1 << 2
F_NUMBER = 1 << 3

# Flag combinations: no flag set, and every flag set.
FILTER_NONE = 0
FILTER_ALL = F_LOWERCASE | F_PUNCTUATION | F_STOPWORD | F_NUMBER


def _is_sent_terminator(token: str) -> bool:
    """Return True when *token* occurs within ``TERMINATION_CHARS``.

    ``TERMINATION_CHARS`` is a plain string, so this is a substring test:
    besides single terminator characters, an empty token or a
    multi-character token that appears contiguously in that string also
    yields True.
    """
    return token in TERMINATION_CHARS


def _is_number(token):
    try:
Пример #3
0
 def __init__(self):
     """Attach a logger obtained for the lower-cased name of the class."""
     logger_name = self.__class__.__name__.lower()
     self.logger = get_logger(logger_name)
Пример #4
0
 def __init__(self, max_retires=None):
     """Record the retry limit, start empty task/result lists, add a logger.

     Args:
         max_retires: Value stored as ``self.max_retries``; defaults to None.
             NOTE(review): the parameter name looks like a misspelling of
             ``max_retries``; it is left as-is because callers may pass it
             by keyword.
     """
     self.max_retries = max_retires

     # Two separate, freshly created lists per instance.
     self.tasks, self.results = [], []

     # get_logger receives the lower-cased name of the instance's class.
     logger_name = self.__class__.__name__.lower()
     self.logger = get_logger(logger_name)
Пример #5
0
 def __init__(self, classifiers: Iterable):
     """Keep the given classifiers, leave the data sets unset, add a logger.

     Args:
         classifiers: Iterable stored unchanged as ``self.classifiers``.
     """
     # Neither data set is populated by this method.
     self.train_data = self.test_data = None
     self.classifiers = classifiers

     # get_logger receives the lower-cased name of the instance's class.
     logger_name = self.__class__.__name__.lower()
     self.logger = get_logger(logger_name)
Пример #6
0
 def __init__(self, check_url: str = DEFAULT_CHECK_URL):
     """Remember the check URL, clear the internal state, add a logger.

     Args:
         check_url: Stored unchanged as ``self.check_url``; defaults to
             ``DEFAULT_CHECK_URL``.
     """
     self.check_url = check_url

     # Both private attributes start unset; this method does not fill them.
     self._instance_proxies = self._aiter = None

     # get_logger receives the lower-cased name of the instance's class.
     logger_name = self.__class__.__name__.lower()
     self.logger = get_logger(logger_name)