# NOTE: Logger, ExceptionsHandler and the other project services used throughout
# this file are internal to the repository; their imports are omitted, as in the source.
import requests


class SpellChecker:
    def __init__(self):
        # Services
        self.__logger = Logger()
        self._exceptions_handler = ExceptionsHandler()

        self.__logger.info('SpellChecker was successfully initialized.', __name__)

    def check_spelling(self, text):
        self.__logger.info(f'Start text: {text}', __name__)

        try:
            response = requests.get('https://speller.yandex.net/services/spellservice.json/checkText',
                                    params={'text': text}).json()
        except BaseException as exception:
            self.__logger.error(self._exceptions_handler.get_error_message(exception), __name__)
            return text

        for word in response:
            if word['s']:  # the speller may return an empty suggestion list for a word
                text = text.replace(word['word'], word['s'][0])

        self.__logger.info(f'Checked text: {text}', __name__)
        return text
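# A minimal usage sketch (hypothetical input; the Yandex Speller API must be reachable):
#
#     checker = SpellChecker()
#     fixed = checker.check_spelling('превет мир')
#     # -> 'привет мир', or the original text unchanged if the request fails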
import chardet
# FileReader is a Qt widget; PyQt5 is assumed here, since the source does not show its imports.
from PyQt5.QtWidgets import QWidget, QFileDialog


class FileReader(QWidget):
    def __init__(self):
        super().__init__()

        self.__logger = Logger()
        self.__file_dialog = QFileDialog()

        self.__logger.info('FileReader was successfully initialized.', __name__)

    def _detect_encoding(self, filename):
        with open(filename, 'rb') as byte_file:
            byte_string = byte_file.read()

        encoding = chardet.detect(byte_string)['encoding']
        self.__logger.info(f"File's encoding: {encoding}", __name__)

        return encoding

    def get_file_content(self):
        try:
            filename = self.__file_dialog.getOpenFileName(self, 'Open file', '/home')[0]
            self.__logger.info(f'Filename: {filename}', __name__)

            if filename:
                with open(filename, 'r', encoding=self._detect_encoding(filename)) as file:
                    return file.read()
        except BaseException as exception:
            self.__logger.error(str(exception), __name__)
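# Standalone sketch of the chardet-based detection used above; detection confidence
# drops on very short inputs, so a longer hypothetical sample is used:
#
#     import chardet
#     raw = ('Это пример текста для определения кодировки. ' * 3).encode('cp1251')
#     print(chardet.detect(raw)['encoding'])  # typically 'windows-1251'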
import os
import re

import gensim
import pymorphy2


class NgramAnalyzer:
    def __init__(self):
        # Services
        self._database_cursor = DatabaseCursor()
        self.__logger = Logger()
        self._exceptions_handler = ExceptionsHandler()
        self._lemmatizer = Lemmatizer()
        self._path_service = PathService()
        self._configurator = Configurator()
        self._morph_analyzer = pymorphy2.MorphAnalyzer()

        # Data
        self._vec_model = None
        self._load_vec_model()

        self.__logger.info('NgramAnalyzer was successfully initialized.', __name__)

    def _load_vec_model(self):
        if not self._path_service.path_to_vector_model:
            self.__logger.warning("Vector model doesn't exist.", __name__)

            self._configurator.download_vector_model()
            self._path_service.set_path_to_vector_model(os.path.join(self._path_service.path_to_databases,
                                                                     'ruscorpora_upos_skipgram_300_10_2017.bin.gz'))
            self.__logger.info('Vector model was successfully downloaded.', __name__)

        if self._path_service.path_to_vector_model:
            self._vec_model = gensim.models.KeyedVectors.load_word2vec_format(
                self._path_service.path_to_vector_model, binary=True)
        else:
            self.__logger.error("Vector model doesn't exist.", __name__)

    def _part_of_speech_detect(self, word):
        if not word:
            return

        part_of_speech = self._morph_analyzer.parse(word)[0].tag.POS

        if part_of_speech:
            # Map pymorphy2 tags onto the tag set used by the vector model.
            if re.match(r'ADJ', part_of_speech):
                return 'ADJ'
            elif re.match(r'PRT', part_of_speech):
                return 'PRT'
            elif part_of_speech == 'INFN':
                return 'VERB'
            elif part_of_speech in ('ADVB', 'PRED'):
                return 'ADV'
            elif part_of_speech == 'PRCL':
                return 'PART'

            return part_of_speech

    @staticmethod
    def _detect_ngram_type(ngram):
        if not ngram:
            return

        if ngram.count(' ') == 0:
            return 'unigram'
        elif ngram.count(' ') == 1:
            return 'bigram'
        elif ngram.count(' ') == 2:
            return 'trigram'

    def _nearest_synonyms_find(self, word, topn):
        if not self._vec_model or not word or topn <= 0:
            return

        nearest_synonyms = list()
        part_of_speech = self._part_of_speech_detect(word)
        ngram_type = self._detect_ngram_type(word)

        if part_of_speech:
            word = f'{word}_{part_of_speech}'

        try:
            for synonym in self._vec_model.most_similar(positive=[word], topn=topn * 10):
                found_synonym = self._lemmatizer.get_text_initial_form(synonym[0].split('_')[0])

                if found_synonym and self._detect_ngram_type(found_synonym) == ngram_type:
                    nearest_synonyms.append({'word': found_synonym, 'cosine proximity': synonym[1]})

                if len(nearest_synonyms) == topn:
                    break
        except BaseException as exception:
            self.__logger.warning(self._exceptions_handler.get_error_message(exception), __name__)
            return

        return nearest_synonyms

    def relevant_ngram_find(self, ngram):
        if not ngram:
            return

        self.__logger.info(f'Start ngram: {ngram}', __name__)

        response = {'synonym_found': False, 'content': dict()}

        if self._detect_ngram_type(ngram) == 'unigram':
            synonyms_count = 10
            nearest_synonyms = self._nearest_synonyms_find(ngram, synonyms_count)

            if not nearest_synonyms:
                return response

            for nearest_synonym in nearest_synonyms:
                data = self._database_cursor.get_entry(nearest_synonym['word'])

                if data and data[0]:
                    self.__logger.info(f'Relevant ngram: {nearest_synonym["word"]}', __name__)

                    response['synonym_found'] = True
                    response['content']['synonym'] = nearest_synonym['word']
                    response['content']['pos_docs'] = data[0]
                    response['content']['neg_docs'] = data[1]

                    return response

        return response
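# The vector model keys entries as 'lemma_POS' (e.g. 'хороший_ADJ'), which is why
# _nearest_synonyms_find appends a POS tag before the lookup. A hedged sketch
# (requires the downloaded model and the unigrams database):
#
#     analyzer = NgramAnalyzer()
#     print(analyzer.relevant_ngram_find('хороший'))
#     # -> {'synonym_found': True, 'content': {'synonym': ..., 'pos_docs': ..., 'neg_docs': ...}}
#     #    or {'synonym_found': False, 'content': {}} when nothing relevant is stored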
import json
import os
import re
from string import ascii_letters

import pymorphy2


class Lemmatizer:
    def __init__(self):
        # Services
        self._spell_checker = SpellChecker()
        self.__logger = Logger()
        self._path_service = PathService()
        self._morph_analyzer = pymorphy2.MorphAnalyzer()

        # Data
        self._stop_words = self._read_stop_words()
        self._parts_of_speech_to_remove = ['NUMR', 'NPRO', 'PREP']

        self.__logger.info('Lemmatizer was successfully initialized.', __name__)

    @staticmethod
    def _contains_latin_letter(word):
        if word:
            return all(map(lambda c: c in ascii_letters, word))

    def _detect_part_of_speech(self, word):
        if word:
            return self._morph_analyzer.parse(word)[0].tag.POS

    def _is_stop_word(self, word):
        if not word:
            self.__logger.warning('Got empty word.', __name__)
            return

        word = f' {word} '

        for stop_words in self._stop_words.values():
            if word in stop_words:
                return True

        return False

    def _remove_words_without_emotions(self, text):
        if not text:
            self.__logger.warning('Got empty text.', __name__)
            return

        cleaned_text = list()

        for word in re.findall(r'\w+', text):
            if self._detect_part_of_speech(word) not in self._parts_of_speech_to_remove and \
                    not self._is_stop_word(word):
                cleaned_text.append(word)

        return ' '.join(cleaned_text).strip()

    def _read_stop_words(self):
        if os.path.exists(self._path_service.path_to_stop_words):
            with open(self._path_service.path_to_stop_words, 'r', encoding='utf-8') as file:
                return json.load(file)

    def _delete_words_contains_latin_letters(self, text):
        text = ' '.join([word for word in re.findall(r'\w+', self._spell_checker.check_spelling(text.lower()))
                         if not self._contains_latin_letter(word) and word.isalpha()]).strip()

        if text:
            return text
        else:
            self.__logger.warning('All words in document contain latin letters or all words are digits.', __name__)

    def _get_text_normal_form(self, text):
        # Join normal forms with single spaces (the source appended an extra space to
        # every word, producing doubled spaces).
        return ' '.join(self._morph_analyzer.parse(word)[0].normal_form
                        for word in re.findall(r'\w+', text)).strip()

    def get_text_initial_form(self, text):
        if not text:
            self.__logger.warning('Got empty text.', __name__)
            return

        self.__logger.info(f'Start text: {text}', __name__)

        transformations = [self._delete_words_contains_latin_letters,
                           self._get_text_normal_form,
                           self._remove_words_without_emotions]

        for transformation in transformations:
            text = transformation(text)

            if not text:
                return

        self.__logger.info(f'Lemmatized text: {text}', __name__)
        return text
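# Pipeline sketch (hypothetical input; requires the stop-words file and network
# access for SpellChecker):
#
#     lemmatizer = Lemmatizer()
#     print(lemmatizer.get_text_initial_form('Мне очень понравились фильмы'))
#     # spelling fixed -> words lemmatized -> stop words and NUMR/NPRO/PREP words removed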
import csv
import math


class TextWeightCounter:
    def __init__(self):
        # Services
        self._database_cursor = DatabaseCursor()
        self._ngram_analyzer = NgramAnalyzer()
        self.__logger = Logger()
        self._path_service = PathService()

        # Data
        self._docs_count = dict()
        self._count_all_docs()

        self.__logger.info('TextWeightCounter was successfully initialized.', __name__)

    def _count_docs_in_dataset(self, mode):
        path_to_dataset = self._path_service.get_path_to_dataset(f'dataset_with_{mode}.csv')

        with open(path_to_dataset, 'r', encoding='utf-8') as file:
            # The negative count starts from a fixed shift that is subtracted from the
            # total below (kept from the source).
            negative_docs_shift = 10000
            positive_docs = 0
            negative_docs = negative_docs_shift

            for row in csv.reader(file):
                if ''.join(row).split(';')[1] == 'positive':
                    positive_docs += 1
                else:
                    negative_docs += 1

            return positive_docs + negative_docs - negative_docs_shift, positive_docs, negative_docs

    def _count_all_docs(self):
        modes = ['unigrams', 'bigrams', 'trigrams']

        for mode in modes:
            self._docs_count[mode] = dict()
            self._docs_count[mode]['all_docs'], self._docs_count[mode]['positive_docs'], \
                self._docs_count[mode]['negative_docs'] = self._count_docs_in_dataset(mode)

    @staticmethod
    def _detect_ngram_type(ngram):
        if ngram.count(' ') == 0:
            return 'unigram'
        elif ngram.count(' ') == 1:
            return 'bigram'
        elif ngram.count(' ') == 2:
            return 'trigram'

    def _count_ngram_weight(self, ngram):
        self.__logger.info(f'Ngram: {ngram}', __name__)

        ngram_type = self._detect_ngram_type(ngram)
        delta_tf_idf = 0

        self.__logger.info(f'Ngram_type: {ngram_type}', __name__)

        if self._database_cursor.entry_exists(ngram):
            pos_docs_word, neg_docs_word = self._database_cursor.get_entry(ngram)

            # Guard against zero counts before the logarithm (mirrors the check in the
            # synonym branch below; absent in the source).
            if not (pos_docs_word and neg_docs_word):
                return 0

            delta_tf_idf = math.log10((self._docs_count[ngram_type + 's']['negative_docs'] * pos_docs_word) /
                                      (self._docs_count[ngram_type + 's']['positive_docs'] * neg_docs_word))
        else:
            response = self._ngram_analyzer.relevant_ngram_find(ngram)

            if response['synonym_found'] and ngram_type == 'unigram':
                pos_docs_word, neg_docs_word = response['content']['pos_docs'], response['content']['neg_docs']

                if not (pos_docs_word and neg_docs_word) or (pos_docs_word == 1 and neg_docs_word == 1):
                    return 0

                delta_tf_idf = math.log10((self._docs_count[ngram_type + 's']['negative_docs'] * pos_docs_word) /
                                          (self._docs_count[ngram_type + 's']['positive_docs'] * neg_docs_word))

        return delta_tf_idf

    def count_weight_by_unigrams(self, unigrams):
        checked_unigrams = list()
        important_unigrams = list()
        unigrams_weight = 0

        for unigram in unigrams:
            if unigram not in checked_unigrams:
                this_doc_unigram = unigrams.count(unigram)
                unigram_weight = this_doc_unigram * self._count_ngram_weight(unigram)
                unigrams_weight += unigram_weight

                checked_unigrams.append(unigram)

                if unigram_weight:
                    important_unigrams.append(unigram)

        if important_unigrams and len(important_unigrams) >= round(len(unigrams) * 0.6):
            unigrams_weight = unigrams_weight / len(important_unigrams)

        self.__logger.info(f'Unigrams weight: {unigrams_weight}', __name__)
        return unigrams_weight

    def count_weight_by_bigrams(self, bigrams):
        if not bigrams:
            return None

        checked_bigrams = list()
        important_bigrams = list()
        bigrams_weight = 0

        for bigram in bigrams:
            if bigram not in checked_bigrams:
                this_doc_bigram = bigrams.count(bigram)
                bigram_weight = this_doc_bigram * self._count_ngram_weight(bigram)
                bigrams_weight += bigram_weight

                checked_bigrams.append(bigram)

                if bigram_weight:
                    important_bigrams.append(bigram)

        if important_bigrams and len(important_bigrams) >= len(bigrams) // 2:
            bigrams_weight = bigrams_weight / len(important_bigrams)

        self.__logger.info(f'Bigrams weight: {bigrams_weight}', __name__)
        return bigrams_weight

    def count_weight_by_trigrams(self, trigrams):
        if not trigrams:
            return None

        checked_trigrams = list()
        important_trigrams = list()
        trigrams_weight = 0

        for trigram in trigrams:
            if trigram not in checked_trigrams:
                this_doc_trigram = trigrams.count(trigram)
                trigram_weight = this_doc_trigram * self._count_ngram_weight(trigram)
                trigrams_weight += trigram_weight

                checked_trigrams.append(trigram)

                if trigram_weight:
                    important_trigrams.append(trigram)

        if important_trigrams and len(important_trigrams) >= round(len(trigrams) * 0.4):
            trigrams_weight = trigrams_weight / len(important_trigrams)

        self.__logger.info(f'Trigrams weight: {trigrams_weight}', __name__)
        return trigrams_weight
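# The weighting above is delta TF-IDF: tf * log10((N_neg * df_pos) / (N_pos * df_neg)),
# positive for n-grams that skew positive in the training data and negative otherwise.
# A self-contained sketch of the formula with hypothetical counts:
import math


def delta_tf_idf(term_frequency, pos_docs_word, neg_docs_word, positive_docs, negative_docs):
    """Weight of one n-gram in a document; all counts must be non-zero."""
    return term_frequency * math.log10((negative_docs * pos_docs_word) /
                                       (positive_docs * neg_docs_word))


# E.g. a word seen twice in the document, appearing in 120 positive and 30 negative
# training documents of a 5000/5000 corpus:
# delta_tf_idf(2, 120, 30, 5000, 5000) == 2 * log10(4) ≈ 1.204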
import json
import os


# Singleton is the project's own metaclass (import omitted in the source).
class PathService(metaclass=Singleton):
    def __init__(self):
        # Services
        self.__logger = Logger()

        # Data
        self._wd = os.getcwd()
        self.path_to_databases = None
        self.path_to_configs = None

        self._valid_classifiers = None
        self._valid_model_types = None
        self._valid_databases = None
        self._valid_test_results_modes = None
        self._valid_datasets = None

        self.path_to_stop_words = None
        self._path_to_main_directory = None
        self.path_to_vector_model = None
        self._path_to_classifier_models = None
        self._path_to_test_results = None

        self.configure()

        self.__logger.info('PathService was successfully configured.', __name__)

    def _find_main_directory(self):
        max_nesting_level = 5
        nesting_level = 0

        while not os.getcwd().endswith('Python'):
            if os.getcwd().endswith('Databases'):
                os.chdir(os.path.join('..', 'Python'))
                break
            else:
                os.chdir('..')

            nesting_level += 1

            if nesting_level > max_nesting_level:
                self.__logger.fatal("Can't find main directory (exceeded maximum nesting level).", __name__)
                exit(-1)

        self._path_to_main_directory = os.getcwd()
        self.path_to_configs = os.path.join(self._path_to_main_directory, 'Services', 'Configs')
        self.path_to_databases = os.path.abspath(os.path.join('..', 'Databases'))

        os.chdir(self._wd)

    def _check_paths_existing(self):
        if not os.path.exists(self.path_to_configs):
            self.__logger.fatal("Directory with config files doesn't exist.", __name__)
            exit(-1)
        elif not os.path.exists(self.path_to_databases):
            self.__logger.fatal("Directory with databases doesn't exist.", __name__)
            exit(-1)
        elif not os.path.exists(self._path_to_classifier_models):
            self.__logger.fatal("Directory with classifier models doesn't exist.", __name__)
            exit(-1)

        if not os.path.exists(self.path_to_vector_model):
            self.path_to_vector_model = None
            self.__logger.error("Vector model doesn't exist.", __name__)

        if not os.path.exists(self.path_to_stop_words):
            self.path_to_stop_words = None
            self.__logger.error("File with stop-words doesn't exist.", __name__)

        if not os.path.exists(self._path_to_test_results):
            self._path_to_test_results = None
            self.__logger.warning("Directory with tests reports doesn't exist.", __name__)

    def _load_config(self):
        path_to_config = os.path.join(self.path_to_configs, 'path_service.json')

        if not os.path.exists(path_to_config):
            self.__logger.error("Can't find config-file for PathService.", __name__)

        with open(path_to_config, 'r', encoding='utf-8') as file:
            config = json.load(file)

        self._valid_classifiers = config['valid_classifiers']
        self._valid_databases = config['valid_databases']
        self._valid_datasets = config['valid_datasets']
        self._valid_test_results_modes = config['valid_test_results_modes']
        self._valid_model_types = config['valid_model_types']

    def configure(self):
        self._find_main_directory()
        self._load_config()

        self.path_to_vector_model = os.path.join(self.path_to_databases,
                                                 'ruscorpora_upos_skipgram_300_10_2017.bin.gz')
        self.path_to_stop_words = os.path.join(self._path_to_main_directory, 'Services', 'Lemmatizer',
                                               'stop_words.json')
        self._path_to_classifier_models = os.path.join(self.path_to_databases, 'Models')
        self._path_to_test_results = os.path.join(self._path_to_main_directory, 'Tests', 'System', 'Reports')

        self._check_paths_existing()

    def get_path_to_test_results(self, mode='classifier', classifier_name='NBC'):
        if classifier_name not in self._valid_classifiers:
            self.__logger.warning('Got incorrect classifier name.', __name__)
            classifier_name = 'NBC'

        # The source checked classifier_name against the valid modes here; it is the
        # mode argument that must be validated.
        if mode not in self._valid_test_results_modes:
            self.__logger.warning('Got incorrect mode.', __name__)
            return self._path_to_test_results

        if mode.lower().strip() == 'vec_model':
            return os.path.join(self._path_to_test_results, 'VectorModel')
        elif mode.lower().strip() == 'classifier_main':
            return os.path.join(self._path_to_test_results, '..', '..', 'MainReports', 'Classifier',
                                classifier_name)
        elif mode.lower().strip() == 'classifier':
            return self._path_to_test_results

    def get_path_to_model(self, model='unigrams', classifier_name='NBC'):
        if classifier_name not in self._valid_classifiers:
            self.__logger.warning('Got incorrect classifier name.', __name__)
            classifier_name = 'NBC'

        if model not in self._valid_model_types:
            self.__logger.warning('Got incorrect model type.', __name__)
            model = 'unigrams'

        path_to_models = os.path.join(self._path_to_classifier_models, classifier_name)

        if os.path.exists(path_to_models):
            return os.path.join(path_to_models, f'model_{model}.pkl')
        else:
            self.__logger.error("Required model wasn't found.", __name__)

    def get_path_to_database(self, database_name='unigrams.db'):
        if database_name not in self._valid_databases:
            self.__logger.warning('Got incorrect database name.', __name__)
            database_name = 'unigrams.db'

        return os.path.join(self.path_to_databases, database_name)

    def get_path_to_dataset(self, dataset):
        if dataset not in self._valid_datasets:
            self.__logger.warning('Got incorrect dataset name.', __name__)
            dataset = 'dataset_with_unigrams.csv'

        return os.path.join(self.path_to_databases, dataset)

    def set_path_to_vector_model(self, path_to_vector_model):
        self.path_to_vector_model = path_to_vector_model
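# Because PathService is a Singleton, every service shares one configured instance.
# A minimal sketch (resolved paths depend on the local checkout):
#
#     paths = PathService()
#     print(paths.get_path_to_database('unigrams.db'))
#     print(paths.get_path_to_model(model='bigrams', classifier_name='NBC'))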
import sqlite3

import requests
import speech_recognition as sr


class ExceptionsHandler:
    def __init__(self):
        self.__logger = Logger()

        # Exception classes are stored directly (the source instantiated each exception
        # only to call type() on it, and listed some classes twice).
        self._request_exceptions = [requests.ConnectionError, requests.HTTPError, requests.TooManyRedirects,
                                    requests.Timeout, requests.RequestException, requests.ConnectTimeout,
                                    requests.ReadTimeout]
        self._system_errors = [KeyError, AttributeError, IndexError, ZeroDivisionError, SystemError,
                               ValueError, AssertionError]
        self._file_errors = [FileExistsError, FileNotFoundError]
        self._database_errors = [sqlite3.Error, sqlite3.DataError, sqlite3.ProgrammingError,
                                 sqlite3.DatabaseError, sqlite3.NotSupportedError, sqlite3.IntegrityError,
                                 sqlite3.InterfaceError, sqlite3.InternalError, sqlite3.OperationalError]
        self._speech_recognizer_errors = [sr.RequestError, sr.UnknownValueError, sr.WaitTimeoutError]

        self.__logger.info('ExceptionsHandler was successfully initialized.', __name__)

    @staticmethod
    def _handle_system_exception(exception):
        if isinstance(exception, KeyError):
            return 'KeyError occurred.'
        elif isinstance(exception, AttributeError):
            return 'AttributeError occurred.'
        elif isinstance(exception, IndexError):
            return 'IndexError occurred.'
        elif isinstance(exception, ZeroDivisionError):
            return 'ZeroDivisionError occurred.'
        elif isinstance(exception, SystemError):
            return 'SystemError occurred.'
        elif isinstance(exception, ValueError):
            return 'ValueError occurred.'
        elif isinstance(exception, AssertionError):
            return 'AssertionError occurred.'

    @staticmethod
    def _handle_file_exception(exception):
        if isinstance(exception, FileNotFoundError):
            return f'FileNotFoundError occurred ({str(exception.filename)}).'
        elif isinstance(exception, FileExistsError):
            return f'FileExistsError occurred ({str(exception.filename)}).'

    @staticmethod
    def _handle_database_exception(exception):
        # Subclasses are checked before their base classes (the source tested
        # sqlite3.DatabaseError before sqlite3.DataError, shadowing the latter).
        if isinstance(exception, sqlite3.OperationalError):
            return 'sqlite3.OperationalError occurred.'
        elif isinstance(exception, sqlite3.ProgrammingError):
            return 'sqlite3.ProgrammingError occurred.'
        elif isinstance(exception, sqlite3.InternalError):
            return 'sqlite3.InternalError occurred.'
        elif isinstance(exception, sqlite3.IntegrityError):
            return 'sqlite3.IntegrityError occurred.'
        elif isinstance(exception, sqlite3.NotSupportedError):
            return 'sqlite3.NotSupportedError occurred.'
        elif isinstance(exception, sqlite3.DataError):
            return 'sqlite3.DataError occurred.'
        elif isinstance(exception, sqlite3.DatabaseError):
            return 'sqlite3.DatabaseError occurred.'
        elif isinstance(exception, sqlite3.InterfaceError):
            return 'sqlite3.InterfaceError occurred.'
        elif isinstance(exception, sqlite3.Error):
            return 'sqlite3.Error occurred.'

    @staticmethod
    def _handle_request_exception(exception):
        # Most specific subclasses first: ConnectTimeout derives from both
        # ConnectionError and Timeout, ReadTimeout from Timeout.
        if isinstance(exception, requests.ConnectTimeout):
            return 'ConnectTimeout (requests.ConnectTimeout).'
        elif isinstance(exception, requests.ReadTimeout):
            return 'ReadTimeout (requests.ReadTimeout).'
        elif isinstance(exception, requests.ConnectionError):
            return 'Problems with connection (requests.ConnectionError).'
        elif isinstance(exception, requests.HTTPError):
            return 'HTTP request returned an unsuccessful status code (requests.HTTPError).'
        elif isinstance(exception, requests.Timeout):
            return 'Request timed out (requests.Timeout).'
        elif isinstance(exception, requests.TooManyRedirects):
            return 'Request exceeded the configured number of maximum redirections (requests.TooManyRedirects).'
        else:
            return 'Request exception (requests.RequestException).'

    @staticmethod
    def _handle_speech_recognizer_exception(exception):
        if isinstance(exception, sr.WaitTimeoutError):
            return 'speech_recognition.WaitTimeoutError occurred.'
        elif isinstance(exception, sr.UnknownValueError):
            return 'Unknown value (speech_recognition.UnknownValueError).'
        elif isinstance(exception, sr.RequestError):
            return 'speech_recognition.RequestError occurred.'

    def get_error_message(self, exception):
        if type(exception) in self._system_errors:
            return self._handle_system_exception(exception)
        elif type(exception) in self._file_errors:
            return self._handle_file_exception(exception)
        elif type(exception) in self._database_errors:
            # This branch was missing in the source even though the list was built above.
            return self._handle_database_exception(exception)
        elif type(exception) in self._request_exceptions:
            return ExceptionsHandler._handle_request_exception(exception)
        elif type(exception) in self._speech_recognizer_errors:
            return self._handle_speech_recognizer_exception(exception)

        return str(exception)
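# A quick check of the dispatch (hypothetical exceptions):
#
#     handler = ExceptionsHandler()
#     print(handler.get_error_message(KeyError('missing')))   # -> 'KeyError occurred.'
#     print(handler.get_error_message(RuntimeError('oops')))  # unmapped type -> 'oops'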
import re


class DocumentPreparer:
    def __init__(self):
        self.__logger = Logger()
        self.__logger.info('DocumentPreparer was successfully initialized.', __name__)

    def split_into_unigrams(self, text):
        if text:
            return re.findall(r'\w+', text)
        else:
            self.__logger.warning('Got empty text.', __name__)

    def split_into_bigrams(self, text):
        if not text:
            self.__logger.warning('Got empty text.', __name__)
            return

        unigrams = self.split_into_unigrams(text)
        bigrams = list()

        if len(unigrams) >= 2:
            for unigram_index in range(len(unigrams) - 1):
                bigram = ' '.join(sorted([unigrams[unigram_index], unigrams[unigram_index + 1]])).strip()
                bigrams.append(bigram)

            return bigrams
        else:
            self.__logger.info("Text doesn't contain enough words.", __name__)

    def split_into_trigrams(self, text):
        if not text:
            self.__logger.warning('Got empty text.', __name__)
            return

        unigrams = self.split_into_unigrams(text)
        trigrams = list()

        if len(unigrams) >= 3:
            for unigram_index in range(len(unigrams) - 2):
                trigram = ' '.join(sorted([unigrams[unigram_index],
                                           unigrams[unigram_index + 1],
                                           unigrams[unigram_index + 2]])).strip()
                trigrams.append(trigram)

            return trigrams
        else:
            self.__logger.info("Text doesn't contain enough words.", __name__)
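# N-gram words are sorted alphabetically before joining, so word order inside a pair
# or triple does not affect the key. A small demo (hypothetical input):
#
#     preparer = DocumentPreparer()
#     print(preparer.split_into_bigrams('мама мыла раму'))
#     # -> ['мама мыла', 'мыла раму']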