def __init__(self):
    # Services
    self._spell_checker = SpellChecker()
    self.__logger = Logger()
    self._path_service = PathService()
    self._morph_analyzer = pymorphy2.MorphAnalyzer()

    # Data
    self._stop_words = self._read_stop_words()
    self._parts_of_speech_to_remove = ['NUMR', 'NPRO', 'PREP']

    self.__logger.info('Lemmatizer was successfully initialized.', __name__)
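# Usage sketch (illustrative; get_text_initial_form is the public method used
# by TextTonalAnalyzer and NgramAnalyzer below, and the input string is a
# hypothetical example):
if __name__ == '__main__':
    lemmatizer = Lemmatizer()
    print(lemmatizer.get_text_initial_form('хорошие фильмы'))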
def __init__(self):
    super().__init__()

    self.os = platform.system().lower()

    # Services
    self._speech_recognizer = SpeechRecognizer()
    self._file_reader = FileReader()
    self.__logger = Logger()
    self._path_service = PathService()
    self._text_tonal_analyzer = TextTonalAnalyzer('NBC')

    self._config = None
    self._load_config()

    # GUI Elements
    self.line_edit = QLineEdit(self)
    self.answer_label = QLabel(self)
    self.voice_button = QPushButton(self)
    self.answer_button = QPushButton(self)
    self.file_dialog_button = QPushButton(self)
    self.delete_button = QPushButton(self)
    self.message_box = QMessageBox()
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import csv
import sqlite3

from Python.Services.DocumentPreparer import DocumentPreparer
from Python.Services.Lemmatizer.Lemmatizer import Lemmatizer
from Python.Services.PathService import PathService

path_service = PathService()
lemmatizer = Lemmatizer()
document_preparer = DocumentPreparer()


def get_all_entries(database):
    path_to_db = path_service.get_path_to_database(database)

    connection = sqlite3.connect(path_to_db)
    cursor = connection.cursor()

    request = "SELECT * FROM 'Data'"
    cursor.execute(request)

    return cursor.fetchall()
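# Usage sketch (illustrative; assumes unigrams.db is present locally):
if __name__ == '__main__':
    for entry in get_all_entries('unigrams.db'):
        print(entry)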
class DatabaseCursor:
    def __init__(self):
        # Services
        self.__logger = Logger()
        self._path_service = PathService()
        self._configurator = Configurator()
        self._exceptions_handler = ExceptionsHandler()

        # Data
        self._wd = os.getcwd()
        self._request_url = None
        self.databases_public_keys = None

        self.__logger.info('DatabaseCursor was successfully initialized.', __name__)

    def _load_config(self):
        path_to_config = os.path.join(self._path_service.path_to_configs, 'database_cursor.json')

        if os.path.exists(path_to_config):
            with open(path_to_config, 'r', encoding='utf-8') as file:
                config = json.load(file)

            self._request_url = config['request_url']
            self.databases_public_keys = config['database_public_keys']
        else:
            self.__logger.error("Can't load config for DatabaseCursor (doesn't exist).", __name__)

    def __update_connection(self, ngram):
        # The target database is chosen by the number of spaces in the ngram.
        path_to_db = None

        if ngram.count(' ') == 0:
            path_to_db = self._path_service.get_path_to_database('unigrams.db')
        elif ngram.count(' ') == 1:
            path_to_db = self._path_service.get_path_to_database('bigrams.db')
        elif ngram.count(' ') == 2:
            path_to_db = self._path_service.get_path_to_database('trigrams.db')

        if path_to_db and os.path.exists(path_to_db):
            self.__logger.info(f'Connected to database: {path_to_db}', __name__)
            return sqlite3.connect(path_to_db)
        else:
            self.__logger.warning(f'Database lost: {path_to_db}', __name__)
            self.__logger.info('Trying to download database from cloud...', __name__)

            self._configurator.download_database(path_to_db)

            if os.path.exists(path_to_db):
                self.__logger.info(f'Connected to database: {path_to_db}', __name__)
                return sqlite3.connect(path_to_db)
            else:
                self.__logger.fatal("Database doesn't exist.", __name__)

    def get_entry(self, ngram):
        connection = self.__update_connection(ngram)
        cursor = connection.cursor()

        # Parameterized query instead of string interpolation, so quotes in
        # the ngram can't break (or inject into) the SQL statement.
        request = "SELECT * FROM 'Data' WHERE Ngram = ?"
        self.__logger.info(f'Request to DB: {request} [{ngram}]', __name__)

        try:
            cursor.execute(request, (ngram,))
            self.__logger.info('Request is OK.', __name__)
        except BaseException as exception:
            connection.close()
            self.__logger.error(self._exceptions_handler.get_error_message(exception), __name__)
            return

        result = cursor.fetchone()
        self.__logger.info(f'Received data: {str(result)}', __name__)

        connection.close()

        if result:
            return result[1], result[2]

    def entry_exists(self, ngram):
        connection = self.__update_connection(ngram)
        cursor = connection.cursor()

        request = "SELECT * FROM 'Data' WHERE Ngram = ?"
        self.__logger.info(f'Request to DB: {request} [{ngram}]', __name__)

        try:
            cursor.execute(request, (ngram,))
            self.__logger.info('Request is OK.', __name__)
        except BaseException as exception:
            connection.close()
            self.__logger.error(self._exceptions_handler.get_error_message(exception), __name__)
            return

        entry = cursor.fetchone()
        connection.close()

        if entry:
            self.__logger.info('Entry exists.', __name__)
            return True
        else:
            self.__logger.info("Entry doesn't exist.", __name__)
            return False
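# Usage sketch (illustrative; assumes unigrams.db is present or downloadable
# and contains the Cyrillic unigram below — a hypothetical input):
if __name__ == '__main__':
    database_cursor = DatabaseCursor()

    if database_cursor.entry_exists('хороший'):
        # get_entry returns the (positive_docs, negative_docs) columns.
        pos_docs, neg_docs = database_cursor.get_entry('хороший')
        print(pos_docs, neg_docs)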
class TextTonalAnalyzer:
    def __init__(self, classifier_name='NBC'):
        # Services
        self._configurator = Configurator()
        self._configurator.configure_system()

        self._database_cursor = DatabaseCursor()
        self._document_preparer = DocumentPreparer()
        self._text_weight_counter = TextWeightCounter()
        self._classifier = Classifier()
        self.__logger = Logger()
        self._lemmatizer = Lemmatizer()
        self._path_service = PathService()

        # Data
        self._classifier_name = classifier_name
        self._text = None
        self.tonal = None
        self.probability = 0

        self._unigrams = None
        self._bigrams = None
        self._trigrams = None

        self._unigrams_weight = None
        self._bigrams_weight = None
        self._trigrams_weight = None

        self.__logger.info('TextTonalAnalyzer was successfully initialized.', __name__)

    def _reset_data(self):
        self._text = None
        self.tonal = None
        self.probability = 0

        self._unigrams = None
        self._bigrams = None
        self._trigrams = None

        self._unigrams_weight = None
        self._bigrams_weight = None
        self._trigrams_weight = None

        self.__logger.info('Data was successfully reset.', __name__)

    def _document_prepare(self):
        self._unigrams = self._document_preparer.split_into_unigrams(self._text)
        self._bigrams = self._document_preparer.split_into_bigrams(self._text)
        self._trigrams = self._document_preparer.split_into_trigrams(self._text)

    def _text_in_dataset(self):
        path_to_dataset = self._path_service.get_path_to_dataset('dataset_with_unigrams.csv')

        with open(path_to_dataset, 'r', encoding='utf-8') as file:
            dataset = csv.reader(file)

            for doc in dataset:
                doc = ''.join(doc).split(';')

                if doc[0] == self._text:
                    self.tonal = doc[1]
                    self.probability = 1
                    self.__logger.info('Document is in dataset.', __name__)
                    return True

        return False

    def _count_weight_by_unigrams(self):
        self._unigrams_weight = self._text_weight_counter.count_weight_by_unigrams(self._unigrams)

    def _count_weight_by_bigrams(self):
        self._bigrams_weight = self._text_weight_counter.count_weight_by_bigrams(self._bigrams)

    def _count_weight_by_trigrams(self):
        self._trigrams_weight = self._text_weight_counter.count_weight_by_trigrams(self._trigrams)

    def detect_tonal(self, text):
        self._reset_data()

        self._text = self._lemmatizer.get_text_initial_form(text)

        if not self._text:
            self.tonal = 'Unknown'
            self.__logger.warning('Text is empty.', __name__)
            return None

        self._document_prepare()

        if not self._text_in_dataset():
            # Count the n-gram weights in parallel, then classify.
            threads = [Thread(target=self._count_weight_by_unigrams),
                       Thread(target=self._count_weight_by_bigrams),
                       Thread(target=self._count_weight_by_trigrams)]

            for thread in threads:
                thread.start()

            # join() already blocks until the thread finishes; no polling
            # loop with time.sleep() is needed.
            for thread in threads:
                thread.join()

            self._classifier.customize(self._unigrams_weight, self._bigrams_weight,
                                       self._trigrams_weight, self._classifier_name)

            self.tonal, self.probability = self._classifier.predict_tonal()

        self.__logger.page_break()
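# Usage sketch (illustrative; the analyzer lemmatizes the text, checks the
# dataset for an exact match, and otherwise weighs n-grams and classifies —
# the input string is a hypothetical example):
if __name__ == '__main__':
    analyzer = TextTonalAnalyzer('NBC')
    analyzer.detect_tonal('Это очень хороший фильм!')
    print(analyzer.tonal, analyzer.probability)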
class NgramAnalyzer:
    def __init__(self):
        # Services
        self._database_cursor = DatabaseCursor()
        self.__logger = Logger()
        self._exceptions_handler = ExceptionsHandler()
        self._lemmatizer = Lemmatizer()
        self._path_service = PathService()
        self._configurator = Configurator()
        self._morph_analyzer = pymorphy2.MorphAnalyzer()

        # Data
        self._vec_model = None
        self._load_vec_model()

        self.__logger.info('NgramAnalyzer was successfully initialized.', __name__)

    def _load_vec_model(self):
        if not self._path_service.path_to_vector_model:
            self.__logger.warning("Vector model doesn't exist.", __name__)

            self._configurator.download_vector_model()
            self._path_service.set_path_to_vector_model(
                os.path.join(self._path_service.path_to_databases,
                             'ruscorpora_upos_skipgram_300_10_2017.bin.gz'))
            self.__logger.info('Vector model was successfully downloaded.', __name__)

        if self._path_service.path_to_vector_model:
            self._vec_model = gensim.models.KeyedVectors.load_word2vec_format(
                self._path_service.path_to_vector_model, binary=True)
        else:
            self.__logger.error("Vector model doesn't exist.", __name__)

    def _part_of_speech_detect(self, word):
        if not word:
            return

        part_of_speech = self._morph_analyzer.parse(word)[0].tag.POS

        if part_of_speech:
            # Map pymorphy2 tags to the tag set used by the vector model.
            if re.match(r'ADJ', part_of_speech):
                return 'ADJ'
            elif re.match(r'PRT', part_of_speech):
                return 'PRT'
            elif part_of_speech == 'INFN':
                return 'VERB'
            elif part_of_speech in ('ADVB', 'PRED'):
                return 'ADV'
            elif part_of_speech == 'PRCL':
                return 'PART'

            return part_of_speech

    @staticmethod
    def _detect_ngram_type(ngram):
        if not ngram:
            return

        if ngram.count(' ') == 0:
            return 'unigram'
        elif ngram.count(' ') == 1:
            return 'bigram'
        elif ngram.count(' ') == 2:
            return 'trigram'

    def _nearest_synonyms_find(self, word, topn):
        if not self._vec_model or not word or topn <= 0:
            return

        nearest_synonyms = list()
        part_of_speech = self._part_of_speech_detect(word)
        ngram_type = self._detect_ngram_type(word)

        if part_of_speech:
            word = f'{word}_{part_of_speech}'

        try:
            # Request extra candidates (topn * 10): lemmatization can collapse
            # candidates or change the ngram type, so some are filtered out.
            for synonym in self._vec_model.most_similar(positive=[word], topn=topn * 10):
                found_synonym = self._lemmatizer.get_text_initial_form(synonym[0].split('_')[0])

                if found_synonym and self._detect_ngram_type(found_synonym) == ngram_type:
                    nearest_synonyms.append({'word': found_synonym,
                                             'cosine proximity': synonym[1]})

                if len(nearest_synonyms) == topn:
                    break
        except BaseException as exception:
            self.__logger.warning(self._exceptions_handler.get_error_message(exception), __name__)
            return

        return nearest_synonyms

    def relevant_ngram_find(self, ngram):
        if not ngram:
            return

        self.__logger.info(f'Start ngram: {ngram}', __name__)

        response = {'synonym_found': False, 'content': dict()}

        if self._detect_ngram_type(ngram) == 'unigram':
            synonyms_count = 10
            nearest_synonyms = self._nearest_synonyms_find(ngram, synonyms_count)

            if not nearest_synonyms:
                return response

            for nearest_synonym in nearest_synonyms:
                data = self._database_cursor.get_entry(nearest_synonym['word'])

                if data and data[0]:
                    self.__logger.info(f'Relevant ngram: {nearest_synonym["word"]}', __name__)

                    response['synonym_found'] = True
                    response['content']['synonym'] = nearest_synonym['word']
                    response['content']['pos_docs'] = data[0]
                    response['content']['neg_docs'] = data[1]

                    return response

        return response
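# Usage sketch (illustrative; assumes the vector model and the unigrams
# database are available — the Cyrillic word is a hypothetical input):
if __name__ == '__main__':
    ngram_analyzer = NgramAnalyzer()
    result = ngram_analyzer.relevant_ngram_find('хороший')

    if result['synonym_found']:
        print(result['content']['synonym'],
              result['content']['pos_docs'],
              result['content']['neg_docs'])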
class TextWeightCounter:
    def __init__(self):
        # Services
        self._database_cursor = DatabaseCursor()
        self._ngram_analyzer = NgramAnalyzer()
        self.__logger = Logger()
        self._path_service = PathService()

        # Data
        self._docs_count = dict()
        self._count_all_docs()

        self.__logger.info('TextWeightCounter was successfully initialized.', __name__)

    def _count_docs_in_dataset(self, mode):
        path_to_dataset = self._path_service.get_path_to_dataset(f'dataset_with_{mode}.csv')

        with open(path_to_dataset, 'r', encoding='utf-8') as file:
            # The negative count starts from a constant shift; the shift is
            # subtracted back out of the returned total, so only the stored
            # negative_docs value stays inflated.
            negative_docs_shift = 10000
            positive_docs = 0
            negative_docs = negative_docs_shift

            for row in csv.reader(file):
                if ''.join(row).split(';')[1] == 'positive':
                    positive_docs += 1
                else:
                    negative_docs += 1

        return positive_docs + negative_docs - negative_docs_shift, positive_docs, negative_docs

    def _count_all_docs(self):
        for mode in ['unigrams', 'bigrams', 'trigrams']:
            self._docs_count[mode] = dict()

            self._docs_count[mode]['all_docs'], self._docs_count[mode]['positive_docs'], \
                self._docs_count[mode]['negative_docs'] = self._count_docs_in_dataset(mode)

    @staticmethod
    def _detect_ngram_type(ngram):
        if ngram.count(' ') == 0:
            return 'unigram'
        elif ngram.count(' ') == 1:
            return 'bigram'
        elif ngram.count(' ') == 2:
            return 'trigram'

    def _count_ngram_weight(self, ngram):
        self.__logger.info(f'Ngram: {ngram}', __name__)

        ngram_type = self._detect_ngram_type(ngram)
        delta_tf_idf = 0

        self.__logger.info(f'Ngram_type: {ngram_type}', __name__)

        if self._database_cursor.entry_exists(ngram):
            pos_docs_word, neg_docs_word = self._database_cursor.get_entry(ngram)

            # Delta TF-IDF: log10((N_neg * df_pos) / (N_pos * df_neg)).
            # Positive values vote for the positive class, negative values
            # for the negative class.
            delta_tf_idf = math.log10(
                (self._docs_count[ngram_type + 's']['negative_docs'] * pos_docs_word) /
                (self._docs_count[ngram_type + 's']['positive_docs'] * neg_docs_word))
        else:
            response = self._ngram_analyzer.relevant_ngram_find(ngram)

            if response['synonym_found'] and ngram_type == 'unigram':
                pos_docs_word = response['content']['pos_docs']
                neg_docs_word = response['content']['neg_docs']

                if not (pos_docs_word and neg_docs_word) or \
                        (pos_docs_word == 1 and neg_docs_word == 1):
                    return 0

                delta_tf_idf = math.log10(
                    (self._docs_count[ngram_type + 's']['negative_docs'] * pos_docs_word) /
                    (self._docs_count[ngram_type + 's']['positive_docs'] * neg_docs_word))

        return delta_tf_idf

    def count_weight_by_unigrams(self, unigrams):
        checked_unigrams = list()
        important_unigrams = list()
        unigrams_weight = 0

        for unigram in unigrams:
            if unigram not in checked_unigrams:
                this_doc_unigram = unigrams.count(unigram)
                unigram_weight = this_doc_unigram * self._count_ngram_weight(unigram)
                unigrams_weight += unigram_weight

                checked_unigrams.append(unigram)

                if unigram_weight:
                    important_unigrams.append(unigram)

        # Average only when enough unigrams carried a non-zero weight.
        if important_unigrams and len(important_unigrams) >= round(len(unigrams) * 0.6):
            unigrams_weight = unigrams_weight / len(important_unigrams)

        self.__logger.info(f'Unigrams weight: {unigrams_weight}', __name__)

        return unigrams_weight

    def count_weight_by_bigrams(self, bigrams):
        if not bigrams:
            return None

        checked_bigrams = list()
        important_bigrams = list()
        bigrams_weight = 0

        for bigram in bigrams:
            if bigram not in checked_bigrams:
                this_doc_bigram = bigrams.count(bigram)
                bigram_weight = this_doc_bigram * self._count_ngram_weight(bigram)
                bigrams_weight += bigram_weight

                checked_bigrams.append(bigram)

                if bigram_weight:
                    important_bigrams.append(bigram)

        if important_bigrams and len(important_bigrams) >= len(bigrams) // 2:
            bigrams_weight = bigrams_weight / len(important_bigrams)

        self.__logger.info(f'Bigrams weight: {bigrams_weight}', __name__)

        return bigrams_weight

    def count_weight_by_trigrams(self, trigrams):
        if not trigrams:
            return None

        checked_trigrams = list()
        important_trigrams = list()
        trigrams_weight = 0

        for trigram in trigrams:
            if trigram not in checked_trigrams:
                this_doc_trigram = trigrams.count(trigram)
                trigram_weight = this_doc_trigram * self._count_ngram_weight(trigram)
                trigrams_weight += trigram_weight

                checked_trigrams.append(trigram)

                if trigram_weight:
                    important_trigrams.append(trigram)

        if important_trigrams and len(important_trigrams) >= round(len(trigrams) * 0.4):
            trigrams_weight = trigrams_weight / len(important_trigrams)

        self.__logger.info(f'Trigrams weight: {trigrams_weight}', __name__)

        return trigrams_weight
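# Worked example of the delta TF-IDF weight used above, with hypothetical
# counts: 10000 negative docs, 10000 positive docs, a word appearing in 200
# positive and 50 negative docs gives log10((10000 * 200) / (10000 * 50))
# = log10(4) ≈ 0.602, i.e. a vote for the positive class.
import math

print(math.log10((10000 * 200) / (10000 * 50)))  # ≈ 0.602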
class Configurator(metaclass=Singleton):
    def __init__(self):
        # Services
        self.__logger = Logger()
        self._path_service = PathService()
        self._exceptions_handler = ExceptionsHandler()

        # Data
        self._config = dict()
        self._wd = os.getcwd()
        self._path_to_databases = None
        self._request_url = None
        self._vector_model_public_key = None
        self._databases_public_keys = None

        self._load_public_keys()

        self.__logger.info('Configurator was successfully initialized.', __name__)

    def _load_public_keys(self):
        path_to_config = os.path.join(self._path_service.path_to_configs, 'configurator.json')

        if os.path.exists(path_to_config):
            with open(path_to_config, 'r', encoding='utf-8') as file:
                config = json.load(file)

            self._request_url = config['request_url']
            self._vector_model_public_key = config['vector_model_public_key']
            self._databases_public_keys = config['databases_public_keys']
        else:
            self.__logger.error("Can't load config for Configurator (doesn't exist).", __name__)

    def download_database(self, path_to_db):
        database_name = os.path.split(path_to_db)[1]

        if database_name:
            try:
                download_url = requests.get(
                    self._request_url,
                    params={'public_key': self._databases_public_keys[database_name]}
                ).json()['href']

                with open(path_to_db, 'wb') as database_file:
                    database_file.write(requests.get(download_url).content)

                self._config[path_to_db] = 'downloaded'
            except BaseException as exception:
                self.__logger.error(self._exceptions_handler.get_error_message(exception), __name__)
                self._config[path_to_db] = 'error'

    def download_vector_model(self):
        self._path_service.set_path_to_vector_model(
            os.path.join(self._path_service.path_to_databases,
                         'ruscorpora_upos_skipgram_300_10_2017.bin.gz'))

        try:
            download_url = requests.get(
                self._request_url,
                params={'public_key': self._vector_model_public_key}
            ).json()['href']

            with open(self._path_service.path_to_vector_model, 'wb') as vec_model:
                vec_model.write(requests.get(download_url).content)

            self._config['ruscorpora_upos_skipgram_300_10_2017.bin.gz'] = 'downloaded'
        except BaseException as exception:
            self.__logger.error(self._exceptions_handler.get_error_message(exception), __name__)
            self._config['ruscorpora_upos_skipgram_300_10_2017.bin.gz'] = 'error'

    def configure_system(self):
        self._config['datetime'] = str(datetime.now())

        for database in ['unigrams.db', 'bigrams.db', 'trigrams.db']:
            path_to_database = self._path_service.get_path_to_database(database)

            if not path_to_database or not os.path.exists(path_to_database):
                self.__logger.warning(f'Database not found: {database}', __name__)
                self.download_database(
                    os.path.join(self._path_service.path_to_databases, database))
            else:
                self._config[database] = 'exists'

        if not self._path_service.path_to_vector_model or \
                not os.path.exists(self._path_service.path_to_vector_model):
            self.__logger.warning('Vector model not found.', __name__)
            self.download_vector_model()
        else:
            self._config['ruscorpora_upos_skipgram_300_10_2017.bin.gz'] = 'exists'

        self._create_config()

    def _create_config(self):
        with open(os.path.join('Logs', 'config.json'), 'w', encoding='utf-8') as config:
            json.dump(self._config, config, indent=4)
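# Usage sketch (illustrative): Configurator is a Singleton, so repeated
# instantiation yields the same object; configure_system() checks the local
# databases and vector model, downloads what is missing, and writes a status
# report to Logs/config.json.
if __name__ == '__main__':
    configurator = Configurator()
    configurator.configure_system()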
class Classifier:
    def __init__(self):
        # Services
        self.__logger = Logger()
        self._path_service = PathService()
        self._exceptions_handler = ExceptionsHandler()

        # Data
        self._container = ClassificationDataContainer()
        self._possible_classifiers = ['NBC', 'LogisticRegression', 'KNN']

        self.__logger.info('Classifier was successfully initialized.', __name__)

    def _load_config(self):
        # Note: not called from __init__; the defaults above apply unless this
        # method is invoked explicitly.
        path_to_config = os.path.join(self._path_service.path_to_configs, 'classifier.json')

        if os.path.exists(path_to_config):
            with open(path_to_config, 'r', encoding='utf-8') as file:
                config = json.load(file)

            self._possible_classifiers = config['possible_classifiers']
        else:
            self.__logger.error("Can't load Classifier configuration.", __name__)

    def customize(self, unigrams_weight, bigrams_weight, trigrams_weight, classifier_name='NBC'):
        self._container.clear()

        if classifier_name in self._possible_classifiers:
            self._container.classifiers['name'] = classifier_name
        else:
            self._container.classifiers['name'] = 'NBC'
            self.__logger.error('Got unknown classifier, set default (NBC).', __name__)

        self._container.weights['unigrams'] = unigrams_weight
        self._container.weights['bigrams'] = bigrams_weight
        self._container.weights['trigrams'] = trigrams_weight

        try:
            if self._container.weights['unigrams']:
                self._container.classifiers['unigrams'] = joblib.load(
                    self._path_service.get_path_to_model(
                        'unigrams', self._container.classifiers['name']))

            if self._container.weights['bigrams']:
                self._container.classifiers['bigrams'] = joblib.load(
                    self._path_service.get_path_to_model(
                        'bigrams', self._container.classifiers['name']))

            if self._container.weights['trigrams']:
                self._container.classifiers['trigrams'] = joblib.load(
                    self._path_service.get_path_to_model(
                        'trigrams', self._container.classifiers['name']))

            self.__logger.info('Models were successfully loaded.', __name__)
            self.__logger.info('Classifier was successfully configured.', __name__)
        except BaseException as exception:
            self.__logger.fatal(self._exceptions_handler.get_error_message(exception), __name__)

    def _predict_tonal_by_unigrams(self):
        # The weight is wrapped in a 2-D list: scikit-learn predictors expect
        # a samples-by-features array, matching the bigram and trigram
        # predictors below.
        features = [[self._container.weights['unigrams']]]

        self._container.tonalities['unigrams'] = \
            self._container.classifiers['unigrams'].predict(features)[0]
        self._container.probabilities['unigrams'] = \
            max(self._container.classifiers['unigrams'].predict_proba(features)[0])

        self.__logger.info(f'Unigrams tonal: {self._container.tonalities["unigrams"]}', __name__)
        self.__logger.info(f'Unigrams probability: {self._container.probabilities["unigrams"]}',
                           __name__)

    def _predict_tonal_by_unigrams_bigrams(self):
        features = [[self._container.weights['unigrams'],
                     self._container.weights['bigrams']]]

        self._container.tonalities['bigrams'] = \
            self._container.classifiers['bigrams'].predict(features)[0]
        self._container.probabilities['bigrams'] = \
            max(self._container.classifiers['bigrams'].predict_proba(features)[0])

        self.__logger.info(f'Bigrams tonal: {self._container.tonalities["bigrams"]}', __name__)
        self.__logger.info(f'Bigrams probability: {self._container.probabilities["bigrams"]}',
                           __name__)

    def _predict_tonal_by_unigrams_bigrams_trigrams(self):
        features = [[self._container.weights['unigrams'],
                     self._container.weights['bigrams'],
                     self._container.weights['trigrams']]]

        self._container.tonalities['trigrams'] = \
            self._container.classifiers['trigrams'].predict(features)[0]
        self._container.probabilities['trigrams'] = \
            max(self._container.classifiers['trigrams'].predict_proba(features)[0])

        self.__logger.info(f'Trigrams tonal: {self._container.tonalities["trigrams"]}', __name__)
        self.__logger.info(f'Trigrams probability: {self._container.probabilities["trigrams"]}',
                           __name__)

    def _predict_intermediate_tonalities(self):
        threads = list()

        if self._container.weights['unigrams']:
            threads.append(Thread(target=self._predict_tonal_by_unigrams))

        if self._container.weights['bigrams']:
            threads.append(Thread(target=self._predict_tonal_by_unigrams_bigrams))

        if self._container.weights['trigrams']:
            threads.append(Thread(target=self._predict_tonal_by_unigrams_bigrams_trigrams))

        for thread in threads:
            thread.start()

        # join() already blocks until each thread finishes; no polling loop
        # with time.sleep() is needed.
        for thread in threads:
            thread.join()

    def _select_final_tonal(self):
        # Majority vote across the available intermediate predictions;
        # a tie between two predictors is broken by probability.
        if self._container.tonalities['unigrams'] and self._container.tonalities['bigrams'] \
                and self._container.tonalities['trigrams']:
            if self._container.tonalities['unigrams'] == self._container.tonalities['bigrams']:
                self._container.tonalities['final'] = self._container.tonalities['unigrams']
                self._container.probabilities['final'] = max(
                    self._container.probabilities['unigrams'],
                    self._container.probabilities['bigrams'])
            elif self._container.tonalities['unigrams'] == self._container.tonalities['trigrams']:
                self._container.tonalities['final'] = self._container.tonalities['unigrams']
                self._container.probabilities['final'] = max(
                    self._container.probabilities['unigrams'],
                    self._container.probabilities['trigrams'])
            elif self._container.tonalities['bigrams'] == self._container.tonalities['trigrams']:
                self._container.tonalities['final'] = self._container.tonalities['bigrams']
                self._container.probabilities['final'] = max(
                    self._container.probabilities['bigrams'],
                    self._container.probabilities['trigrams'])
        elif self._container.tonalities['unigrams'] and self._container.tonalities['bigrams']:
            if self._container.tonalities['unigrams'] != self._container.tonalities['bigrams']:
                if self._container.probabilities['unigrams'] >= \
                        self._container.probabilities['bigrams']:
                    self._container.tonalities['final'] = self._container.tonalities['unigrams']
                    self._container.probabilities['final'] = \
                        self._container.probabilities['unigrams']
                else:
                    self._container.tonalities['final'] = self._container.tonalities['bigrams']
                    self._container.probabilities['final'] = \
                        self._container.probabilities['bigrams']
            else:
                self._container.tonalities['final'] = self._container.tonalities['unigrams']
                self._container.probabilities['final'] = max(
                    self._container.probabilities['unigrams'],
                    self._container.probabilities['bigrams'])
        elif self._container.tonalities['unigrams']:
            self._container.tonalities['final'] = self._container.tonalities['unigrams']
            self._container.probabilities['final'] = self._container.probabilities['unigrams']

    def predict_tonal(self):
        self._predict_intermediate_tonalities()
        self._select_final_tonal()

        self.__logger.info(f'Final tonal: {self._container.tonalities["final"]}', __name__)
        self.__logger.info(f'Final probability: {self._container.probabilities["final"]}', __name__)

        return self._container.tonalities['final'], self._container.probabilities['final']
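# Usage sketch (illustrative; the weight values are hypothetical — in the
# real pipeline they come from TextWeightCounter via TextTonalAnalyzer, and
# customize() loads the corresponding pickled models with joblib):
if __name__ == '__main__':
    classifier = Classifier()
    classifier.customize(unigrams_weight=0.42, bigrams_weight=0.17,
                         trigrams_weight=0.05, classifier_name='NBC')
    tonal, probability = classifier.predict_tonal()
    print(tonal, probability)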