def __init__(self):
    # Services
    self._spell_checker = SpellChecker()
    self.__logger = Logger()
    self._path_service = PathService()
    self._morph_analyzer = pymorphy2.MorphAnalyzer()

    # Data
    self._stop_words = self._read_stop_words()
    self._parts_of_speech_to_remove = ['NUMR', 'NPRO', 'PREP']

    self.__logger.info('Lemmatizer was successfully initialized.', __name__)
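# Usage sketch (illustrative; get_text_initial_form is the public method used
# by TextTonalAnalyzer and NgramAnalyzer below, and the input string is a
# hypothetical example):
if __name__ == '__main__':
    lemmatizer = Lemmatizer()
    print(lemmatizer.get_text_initial_form('хорошие фильмы'))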
def __init__(self):
    super().__init__()

    self.os = platform.system().lower()

    # Services
    self._speech_recognizer = SpeechRecognizer()
    self._file_reader = FileReader()
    self.__logger = Logger()
    self._path_service = PathService()
    self._text_tonal_analyzer = TextTonalAnalyzer('NBC')

    self._config = None
    self._load_config()

    # GUI Elements
    self.line_edit = QLineEdit(self)
    self.answer_label = QLabel(self)
    self.voice_button = QPushButton(self)
    self.answer_button = QPushButton(self)
    self.file_dialog_button = QPushButton(self)
    self.delete_button = QPushButton(self)
    self.message_box = QMessageBox()
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import csv
import sqlite3

from Python.Services.DocumentPreparer import DocumentPreparer
from Python.Services.Lemmatizer.Lemmatizer import Lemmatizer
from Python.Services.PathService import PathService

path_service = PathService()
lemmatizer = Lemmatizer()
document_preparer = DocumentPreparer()


def get_all_entries(database):
    path_to_db = path_service.get_path_to_database(database)

    connection = sqlite3.connect(path_to_db)
    cursor = connection.cursor()

    request = "SELECT * FROM 'Data'"
    cursor.execute(request)

    return cursor.fetchall()
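# Usage sketch (illustrative; assumes unigrams.db is present locally):
if __name__ == '__main__':
    for entry in get_all_entries('unigrams.db'):
        print(entry)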
class DatabaseCursor:
    def __init__(self):
        # Services
        self.__logger = Logger()
        self._path_service = PathService()
        self._configurator = Configurator()
        self._exceptions_handler = ExceptionsHandler()

        # Data
        self._wd = os.getcwd()
        self._request_url = None
        self.databases_public_keys = None

        self.__logger.info('DatabaseCursor was successfully initialized.', __name__)

    def _load_config(self):
        path_to_config = os.path.join(self._path_service.path_to_configs, 'database_cursor.json')

        if os.path.exists(path_to_config):
            with open(path_to_config, 'r', encoding='utf-8') as file:
                config = json.load(file)

            self._request_url = config['request_url']
            self.databases_public_keys = config['database_public_keys']
        else:
            self.__logger.error("Can't load config for DatabaseCursor (doesn't exist).", __name__)

    def __update_connection(self, ngram):
        # The target database is chosen by the number of spaces in the ngram.
        path_to_db = None

        if ngram.count(' ') == 0:
            path_to_db = self._path_service.get_path_to_database('unigrams.db')
        elif ngram.count(' ') == 1:
            path_to_db = self._path_service.get_path_to_database('bigrams.db')
        elif ngram.count(' ') == 2:
            path_to_db = self._path_service.get_path_to_database('trigrams.db')

        if path_to_db and os.path.exists(path_to_db):
            self.__logger.info(f'Connected to database: {path_to_db}', __name__)
            return sqlite3.connect(path_to_db)
        else:
            self.__logger.warning(f'Database lost: {path_to_db}', __name__)
            self.__logger.info('Trying to download database from cloud...', __name__)

            self._configurator.download_database(path_to_db)

            if os.path.exists(path_to_db):
                self.__logger.info(f'Connected to database: {path_to_db}', __name__)
                return sqlite3.connect(path_to_db)
            else:
                self.__logger.fatal("Database doesn't exist.", __name__)

    def get_entry(self, ngram):
        connection = self.__update_connection(ngram)
        cursor = connection.cursor()

        # Parameterized query instead of string interpolation, so quotes in
        # the ngram can't break (or inject into) the SQL statement.
        request = "SELECT * FROM 'Data' WHERE Ngram = ?"
        self.__logger.info(f'Request to DB: {request} [{ngram}]', __name__)

        try:
            cursor.execute(request, (ngram,))
            self.__logger.info('Request is OK.', __name__)
        except BaseException as exception:
            connection.close()
            self.__logger.error(self._exceptions_handler.get_error_message(exception), __name__)
            return

        result = cursor.fetchone()
        self.__logger.info(f'Received data: {str(result)}', __name__)

        connection.close()

        if result:
            return result[1], result[2]

    def entry_exists(self, ngram):
        connection = self.__update_connection(ngram)
        cursor = connection.cursor()

        request = "SELECT * FROM 'Data' WHERE Ngram = ?"
        self.__logger.info(f'Request to DB: {request} [{ngram}]', __name__)

        try:
            cursor.execute(request, (ngram,))
            self.__logger.info('Request is OK.', __name__)
        except BaseException as exception:
            connection.close()
            self.__logger.error(self._exceptions_handler.get_error_message(exception), __name__)
            return

        entry = cursor.fetchone()
        connection.close()

        if entry:
            self.__logger.info('Entry exists.', __name__)
            return True
        else:
            self.__logger.info("Entry doesn't exist.", __name__)
            return False
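# Usage sketch (illustrative; assumes unigrams.db is present or downloadable
# and contains the Cyrillic unigram below — a hypothetical input):
if __name__ == '__main__':
    database_cursor = DatabaseCursor()

    if database_cursor.entry_exists('хороший'):
        # get_entry returns the (positive_docs, negative_docs) columns.
        pos_docs, neg_docs = database_cursor.get_entry('хороший')
        print(pos_docs, neg_docs)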
class TextTonalAnalyzer:
    def __init__(self, classifier_name='NBC'):
        # Services
        self._configurator = Configurator()
        self._configurator.configure_system()

        self._database_cursor = DatabaseCursor()
        self._document_preparer = DocumentPreparer()
        self._text_weight_counter = TextWeightCounter()
        self._classifier = Classifier()
        self.__logger = Logger()
        self._lemmatizer = Lemmatizer()
        self._path_service = PathService()

        # Data
        self._classifier_name = classifier_name
        self._text = None
        self.tonal = None
        self.probability = 0

        self._unigrams = None
        self._bigrams = None
        self._trigrams = None

        self._unigrams_weight = None
        self._bigrams_weight = None
        self._trigrams_weight = None

        self.__logger.info('TextTonalAnalyzer was successfully initialized.', __name__)

    def _reset_data(self):
        self._text = None
        self.tonal = None
        self.probability = 0

        self._unigrams = None
        self._bigrams = None
        self._trigrams = None

        self._unigrams_weight = None
        self._bigrams_weight = None
        self._trigrams_weight = None

        self.__logger.info('Data was successfully reset.', __name__)

    def _document_prepare(self):
        self._unigrams = self._document_preparer.split_into_unigrams(self._text)
        self._bigrams = self._document_preparer.split_into_bigrams(self._text)
        self._trigrams = self._document_preparer.split_into_trigrams(self._text)

    def _text_in_dataset(self):
        path_to_dataset = self._path_service.get_path_to_dataset('dataset_with_unigrams.csv')

        with open(path_to_dataset, 'r', encoding='utf-8') as file:
            dataset = csv.reader(file)

            for doc in dataset:
                doc = ''.join(doc).split(';')

                if doc[0] == self._text:
                    self.tonal = doc[1]
                    self.probability = 1
                    self.__logger.info('Document is in dataset.', __name__)
                    return True

        return False

    def _count_weight_by_unigrams(self):
        self._unigrams_weight = self._text_weight_counter.count_weight_by_unigrams(self._unigrams)

    def _count_weight_by_bigrams(self):
        self._bigrams_weight = self._text_weight_counter.count_weight_by_bigrams(self._bigrams)

    def _count_weight_by_trigrams(self):
        self._trigrams_weight = self._text_weight_counter.count_weight_by_trigrams(self._trigrams)

    def detect_tonal(self, text):
        self._reset_data()

        self._text = self._lemmatizer.get_text_initial_form(text)

        if not self._text:
            self.tonal = 'Unknown'
            self.__logger.warning('Text is empty.', __name__)
            return None

        self._document_prepare()

        if not self._text_in_dataset():
            # Count the n-gram weights in parallel, then classify.
            threads = [Thread(target=self._count_weight_by_unigrams),
                       Thread(target=self._count_weight_by_bigrams),
                       Thread(target=self._count_weight_by_trigrams)]

            for thread in threads:
                thread.start()

            # join() already blocks until the thread finishes; no polling
            # loop with time.sleep() is needed.
            for thread in threads:
                thread.join()

            self._classifier.customize(self._unigrams_weight, self._bigrams_weight,
                                       self._trigrams_weight, self._classifier_name)

            self.tonal, self.probability = self._classifier.predict_tonal()

        self.__logger.page_break()
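# Usage sketch (illustrative; the analyzer lemmatizes the text, checks the
# dataset for an exact match, and otherwise weighs n-grams and classifies —
# the input string is a hypothetical example):
if __name__ == '__main__':
    analyzer = TextTonalAnalyzer('NBC')
    analyzer.detect_tonal('Это очень хороший фильм!')
    print(analyzer.tonal, analyzer.probability)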
class NgramAnalyzer:
    def __init__(self):
        # Services
        self._database_cursor = DatabaseCursor()
        self.__logger = Logger()
        self._exceptions_handler = ExceptionsHandler()
        self._lemmatizer = Lemmatizer()
        self._path_service = PathService()
        self._configurator = Configurator()
        self._morph_analyzer = pymorphy2.MorphAnalyzer()

        # Data
        self._vec_model = None
        self._load_vec_model()

        self.__logger.info('NgramAnalyzer was successfully initialized.', __name__)

    def _load_vec_model(self):
        if not self._path_service.path_to_vector_model:
            self.__logger.warning("Vector model doesn't exist.", __name__)

            self._configurator.download_vector_model()
            self._path_service.set_path_to_vector_model(
                os.path.join(self._path_service.path_to_databases,
                             'ruscorpora_upos_skipgram_300_10_2017.bin.gz'))
            self.__logger.info('Vector model was successfully downloaded.', __name__)

        if self._path_service.path_to_vector_model:
            self._vec_model = gensim.models.KeyedVectors.load_word2vec_format(
                self._path_service.path_to_vector_model, binary=True)
        else:
            self.__logger.error("Vector model doesn't exist.", __name__)

    def _part_of_speech_detect(self, word):
        if not word:
            return

        part_of_speech = self._morph_analyzer.parse(word)[0].tag.POS

        if part_of_speech:
            # Map pymorphy2 tags to the tag set used by the vector model.
            if re.match(r'ADJ', part_of_speech):
                return 'ADJ'
            elif re.match(r'PRT', part_of_speech):
                return 'PRT'
            elif part_of_speech == 'INFN':
                return 'VERB'
            elif part_of_speech in ('ADVB', 'PRED'):
                return 'ADV'
            elif part_of_speech == 'PRCL':
                return 'PART'

            return part_of_speech

    @staticmethod
    def _detect_ngram_type(ngram):
        if not ngram:
            return

        if ngram.count(' ') == 0:
            return 'unigram'
        elif ngram.count(' ') == 1:
            return 'bigram'
        elif ngram.count(' ') == 2:
            return 'trigram'

    def _nearest_synonyms_find(self, word, topn):
        if not self._vec_model or not word or topn <= 0:
            return

        nearest_synonyms = list()
        part_of_speech = self._part_of_speech_detect(word)
        ngram_type = self._detect_ngram_type(word)

        if part_of_speech:
            word = f'{word}_{part_of_speech}'

        try:
            # Request extra candidates (topn * 10): lemmatization can collapse
            # candidates or change the ngram type, so some are filtered out.
            for synonym in self._vec_model.most_similar(positive=[word], topn=topn * 10):
                found_synonym = self._lemmatizer.get_text_initial_form(synonym[0].split('_')[0])

                if found_synonym and self._detect_ngram_type(found_synonym) == ngram_type:
                    nearest_synonyms.append({'word': found_synonym,
                                             'cosine proximity': synonym[1]})

                if len(nearest_synonyms) == topn:
                    break
        except BaseException as exception:
            self.__logger.warning(self._exceptions_handler.get_error_message(exception), __name__)
            return

        return nearest_synonyms

    def relevant_ngram_find(self, ngram):
        if not ngram:
            return

        self.__logger.info(f'Start ngram: {ngram}', __name__)

        response = {'synonym_found': False, 'content': dict()}

        if self._detect_ngram_type(ngram) == 'unigram':
            synonyms_count = 10
            nearest_synonyms = self._nearest_synonyms_find(ngram, synonyms_count)

            if not nearest_synonyms:
                return response

            for nearest_synonym in nearest_synonyms:
                data = self._database_cursor.get_entry(nearest_synonym['word'])

                if data and data[0]:
                    self.__logger.info(f'Relevant ngram: {nearest_synonym["word"]}', __name__)

                    response['synonym_found'] = True
                    response['content']['synonym'] = nearest_synonym['word']
                    response['content']['pos_docs'] = data[0]
                    response['content']['neg_docs'] = data[1]

                    return response

        return response
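# Usage sketch (illustrative; assumes the vector model and the unigrams
# database are available — the Cyrillic word is a hypothetical input):
if __name__ == '__main__':
    ngram_analyzer = NgramAnalyzer()
    result = ngram_analyzer.relevant_ngram_find('хороший')

    if result['synonym_found']:
        print(result['content']['synonym'],
              result['content']['pos_docs'],
              result['content']['neg_docs'])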
class TextWeightCounter:
    def __init__(self):
        # Services
        self._database_cursor = DatabaseCursor()
        self._ngram_analyzer = NgramAnalyzer()
        self.__logger = Logger()
        self._path_service = PathService()

        # Data
        self._docs_count = dict()
        self._count_all_docs()

        self.__logger.info('TextWeightCounter was successfully initialized.', __name__)

    def _count_docs_in_dataset(self, mode):
        path_to_dataset = self._path_service.get_path_to_dataset(f'dataset_with_{mode}.csv')

        with open(path_to_dataset, 'r', encoding='utf-8') as file:
            # The negative count starts from a constant shift; the shift is
            # subtracted back out of the returned total, so only the stored
            # negative_docs value stays inflated.
            negative_docs_shift = 10000
            positive_docs = 0
            negative_docs = negative_docs_shift

            for row in csv.reader(file):
                if ''.join(row).split(';')[1] == 'positive':
                    positive_docs += 1
                else:
                    negative_docs += 1

        return positive_docs + negative_docs - negative_docs_shift, positive_docs, negative_docs

    def _count_all_docs(self):
        for mode in ['unigrams', 'bigrams', 'trigrams']:
            self._docs_count[mode] = dict()

            self._docs_count[mode]['all_docs'], self._docs_count[mode]['positive_docs'], \
                self._docs_count[mode]['negative_docs'] = self._count_docs_in_dataset(mode)

    @staticmethod
    def _detect_ngram_type(ngram):
        if ngram.count(' ') == 0:
            return 'unigram'
        elif ngram.count(' ') == 1:
            return 'bigram'
        elif ngram.count(' ') == 2:
            return 'trigram'

    def _count_ngram_weight(self, ngram):
        self.__logger.info(f'Ngram: {ngram}', __name__)

        ngram_type = self._detect_ngram_type(ngram)
        delta_tf_idf = 0

        self.__logger.info(f'Ngram_type: {ngram_type}', __name__)

        if self._database_cursor.entry_exists(ngram):
            pos_docs_word, neg_docs_word = self._database_cursor.get_entry(ngram)

            # Delta TF-IDF: log10((N_neg * df_pos) / (N_pos * df_neg)).
            # Positive values vote for the positive class, negative values
            # for the negative class.
            delta_tf_idf = math.log10(
                (self._docs_count[ngram_type + 's']['negative_docs'] * pos_docs_word) /
                (self._docs_count[ngram_type + 's']['positive_docs'] * neg_docs_word))
        else:
            response = self._ngram_analyzer.relevant_ngram_find(ngram)

            if response['synonym_found'] and ngram_type == 'unigram':
                pos_docs_word = response['content']['pos_docs']
                neg_docs_word = response['content']['neg_docs']

                if not (pos_docs_word and neg_docs_word) or \
                        (pos_docs_word == 1 and neg_docs_word == 1):
                    return 0

                delta_tf_idf = math.log10(
                    (self._docs_count[ngram_type + 's']['negative_docs'] * pos_docs_word) /
                    (self._docs_count[ngram_type + 's']['positive_docs'] * neg_docs_word))

        return delta_tf_idf

    def count_weight_by_unigrams(self, unigrams):
        checked_unigrams = list()
        important_unigrams = list()
        unigrams_weight = 0

        for unigram in unigrams:
            if unigram not in checked_unigrams:
                this_doc_unigram = unigrams.count(unigram)
                unigram_weight = this_doc_unigram * self._count_ngram_weight(unigram)
                unigrams_weight += unigram_weight

                checked_unigrams.append(unigram)

                if unigram_weight:
                    important_unigrams.append(unigram)

        # Average only when enough unigrams carried a non-zero weight.
        if important_unigrams and len(important_unigrams) >= round(len(unigrams) * 0.6):
            unigrams_weight = unigrams_weight / len(important_unigrams)

        self.__logger.info(f'Unigrams weight: {unigrams_weight}', __name__)

        return unigrams_weight

    def count_weight_by_bigrams(self, bigrams):
        if not bigrams:
            return None

        checked_bigrams = list()
        important_bigrams = list()
        bigrams_weight = 0

        for bigram in bigrams:
            if bigram not in checked_bigrams:
                this_doc_bigram = bigrams.count(bigram)
                bigram_weight = this_doc_bigram * self._count_ngram_weight(bigram)
                bigrams_weight += bigram_weight

                checked_bigrams.append(bigram)

                if bigram_weight:
                    important_bigrams.append(bigram)

        if important_bigrams and len(important_bigrams) >= len(bigrams) // 2:
            bigrams_weight = bigrams_weight / len(important_bigrams)

        self.__logger.info(f'Bigrams weight: {bigrams_weight}', __name__)

        return bigrams_weight

    def count_weight_by_trigrams(self, trigrams):
        if not trigrams:
            return None

        checked_trigrams = list()
        important_trigrams = list()
        trigrams_weight = 0

        for trigram in trigrams:
            if trigram not in checked_trigrams:
                this_doc_trigram = trigrams.count(trigram)
                trigram_weight = this_doc_trigram * self._count_ngram_weight(trigram)
                trigrams_weight += trigram_weight

                checked_trigrams.append(trigram)

                if trigram_weight:
                    important_trigrams.append(trigram)

        if important_trigrams and len(important_trigrams) >= round(len(trigrams) * 0.4):
            trigrams_weight = trigrams_weight / len(important_trigrams)

        self.__logger.info(f'Trigrams weight: {trigrams_weight}', __name__)

        return trigrams_weight
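# Worked example of the delta TF-IDF weight used above, with hypothetical
# counts: 10000 negative docs, 10000 positive docs, a word appearing in 200
# positive and 50 negative docs gives log10((10000 * 200) / (10000 * 50))
# = log10(4) ≈ 0.602, i.e. a vote for the positive class.
import math

print(math.log10((10000 * 200) / (10000 * 50)))  # ≈ 0.602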
class Configurator(metaclass=Singleton):
    def __init__(self):
        # Services
        self.__logger = Logger()
        self._path_service = PathService()
        self._exceptions_handler = ExceptionsHandler()

        # Data
        self._config = dict()
        self._wd = os.getcwd()
        self._path_to_databases = None
        self._request_url = None
        self._vector_model_public_key = None
        self._databases_public_keys = None

        self._load_public_keys()

        self.__logger.info('Configurator was successfully initialized.', __name__)

    def _load_public_keys(self):
        path_to_config = os.path.join(self._path_service.path_to_configs, 'configurator.json')

        if os.path.exists(path_to_config):
            with open(path_to_config, 'r', encoding='utf-8') as file:
                config = json.load(file)

            self._request_url = config['request_url']
            self._vector_model_public_key = config['vector_model_public_key']
            self._databases_public_keys = config['databases_public_keys']
        else:
            self.__logger.error("Can't load config for Configurator (doesn't exist).", __name__)

    def download_database(self, path_to_db):
        database_name = os.path.split(path_to_db)[1]

        if database_name:
            try:
                download_url = requests.get(
                    self._request_url,
                    params={'public_key': self._databases_public_keys[database_name]}
                ).json()['href']

                with open(path_to_db, 'wb') as database_file:
                    database_file.write(requests.get(download_url).content)

                self._config[path_to_db] = 'downloaded'
            except BaseException as exception:
                self.__logger.error(self._exceptions_handler.get_error_message(exception), __name__)
                self._config[path_to_db] = 'error'

    def download_vector_model(self):
        self._path_service.set_path_to_vector_model(
            os.path.join(self._path_service.path_to_databases,
                         'ruscorpora_upos_skipgram_300_10_2017.bin.gz'))

        try:
            download_url = requests.get(
                self._request_url,
                params={'public_key': self._vector_model_public_key}
            ).json()['href']

            with open(self._path_service.path_to_vector_model, 'wb') as vec_model:
                vec_model.write(requests.get(download_url).content)

            self._config['ruscorpora_upos_skipgram_300_10_2017.bin.gz'] = 'downloaded'
        except BaseException as exception:
            self.__logger.error(self._exceptions_handler.get_error_message(exception), __name__)
            self._config['ruscorpora_upos_skipgram_300_10_2017.bin.gz'] = 'error'

    def configure_system(self):
        self._config['datetime'] = str(datetime.now())

        for database in ['unigrams.db', 'bigrams.db', 'trigrams.db']:
            path_to_database = self._path_service.get_path_to_database(database)

            if not path_to_database or not os.path.exists(path_to_database):
                self.__logger.warning(f'Database not found: {database}', __name__)
                self.download_database(
                    os.path.join(self._path_service.path_to_databases, database))
            else:
                self._config[database] = 'exists'

        if not self._path_service.path_to_vector_model or \
                not os.path.exists(self._path_service.path_to_vector_model):
            self.__logger.warning('Vector model not found.', __name__)
            self.download_vector_model()
        else:
            self._config['ruscorpora_upos_skipgram_300_10_2017.bin.gz'] = 'exists'

        self._create_config()

    def _create_config(self):
        with open(os.path.join('Logs', 'config.json'), 'w', encoding='utf-8') as config:
            json.dump(self._config, config, indent=4)
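# Usage sketch (illustrative): Configurator is a Singleton, so repeated
# instantiation yields the same object; configure_system() checks the local
# databases and vector model, downloads what is missing, and writes a status
# report to Logs/config.json.
if __name__ == '__main__':
    configurator = Configurator()
    configurator.configure_system()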
class Classifier:
    def __init__(self):
        # Services
        self.__logger = Logger()
        self._path_service = PathService()
        self._exceptions_handler = ExceptionsHandler()

        # Data
        self._container = ClassificationDataContainer()
        self._possible_classifiers = ['NBC', 'LogisticRegression', 'KNN']

        self.__logger.info('Classifier was successfully initialized.', __name__)

    def _load_config(self):
        # Note: not called from __init__; the defaults above apply unless this
        # method is invoked explicitly.
        path_to_config = os.path.join(self._path_service.path_to_configs, 'classifier.json')

        if os.path.exists(path_to_config):
            with open(path_to_config, 'r', encoding='utf-8') as file:
                config = json.load(file)

            self._possible_classifiers = config['possible_classifiers']
        else:
            self.__logger.error("Can't load Classifier configuration.", __name__)

    def customize(self, unigrams_weight, bigrams_weight, trigrams_weight, classifier_name='NBC'):
        self._container.clear()

        if classifier_name in self._possible_classifiers:
            self._container.classifiers['name'] = classifier_name
        else:
            self._container.classifiers['name'] = 'NBC'
            self.__logger.error('Got unknown classifier, set default (NBC).', __name__)

        self._container.weights['unigrams'] = unigrams_weight
        self._container.weights['bigrams'] = bigrams_weight
        self._container.weights['trigrams'] = trigrams_weight

        try:
            if self._container.weights['unigrams']:
                self._container.classifiers['unigrams'] = joblib.load(
                    self._path_service.get_path_to_model(
                        'unigrams', self._container.classifiers['name']))

            if self._container.weights['bigrams']:
                self._container.classifiers['bigrams'] = joblib.load(
                    self._path_service.get_path_to_model(
                        'bigrams', self._container.classifiers['name']))

            if self._container.weights['trigrams']:
                self._container.classifiers['trigrams'] = joblib.load(
                    self._path_service.get_path_to_model(
                        'trigrams', self._container.classifiers['name']))

            self.__logger.info('Models were successfully loaded.', __name__)
            self.__logger.info('Classifier was successfully configured.', __name__)
        except BaseException as exception:
            self.__logger.fatal(self._exceptions_handler.get_error_message(exception), __name__)

    def _predict_tonal_by_unigrams(self):
        # The weight is wrapped in a 2-D list: scikit-learn predictors expect
        # a samples-by-features array, matching the bigram and trigram
        # predictors below.
        features = [[self._container.weights['unigrams']]]

        self._container.tonalities['unigrams'] = \
            self._container.classifiers['unigrams'].predict(features)[0]
        self._container.probabilities['unigrams'] = \
            max(self._container.classifiers['unigrams'].predict_proba(features)[0])

        self.__logger.info(f'Unigrams tonal: {self._container.tonalities["unigrams"]}', __name__)
        self.__logger.info(f'Unigrams probability: {self._container.probabilities["unigrams"]}',
                           __name__)

    def _predict_tonal_by_unigrams_bigrams(self):
        features = [[self._container.weights['unigrams'],
                     self._container.weights['bigrams']]]

        self._container.tonalities['bigrams'] = \
            self._container.classifiers['bigrams'].predict(features)[0]
        self._container.probabilities['bigrams'] = \
            max(self._container.classifiers['bigrams'].predict_proba(features)[0])

        self.__logger.info(f'Bigrams tonal: {self._container.tonalities["bigrams"]}', __name__)
        self.__logger.info(f'Bigrams probability: {self._container.probabilities["bigrams"]}',
                           __name__)

    def _predict_tonal_by_unigrams_bigrams_trigrams(self):
        features = [[self._container.weights['unigrams'],
                     self._container.weights['bigrams'],
                     self._container.weights['trigrams']]]

        self._container.tonalities['trigrams'] = \
            self._container.classifiers['trigrams'].predict(features)[0]
        self._container.probabilities['trigrams'] = \
            max(self._container.classifiers['trigrams'].predict_proba(features)[0])

        self.__logger.info(f'Trigrams tonal: {self._container.tonalities["trigrams"]}', __name__)
        self.__logger.info(f'Trigrams probability: {self._container.probabilities["trigrams"]}',
                           __name__)

    def _predict_intermediate_tonalities(self):
        threads = list()

        if self._container.weights['unigrams']:
            threads.append(Thread(target=self._predict_tonal_by_unigrams))

        if self._container.weights['bigrams']:
            threads.append(Thread(target=self._predict_tonal_by_unigrams_bigrams))

        if self._container.weights['trigrams']:
            threads.append(Thread(target=self._predict_tonal_by_unigrams_bigrams_trigrams))

        for thread in threads:
            thread.start()

        # join() already blocks until each thread finishes; no polling loop
        # with time.sleep() is needed.
        for thread in threads:
            thread.join()

    def _select_final_tonal(self):
        # Majority vote across the available intermediate predictions;
        # a tie between two predictors is broken by probability.
        if self._container.tonalities['unigrams'] and self._container.tonalities['bigrams'] \
                and self._container.tonalities['trigrams']:
            if self._container.tonalities['unigrams'] == self._container.tonalities['bigrams']:
                self._container.tonalities['final'] = self._container.tonalities['unigrams']
                self._container.probabilities['final'] = max(
                    self._container.probabilities['unigrams'],
                    self._container.probabilities['bigrams'])
            elif self._container.tonalities['unigrams'] == self._container.tonalities['trigrams']:
                self._container.tonalities['final'] = self._container.tonalities['unigrams']
                self._container.probabilities['final'] = max(
                    self._container.probabilities['unigrams'],
                    self._container.probabilities['trigrams'])
            elif self._container.tonalities['bigrams'] == self._container.tonalities['trigrams']:
                self._container.tonalities['final'] = self._container.tonalities['bigrams']
                self._container.probabilities['final'] = max(
                    self._container.probabilities['bigrams'],
                    self._container.probabilities['trigrams'])
        elif self._container.tonalities['unigrams'] and self._container.tonalities['bigrams']:
            if self._container.tonalities['unigrams'] != self._container.tonalities['bigrams']:
                if self._container.probabilities['unigrams'] >= \
                        self._container.probabilities['bigrams']:
                    self._container.tonalities['final'] = self._container.tonalities['unigrams']
                    self._container.probabilities['final'] = \
                        self._container.probabilities['unigrams']
                else:
                    self._container.tonalities['final'] = self._container.tonalities['bigrams']
                    self._container.probabilities['final'] = \
                        self._container.probabilities['bigrams']
            else:
                self._container.tonalities['final'] = self._container.tonalities['unigrams']
                self._container.probabilities['final'] = max(
                    self._container.probabilities['unigrams'],
                    self._container.probabilities['bigrams'])
        elif self._container.tonalities['unigrams']:
            self._container.tonalities['final'] = self._container.tonalities['unigrams']
            self._container.probabilities['final'] = self._container.probabilities['unigrams']

    def predict_tonal(self):
        self._predict_intermediate_tonalities()
        self._select_final_tonal()

        self.__logger.info(f'Final tonal: {self._container.tonalities["final"]}', __name__)
        self.__logger.info(f'Final probability: {self._container.probabilities["final"]}', __name__)

        return self._container.tonalities['final'], self._container.probabilities['final']
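# Usage sketch (illustrative; the weight values are hypothetical — in the
# real pipeline they come from TextWeightCounter via TextTonalAnalyzer, and
# customize() loads the corresponding pickled models with joblib):
if __name__ == '__main__':
    classifier = Classifier()
    classifier.customize(unigrams_weight=0.42, bigrams_weight=0.17,
                         trigrams_weight=0.05, classifier_name='NBC')
    tonal, probability = classifier.predict_tonal()
    print(tonal, probability)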