Python Logger.warning примеры использования

Язык программирования: Python

Пространство имен/Пакет: Python.Services.Logger

Класс/Тип: Logger

Метод/Функция: warning

Примеров на hotexamples.com: 9

Python Logger.warning — найдено 9 примеров. Это лучшие примеры кода на Python для Python.Services.Logger.Logger.warning, полученные из проектов с открытым исходным кодом. Вы можете поставить оценку каждому примеру, чтобы помочь нам улучшить качество примеров.

Основные методы

Показать Скрыть

Logger(14)

info(14)

warning(9)

error(8)

fatal(4)

page_break(1)

Пример #1

Показать файл

Файл: SpeechRecognizer.py Проект: vanavski/Text_tone_analyzer

class SpeechRecognizer:
    """Record speech from a microphone and turn it into lower-cased text.

    NOTE(review): ``sr`` is presumably the ``speech_recognition`` package;
    confirm against the file's imports.
    """

    def __init__(self):
        # Services
        self.__recognizer = sr.Recognizer()
        self.__logger = Logger()
        self._exceptions_handler = ExceptionsHandler()

        self.__logger.info('SpeechRecognizer was successfully initialized.', __name__)

    def recognize_speech(self):
        """Listen on the microphone and return the recognized text.

        Returns:
            The recognized phrase, lower-cased and stripped, on success.
            Otherwise the error message produced by the exceptions handler
            for the failure that occurred.

        A ``sr.WaitTimeoutError`` raised during recognition is only logged
        as a warning and the whole listen/recognize cycle is retried.
        """
        while True:
            try:
                with sr.Microphone() as source:
                    speech = self.__recognizer.listen(source)

            # ``Exception`` rather than ``BaseException``: KeyboardInterrupt
            # and SystemExit must propagate instead of being reported as a
            # recognition error.
            except Exception as exception:
                error_message = self._exceptions_handler.get_error_message(exception)

                self.__logger.error(error_message, __name__)
                return error_message

            try:
                return self.__recognizer.recognize_google(speech, language="ru-RU").lower().strip()

            except Exception as exception:
                error_message = self._exceptions_handler.get_error_message(exception)

                if isinstance(exception, sr.WaitTimeoutError):
                    # Recoverable: log it and listen again.
                    self.__logger.warning(error_message, __name__)
                    continue

                self.__logger.error(error_message, __name__)
                return error_message

Пример #2

Показать файл

Файл: DocumentPreparer.py Проект: vanavski/Text_tone_analyzer

class DocumentPreparer:
    """Split a text into unigrams, bigrams and trigrams."""

    def __init__(self):
        self.__logger = Logger()

        self.__logger.info('DocumentPreparer was successfully initialized.',
                           __name__)

    def split_into_unigrams(self, text):
        """Return every run of word characters found in ``text``.

        Returns ``None`` (after logging a warning) when ``text`` is empty.
        """
        if not text:
            self.__logger.warning('Got empty text.', __name__)
            return None

        return re.findall(r'\w+', text)

    def split_into_bigrams(self, text):
        """Return the bigrams of ``text``; see ``_split_into_ngrams``."""
        return self._split_into_ngrams(text, 2)

    def split_into_trigrams(self, text):
        """Return the trigrams of ``text``; see ``_split_into_ngrams``."""
        return self._split_into_ngrams(text, 3)

    def _split_into_ngrams(self, text, size):
        """Build the n-grams of ``size`` consecutive words of ``text``.

        The words inside every n-gram are sorted alphabetically before they
        are joined with a space, so word order within a window is ignored.

        Returns:
            A list of n-gram strings, or ``None`` when ``text`` is empty
            (warning logged) or has fewer than ``size`` words (info logged).
        """
        if not text:
            self.__logger.warning('Got empty text.', __name__)
            return None

        unigrams = self.split_into_unigrams(text)

        if len(unigrams) < size:
            self.__logger.info("Text doesn't contain enough words.", __name__)
            return None

        return [
            ' '.join(sorted(unigrams[start:start + size])).strip()
            for start in range(len(unigrams) - size + 1)
        ]

Пример #3

Показать файл

Файл: DatabaseCursor.py Проект: vanavski/Text_tone_analyzer

class DatabaseCursor:
    """Look up n-gram entries in the unigram/bigram/trigram SQLite databases."""

    # Database file that stores an n-gram, keyed by the number of spaces in it.
    _DATABASE_BY_SPACES_COUNT = {
        0: 'unigrams.db',
        1: 'bigrams.db',
        2: 'trigrams.db',
    }

    # Parameterized lookup: the n-gram is bound by SQLite, never interpolated.
    _SELECT_ENTRY_REQUEST = 'SELECT * FROM "Data" WHERE Ngram=?'

    def __init__(self):
        # Services
        self.__logger = Logger()
        self._path_service = PathService()
        self._configurator = Configurator()
        self._exceptions_handler = ExceptionsHandler()

        # Data
        self._wd = os.getcwd()
        self._request_url = None
        self.databases_public_keys = None

        self.__logger.info('DatabaseCursor was successfully initialized.',
                           __name__)

    def _load_config(self):
        """Read the request URL and database public keys from 'database_cursor.json'.

        Logs an error and leaves both attributes untouched when the config
        file does not exist.
        """
        path_to_config = os.path.join(self._path_service.path_to_configs,
                                      'database_cursor.json')

        if not os.path.exists(path_to_config):
            self.__logger.error(
                "Can't load config for DatabaseCursor (doesn't exist).",
                __name__)
            return

        with open(path_to_config, 'r', encoding='utf-8') as file:
            config = json.load(file)

        self._request_url = config['request_url']
        self.databases_public_keys = config['database_public_keys']

    def __update_connection(self, ngram):
        """Open a connection to the database that stores ``ngram``.

        The database is chosen by the number of spaces in ``ngram``. When the
        file is missing, the configurator is asked to download it first.

        Returns:
            An open ``sqlite3.Connection``, or ``None`` when the n-gram has
            more than three words or the database is unavailable even after
            the download attempt.
        """
        database_name = self._DATABASE_BY_SPACES_COUNT.get(ngram.count(' '))

        if database_name is None:
            # No database exists for longer n-grams; previously this fell
            # through to os.path.split(None) and raised TypeError.
            self.__logger.error(f'No database for ngram: {ngram}', __name__)
            return None

        path_to_db = self._path_service.get_path_to_database(database_name)

        if not os.path.exists(path_to_db):
            self.__logger.warning(f'Database lost: {path_to_db}', __name__)
            self.__logger.info('Trying to download database from cloud...',
                               __name__)

            self._configurator.download_database(path_to_db)

            if not os.path.exists(path_to_db):
                self.__logger.fatal("Database doesn't exist.", __name__)
                return None

        # Logged only once the file is known to exist.
        self.__logger.info(f'Connected to database: {path_to_db}', __name__)

        return sqlite3.connect(path_to_db)

    def _fetch_row(self, ngram):
        """Run the lookup for ``ngram`` and return ``(succeeded, row)``.

        ``succeeded`` is ``False`` when no connection could be opened or the
        request failed (the failure is logged); ``row`` is the first matching
        row or ``None``. The connection is always closed before returning.
        """
        connection = self.__update_connection(ngram)

        if connection is None:
            return False, None

        self.__logger.info(
            f'Request to DB: {self._SELECT_ENTRY_REQUEST} (Ngram={ngram!r})',
            __name__)

        try:
            cursor = connection.cursor()
            cursor.execute(self._SELECT_ENTRY_REQUEST, (ngram,))
            self.__logger.info('Request is OK.', __name__)

            return True, cursor.fetchone()

        except Exception as exception:
            self.__logger.error(
                self._exceptions_handler.get_error_message(exception),
                __name__)
            return False, None

        finally:
            connection.close()

    def get_entry(self, ngram):
        """Return columns 1 and 2 of the row stored for ``ngram``.

        Returns:
            A ``(row[1], row[2])`` tuple, or ``None`` when the n-gram is not
            in the database or the lookup failed.
        """
        succeeded, row = self._fetch_row(ngram)

        if not succeeded:
            return None

        self.__logger.info(f'Received data: {str(row)}', __name__)

        if row:
            return row[1], row[2]

        return None

    def entry_exists(self, ngram):
        """Tell whether ``ngram`` is stored in its database.

        Returns:
            ``True`` / ``False``, or ``None`` when the lookup itself failed.
        """
        succeeded, row = self._fetch_row(ngram)

        if not succeeded:
            return None

        if row:
            self.__logger.info('Entry exists.', __name__)
            return True

        self.__logger.info("Entry doesn't exist.", __name__)
        return False

Пример #4

Показать файл

Файл: Demo.py Проект: vanavski/Text_tone_analyzer

class MainWindow(QWidget):
    """Main window of the demo: a text box, four buttons and an answer label.

    Sizes, coordinates, fonts and colors come from 'demo.json', from its
    'windows' section on Windows and its 'darwin' section everywhere else.
    """

    # Style sheet shared by every push button; the three placeholders take
    # the 'hover', '!hover' and 'pressed' background colors, in that order.
    _BUTTON_STYLE_TEMPLATE = """
                             QPushButton:hover { background-color: %s }
                             QPushButton:!hover { background-color: %s }
                             QPushButton:pressed { background-color: %s; }
                         """

    # Answer label color per detected tonal.
    _ANSWER_LABEL_STYLES = {
        'positive': 'QLabel {color:rgba(0, 200, 100, 255)}',
        'negative': 'QLabel {color:rgba(255, 56, 20, 255)}',
    }

    # Answer label position per (platform, tonal). Coordinates are integers
    # because they are passed to QWidget.move (the original 193.5 is
    # truncated to 193).
    _ANSWER_LABEL_COORDINATES = {
        ('windows', 'positive'): (193, 180),
        ('windows', 'negative'): (180, 180),
        ('darwin', 'positive'): (230, 210),
        ('darwin', 'negative'): (225, 210),
    }

    def __init__(self):
        super().__init__()
        self.os = platform.system().lower()

        # Services
        self._speech_recognizer = SpeechRecognizer()
        self._file_reader = FileReader()
        self.__logger = Logger()
        self._path_service = PathService()
        self._text_tonal_analyzer = TextTonalAnalyzer('NBC')

        self._config = None
        self._load_config()

        # GUI Elements
        self.line_edit = QLineEdit(self)
        self.answer_label = QLabel(self)
        self.voice_button = QPushButton(self)
        self.answer_button = QPushButton(self)
        self.file_dialog_button = QPushButton(self)
        self.delete_button = QPushButton(self)
        self.message_box = QMessageBox()

    def _load_config(self):
        """Load the platform section of 'demo.json' into ``self._config``.

        Raises:
            SystemExit: with code -1 when the config file does not exist.
        """
        path_to_config = os.path.join(self._path_service.path_to_configs, 'demo.json')

        if not os.path.exists(path_to_config):
            self.__logger.fatal("Config for GUI doesn't exist.", __name__)
            # Same effect as exit(-1) without relying on the ``site`` builtin,
            # which is not guaranteed to exist outside interactive sessions.
            raise SystemExit(-1)

        with open(path_to_config, 'r', encoding='utf-8') as file:
            config = json.load(file)

        # Every non-Windows platform uses the 'darwin' layout.
        self._config = config['windows'] if self.os == 'windows' else config['darwin']

    def _button_style_sheet(self, button_name):
        """Return the style sheet for the button configured under ``button_name``."""
        colors = self._config[button_name]['background-color']

        return self._BUTTON_STYLE_TEMPLATE % (colors['hover'], colors['!hover'], colors['pressed'])

    def _configure_main_window(self):
        """Apply the configuration to the window and all of its widgets."""
        self._set_base_params()

        self._configure_line_edit()
        self._configure_answer_button()
        self.configure_voice_button()
        self._configure_delete_button()
        self._configure_file_dialog_button()
        self._configure_answer_label()

        self.__logger.info('Main window was successfully configured.', __name__)

    def _set_base_params(self):
        """Set the fixed window size and background color."""
        self.setFixedSize(*self._config['size'])
        self.setStyleSheet('QWidget { background-color: %s }' % self._config['background-color'])

    def _configure_line_edit(self):
        """Configure the text box; pressing Return triggers the analysis."""
        config = self._config['line-edit']

        self.line_edit.setToolTip('Enter the text here')
        self.line_edit.returnPressed.connect(self._answer_button_clicked)

        self.line_edit.resize(*config['size'])
        self.line_edit.setStyleSheet('QWidget { background-color: %s }' % config['background-color'])
        self.line_edit.move(*config['coordinates'])
        self.line_edit.setFont(QFont(*config['font']))

    def _configure_answer_button(self):
        """Configure the 'Start' button that runs the analysis."""
        config = self._config['answer-button']

        self.answer_button.clicked.connect(self._answer_button_clicked)
        self.answer_button.setText('Start')
        self.answer_button.setToolTip('Push to count tonal')

        self.answer_button.setStyleSheet(self._button_style_sheet('answer-button'))
        self.answer_button.resize(*config['size'])
        self.answer_button.move(*config['coordinates'])
        self.answer_button.setFont(QFont(*config['font']))

    def configure_voice_button(self):
        """Configure the button that starts speech input."""
        config = self._config['voice-button']

        # U+1F399 STUDIO MICROPHONE; escaped so the caption survives
        # re-encoding of the source file.
        self.voice_button.setText('\U0001F399')
        self.voice_button.setToolTip('Push to enter the text by speech')
        self.voice_button.clicked.connect(self._voice_button_clicked)

        self.voice_button.resize(*config['size'])
        self.voice_button.setFont(QFont(*config['font']))
        self.voice_button.move(*config['coordinates'])
        self.voice_button.setStyleSheet(self._button_style_sheet('voice-button'))

    def _configure_delete_button(self):
        """Configure the button that clears the text box and the answer."""
        config = self._config['delete-button']

        # U+2717 BALLOT X.
        self.delete_button.setText('\u2717')
        self.delete_button.setToolTip('Push to clear text box')
        self.delete_button.clicked.connect(self._delete_button_clicked)

        self.delete_button.resize(*config['size'])
        self.delete_button.setFont(QFont(*config['font']))
        self.delete_button.move(*config['coordinates'])
        self.delete_button.setStyleSheet(self._button_style_sheet('delete-button'))

    def _configure_file_dialog_button(self):
        """Configure the button that loads the text from a file."""
        config = self._config['file-dialog-button']

        # U+1F4C2 OPEN FILE FOLDER.
        self.file_dialog_button.setText('\U0001F4C2')
        self.file_dialog_button.setToolTip('Push to open file')
        self.file_dialog_button.clicked.connect(self._file_dialog_button_clicked)

        self.file_dialog_button.resize(*config['size'])
        self.file_dialog_button.setFont(QFont(*config['font']))
        self.file_dialog_button.move(*config['coordinates'])
        self.file_dialog_button.setStyleSheet(self._button_style_sheet('file-dialog-button'))

    def _configure_answer_label(self):
        """Place and size the label that shows the detected tonal."""
        config = self._config['answer-label']

        self.answer_label.move(*config['coordinates'])
        self.answer_label.setFont(QFont(*config['font']))
        self.answer_label.resize(*config['size'])

    def launch(self):
        """Set icon and title, configure the widgets and show the window."""
        self.setWindowIcon(QIcon('icon.ico'))
        self.setWindowTitle('Sentiment Analyser')

        self._configure_main_window()
        self.show()

        self.__logger.info('Main window was successfully launched.', __name__)

    def _delete_button_clicked(self):
        """Clear both the text box and the answer label."""
        self.line_edit.clear()
        self.answer_label.clear()

    def _voice_button_clicked(self):
        """Fill the text box from speech, offering retries on unknown input.

        The error results are checked after every attempt, so a connection or
        microphone failure that happens on a retry is reported in a message
        box instead of being written into the text box.
        """
        self.message_box.question(self, 'Speak', 'You can start speaking.', QMessageBox.Ok)

        speech_text = self._speech_recognizer.recognize_speech()

        while speech_text == 'Unknown value':
            try_again = self.message_box.question(self, 'Error', 'Unknown value\n Try again?',
                                                  QMessageBox.Yes | QMessageBox.No)
            if try_again != QMessageBox.Yes:
                return

            speech_text = self._speech_recognizer.recognize_speech()

        if speech_text == 'Internet connection lost':
            self.message_box.question(self, 'Error', 'Internet connection lost', QMessageBox.Ok)
            return

        if speech_text == 'No microphone':
            self.message_box.question(self, 'Error', 'Microphone was disconnected', QMessageBox.Ok)
            return

        self.line_edit.setText(speech_text)

    def _file_dialog_button_clicked(self):
        """Put the content returned by the file reader into the text box."""
        file_content = self._file_reader.get_file_content()

        if file_content:
            self.line_edit.setText(file_content)
        else:
            self.__logger.warning('Empty file.', __name__)

    def _answer_button_clicked(self):
        """Analyze the text box content and show tonal and probability."""
        self._text_tonal_analyzer.detect_tonal(self.line_edit.text())

        tonal = self._text_tonal_analyzer.tonal
        probability = self._text_tonal_analyzer.probability

        # Color and position are only defined for positive/negative results
        # on the two known platforms; anything else keeps the current look.
        coordinates = self._ANSWER_LABEL_COORDINATES.get((self.os, tonal))

        if coordinates is not None:
            self.answer_label.setStyleSheet(self._ANSWER_LABEL_STYLES[tonal])
            self.answer_label.move(*coordinates)

        self.answer_label.setToolTip('Tonal and probability')

        if probability:
            self.answer_label.setText(f'{tonal.capitalize()}\n{round(probability * 100, 3)}%')
        else:
            self.answer_label.setText(tonal.capitalize())

Пример #5

Показать файл

Файл: PathService.py Проект: vanavski/Text_tone_analyzer

class PathService(metaclass=Singleton):
    """Resolve the locations of configs, databases, models and test reports."""

    def __init__(self):
        # Services
        self.__logger = Logger()

        # Data
        self._wd = os.getcwd()
        self.path_to_databases = None
        self.path_to_configs = None
        self._valid_classifiers = None
        self._valid_model_types = None
        self._valid_databases = None
        self._valid_test_results_modes = None
        self._valid_datasets = None
        self.path_to_stop_words = None
        self._path_to_main_directory = None
        self.path_to_vector_model = None
        self._path_to_classifier_models = None
        self._path_to_test_results = None

        self.configure()
        self.__logger.info('PathService was successfully configured.', __name__)

    def _find_main_directory(self):
        """Locate the 'Python' directory and derive config/database paths.

        Walks up from the current working directory (or sideways from a
        'Databases' directory) until the working directory ends with
        'Python', then restores the original working directory.

        Raises:
            SystemExit: with code -1 after more than five steps without
                finding the directory.
        """
        max_nesting_level = 5
        nesting_level = 0

        while not os.getcwd().endswith('Python'):
            if os.getcwd().endswith('Databases'):
                os.chdir(os.path.join('..', 'Python'))
                break

            os.chdir('..')
            nesting_level += 1

            if nesting_level > max_nesting_level:
                self.__logger.fatal("Can't find main directory (exceeded maximum nesting level).", __name__)
                # Same effect as exit(-1) without relying on the ``site`` builtin.
                raise SystemExit(-1)

        self._path_to_main_directory = os.getcwd()
        self.path_to_configs = os.path.join(self._path_to_main_directory, 'Services', 'Configs')
        # Resolved while the working directory is still the main directory.
        self.path_to_databases = os.path.abspath(os.path.join('..', 'Databases'))

        os.chdir(self._wd)

    def _check_paths_existing(self):
        """Verify the configured paths.

        Missing config, database or classifier-model directories are fatal
        (``SystemExit(-1)``). A missing vector model, stop-words file or
        reports directory is only logged and its path is reset to ``None``.
        """
        required_paths = (
            (self.path_to_configs, "Directory with config files doesn't exist."),
            (self.path_to_databases, "Directory with databases doesn't exist."),
            (self._path_to_classifier_models, "Directory with classifier models doesn't exist."),
        )

        for path, message in required_paths:
            if not os.path.exists(path):
                self.__logger.fatal(message, __name__)
                raise SystemExit(-1)

        if not os.path.exists(self.path_to_vector_model):
            self.path_to_vector_model = None
            self.__logger.error("Vector model doesn't exist.", __name__)

        if not os.path.exists(self.path_to_stop_words):
            self.path_to_stop_words = None
            self.__logger.error("File with stop-words doesn't exist.", __name__)

        if not os.path.exists(self._path_to_test_results):
            self._path_to_test_results = None
            self.__logger.warning("Directory with tests reports doesn't exist.", __name__)

    def _load_config(self):
        """Load the lists of valid names from 'path_service.json'.

        Raises:
            SystemExit: with code -1 when the config file does not exist.
                Previously the error was logged and the file was opened
                anyway, which ended in an unhandled FileNotFoundError.
        """
        path_to_config = os.path.join(self.path_to_configs, 'path_service.json')

        if not os.path.exists(path_to_config):
            self.__logger.fatal("Can't find config-file for PathService.", __name__)
            raise SystemExit(-1)

        with open(path_to_config, 'r', encoding='utf-8') as file:
            config = json.load(file)

        self._valid_classifiers = config['valid_classifiers']
        self._valid_databases = config['valid_databases']
        self._valid_datasets = config['valid_datasets']
        self._valid_test_results_modes = config['valid_test_results_modes']
        self._valid_model_types = config['valid_model_types']

    def configure(self):
        """Find the main directory, load the config and build all paths."""
        self._find_main_directory()
        self._load_config()

        self.path_to_vector_model = os.path.join(self.path_to_databases, 'ruscorpora_upos_skipgram_300_10_2017.bin.gz')
        self.path_to_stop_words = os.path.join(self._path_to_main_directory, 'Services', 'Lemmatizer',
                                               'stop_words.json')
        self._path_to_classifier_models = os.path.join(self.path_to_databases, 'Models')
        self._path_to_test_results = os.path.join(self._path_to_main_directory, 'Tests', 'System', 'Reports')

        self._check_paths_existing()

    def get_path_to_test_results(self, mode='classifier', classifier_name='NBC'):
        """Return the reports directory for ``mode``.

        An unknown ``classifier_name`` falls back to 'NBC'. An unknown
        ``mode`` yields the base reports directory. ``None`` is returned for
        a valid mode that none of the branches below handles.
        """
        if classifier_name not in self._valid_classifiers:
            self.__logger.warning('Got incorrect classifier name.', __name__)
            classifier_name = 'NBC'

        # The mode itself is validated here; the original compared the
        # classifier name against the list of modes.
        if mode not in self._valid_test_results_modes:
            self.__logger.warning('Got incorrect mode.', __name__)
            return self._path_to_test_results

        normalized_mode = mode.lower().strip()

        if normalized_mode == 'vec_model':
            return os.path.join(self._path_to_test_results, 'VectorModel')

        if normalized_mode == 'classifier_main':
            return os.path.join(self._path_to_test_results, '..', '..', 'MainReports', 'Classifier', classifier_name)

        if normalized_mode == 'classifier':
            return self._path_to_test_results

        return None

    def get_path_to_model(self, model='unigrams', classifier_name='NBC'):
        """Return the path of the pickled ``model`` for ``classifier_name``.

        Unknown names fall back to 'NBC' / 'unigrams'. Returns ``None``
        (after logging an error) when the classifier's directory is missing.
        """
        if classifier_name not in self._valid_classifiers:
            self.__logger.warning('Got incorrect classifier name.', __name__)
            classifier_name = 'NBC'

        if model not in self._valid_model_types:
            self.__logger.warning('Got incorrect model type.', __name__)
            model = 'unigrams'

        path_to_models = os.path.join(self._path_to_classifier_models, classifier_name)

        if not os.path.exists(path_to_models):
            self.__logger.error("Required model wasn't found.", __name__)
            return None

        return os.path.join(path_to_models, f'model_{model}.pkl')

    def get_path_to_database(self, database_name='unigrams.db'):
        """Return the path of ``database_name``; unknown names map to 'unigrams.db'."""
        if database_name not in self._valid_databases:
            self.__logger.warning('Got incorrect database name.', __name__)
            database_name = 'unigrams.db'

        return os.path.join(self.path_to_databases, database_name)

    def get_path_to_dataset(self, dataset):
        """Return the path of ``dataset``; unknown names map to 'dataset_with_unigrams.csv'."""
        if dataset not in self._valid_datasets:
            self.__logger.warning('Got incorrect dataset name.', __name__)
            dataset = 'dataset_with_unigrams.csv'

        return os.path.join(self.path_to_databases, dataset)

    def set_path_to_vector_model(self, path_to_vector_model):
        """Override the stored path to the vector model."""
        self.path_to_vector_model = path_to_vector_model

Пример #6

Показать файл

Файл: TextTonalAnalyzer.py Проект: rosspeckomplekt/Text_tone_analyzer

class TextTonalAnalyzer:
    """Detect the tonal of a text and the probability of that verdict."""

    def __init__(self, classifier_name='NBC'):
        # Services
        self._configurator = Configurator()
        self._configurator.configure_system()

        self._database_cursor = DatabaseCursor()
        self._document_preparer = DocumentPreparer()
        self._text_weight_counter = TextWeightCounter()
        self._classifier = Classifier()
        self.__logger = Logger()
        self._lemmatizer = Lemmatizer()
        self._path_service = PathService()

        # Data
        self._classifier_name = classifier_name

        self._text = None
        self.tonal = None
        self.probability = 0

        self._unigrams = None
        self._bigrams = None
        self._trigrams = None

        self._unigrams_weight = None
        self._bigrams_weight = None
        self._trigrams_weight = None

        self.__logger.info('TextTonalAnalyzer was successfully initialized.',
                           __name__)

    def _reset_data(self):
        """Forget everything computed for the previously analyzed text."""
        self._text = None
        self.tonal = None
        self.probability = 0

        self._unigrams = None
        self._bigrams = None
        self._trigrams = None

        self._unigrams_weight = None
        self._bigrams_weight = None
        self._trigrams_weight = None

        self.__logger.info('Data was successfully reset.', __name__)

    def _document_prepare(self):
        """Split the current text into unigrams, bigrams and trigrams."""
        self._unigrams = self._document_preparer.split_into_unigrams(
            self._text)
        self._bigrams = self._document_preparer.split_into_bigrams(self._text)
        self._trigrams = self._document_preparer.split_into_trigrams(
            self._text)

    def _text_in_dataset(self):
        """Look the current text up in 'dataset_with_unigrams.csv'.

        On a hit, ``tonal`` is set to the stored label and ``probability``
        to 1.

        Returns:
            ``True`` when the text was found with a label, ``False``
            otherwise.
        """
        path_to_dataset = self._path_service.get_path_to_dataset(
            'dataset_with_unigrams.csv')

        with open(path_to_dataset, 'r', encoding='utf-8') as file:
            for row in csv.reader(file):
                # Rows are stored as 'text;label'. The comma-split cells are
                # glued back together before splitting on ';'.
                fields = ''.join(row).split(';')

                # Rows without a label used to raise IndexError when their
                # text matched; they are skipped now.
                if len(fields) < 2 or fields[0] != self._text:
                    continue

                self.tonal = fields[1]
                self.probability = 1

                self.__logger.info('Document is in dataset.', __name__)
                return True

        return False

    def _count_weight_by_unigrams(self):
        """Store the weight of the text computed from its unigrams."""
        self._unigrams_weight = self._text_weight_counter.count_weight_by_unigrams(
            self._unigrams)

    def _count_weight_by_bigrams(self):
        """Store the weight of the text computed from its bigrams."""
        self._bigrams_weight = self._text_weight_counter.count_weight_by_bigrams(
            self._bigrams)

    def _count_weight_by_trigrams(self):
        """Store the weight of the text computed from its trigrams."""
        self._trigrams_weight = self._text_weight_counter.count_weight_by_trigrams(
            self._trigrams)

    def detect_tonal(self, text):
        """Analyze ``text`` and store the verdict in ``tonal`` / ``probability``.

        The text is lemmatized first. An empty result sets ``tonal`` to
        'Unknown'. A text found in the dataset takes its stored label.
        Otherwise the three n-gram weights are computed in parallel threads
        and handed to the classifier.

        Returns:
            Always ``None``; read ``tonal`` and ``probability`` afterwards.
        """
        self._reset_data()

        self._text = self._lemmatizer.get_text_initial_form(text)

        if not self._text:
            self.tonal = 'Unknown'

            self.__logger.warning('Text is empty.', __name__)
            return None

        self._document_prepare()

        if not self._text_in_dataset():
            weight_counters = (
                self._count_weight_by_unigrams,
                self._count_weight_by_bigrams,
                self._count_weight_by_trigrams,
            )
            threads = [Thread(target=counter, args=()) for counter in weight_counters]

            for thread in threads:
                thread.start()

            # join() already blocks until the thread ends; polling
            # is_alive() with sleeps only added latency.
            for thread in threads:
                thread.join()

            self._classifier.customize(self._unigrams_weight,
                                       self._bigrams_weight,
                                       self._trigrams_weight,
                                       self._classifier_name)

            self.tonal, self.probability = self._classifier.predict_tonal()

            self.__logger.page_break()

        return None

Пример #7

Показать файл

class NgramAnalyzer:
    """Find a synonym of an n-gram that is present in the n-gram database."""

    # pymorphy2 part-of-speech tags that are renamed; tags starting with
    # 'ADJ' / 'PRT' are handled separately by prefix.
    _PART_OF_SPEECH_ALIASES = {
        'INFN': 'VERB',
        'ADVB': 'ADV',
        'PRED': 'ADV',
        'PRCL': 'PART',
    }

    def __init__(self):
        # Services
        self._database_cursor = DatabaseCursor()
        self.__logger = Logger()
        # NOTE: the attribute name is misspelled ('hanlder'); kept as is so
        # that any code reading the attribute keeps working.
        self._exceptions_hanlder = ExceptionsHandler()
        self._lemmatizer = Lemmatizer()
        self._path_service = PathService()
        self._configurator = Configurator()
        self._morph_analyzer = pymorphy2.MorphAnalyzer()

        # Data
        self._vec_model = None

        self._load_vec_model()

        self.__logger.info('NgramAnalyzer was successfully initialized.', __name__)

    def _load_vec_model(self):
        """Load the word2vec model, asking for a download when its path is unset."""
        if not self._path_service.path_to_vector_model:
            self.__logger.warning("Vector model doesn't exist.", __name__)

            self._configurator.download_vector_model()
            self._path_service.set_path_to_vector_model(os.path.join(self._path_service.path_to_databases,
                                                                     'ruscorpora_upos_skipgram_300_10_2017.bin.gz'))
            self.__logger.info('Vector model was successfully downloaded.', __name__)

        if self._path_service.path_to_vector_model:
            self._vec_model = gensim.models.KeyedVectors.load_word2vec_format(self._path_service.path_to_vector_model,
                                                                              binary=True)
        else:
            self.__logger.error("Vector model doesn't exist.", __name__)

    def _part_of_speech_detect(self, word):
        """Return the part-of-speech tag of ``word``.

        Tags starting with 'ADJ' / 'PRT' collapse to those prefixes, a few
        tags are renamed (see ``_PART_OF_SPEECH_ALIASES``) and any other tag
        is returned unchanged. Returns ``None`` for an empty word or when the
        analyzer reports no tag.
        """
        if not word:
            return None

        part_of_speech = self._morph_analyzer.parse(word)[0].tag.POS

        if not part_of_speech:
            return part_of_speech

        for prefix in ('ADJ', 'PRT'):
            if part_of_speech.startswith(prefix):
                return prefix

        return self._PART_OF_SPEECH_ALIASES.get(part_of_speech, part_of_speech)

    @staticmethod
    def _detect_ngram_type(ngram):
        """Classify ``ngram`` by its number of spaces.

        Returns 'unigram', 'bigram' or 'trigram', or ``None`` for an empty
        n-gram or one with more than two spaces.
        """
        if not ngram:
            return None

        return {0: 'unigram', 1: 'bigram', 2: 'trigram'}.get(ngram.count(' '))

    def _nearest_synonyms_find(self, word, topn):
        """Return up to ``topn`` nearest synonyms of ``word`` from the vector model.

        Ten times ``topn`` candidates are requested from the model because
        candidates whose lemmatized form is empty or of another n-gram type
        are dropped.

        Returns:
            A list of ``{'word': ..., 'cosine proximity': ...}`` dicts, or
            ``None`` when there is no model, the arguments are unusable or
            the model lookup fails (the failure is logged as a warning).
        """
        if self._vec_model is None or not word or topn <= 0:
            return None

        nearest_synonyms = list()
        ngram_type = self._detect_ngram_type(word)

        # Analyze the word once and reuse the tag for the model key.
        part_of_speech = self._part_of_speech_detect(word)

        if part_of_speech:
            word = f'{word}_{part_of_speech}'

        try:
            for candidate, proximity in self._vec_model.most_similar(positive=[word], topn=topn * 10):
                found_synonym = self._lemmatizer.get_text_initial_form(candidate.split('_')[0])

                if found_synonym and self._detect_ngram_type(found_synonym) == ngram_type:
                    nearest_synonyms.append({'word': found_synonym,
                                             'cosine proximity': proximity})

                if len(nearest_synonyms) == topn:
                    break

        # ``Exception`` rather than ``BaseException`` so that
        # KeyboardInterrupt and SystemExit are not swallowed.
        except Exception as exception:
            self.__logger.warning(self._exceptions_hanlder.get_error_message(exception), __name__)
            return None

        return nearest_synonyms

    def relevant_ngram_find(self, ngram):
        """Find the first synonym of ``ngram`` that has a database entry.

        Only unigrams are searched; other n-grams get the empty response.

        Returns:
            ``None`` for an empty n-gram, otherwise a dict with
            'synonym_found' and 'content' keys. 'content' holds 'synonym',
            'pos_docs' and 'neg_docs' when a synonym was found.
        """
        if not ngram:
            return None

        self.__logger.info(f'Start ngram: {ngram}', __name__)

        response = {'synonym_found': False, 'content': dict()}

        if self._detect_ngram_type(ngram) != 'unigram':
            return response

        synonyms_count = 10
        nearest_synonyms = self._nearest_synonyms_find(ngram, synonyms_count)

        if not nearest_synonyms:
            return response

        for nearest_synonym in nearest_synonyms:
            data = self._database_cursor.get_entry(nearest_synonym['word'])

            if data and data[0]:
                self.__logger.info(f'Relevant ngram: {nearest_synonym["word"]}', __name__)

                response['synonym_found'] = True

                response['content']['synonym'] = nearest_synonym['word']
                response['content']['pos_docs'] = data[0]
                response['content']['neg_docs'] = data[1]

                return response

        return response

Пример #8

Показать файл

Файл: Configurator.py Проект: vanavski/Text_tone_analyzer

class Configurator(metaclass=Singleton):
    """Make sure the n-gram databases and the vector model are available.

    For every required file the configurator checks whether it exists,
    downloads it when it does not, and records the outcome (``'exists'``,
    ``'downloaded'`` or ``'error'``) in a report that is finally written to
    ``Logs/config.json``.

    The class is created through the project's ``Singleton`` metaclass.
    """

    # File name of the vector model; also its key in the report.
    _VECTOR_MODEL_NAME = 'ruscorpora_upos_skipgram_300_10_2017.bin.gz'

    def __init__(self):
        # Services
        self.__logger = Logger()
        self._path_service = PathService()
        self._exceptions_handler = ExceptionsHandler()

        # Data
        self._config = dict()
        self._wd = os.getcwd()
        self._path_to_databases = None
        self._request_url = None
        self._vector_model_public_key = None
        self._databases_public_keys = None

        self._load_public_keys()

        self.__logger.info('Configurator was successfully initialized.',
                           __name__)

    def _load_public_keys(self):
        """Read the request URL and the public keys from ``configurator.json``.

        When the file does not exist an error is logged and the related
        attributes keep their ``None`` defaults.
        """
        path_to_config = os.path.join(self._path_service.path_to_configs,
                                      'configurator.json')

        if not os.path.exists(path_to_config):
            self.__logger.error(
                "Can't load config for Configrurator (doesn't exist).",
                __name__)
            return

        with open(path_to_config, 'r', encoding='utf-8') as file:
            config = json.load(file)

        self._request_url = config['request_url']
        self._vector_model_public_key = config['vector_model_public_key']
        self._databases_public_keys = config['databases_public_keys']

    def _download_file(self, public_key, destination):
        """Download the file published under ``public_key`` to ``destination``.

        The request URL is first asked for a JSON document whose ``"href"``
        field is the actual download link; that link is then fetched.

        :raises Exception: any error raised by ``requests`` (including HTTP
            error statuses), a missing ``"href"`` field, or a failure to
            write the file. Callers are expected to handle it.
        """
        link_response = requests.get(self._request_url,
                                     params={'public_key': public_key})
        link_response.raise_for_status()
        download_url = link_response.json()["href"]

        file_response = requests.get(download_url)
        file_response.raise_for_status()

        # Open the target only after the payload has arrived: a failed
        # download must not leave an empty file behind, because the next
        # run would then report it as 'exists'.
        with open(destination, 'wb') as target:
            target.write(file_response.content)

    def download_database(self, path_to_db):
        """Download one database file and record the outcome in the report.

        :param path_to_db: destination path; its last component is used to
            look up the public key of the database. Nothing happens when the
            path has no file-name component.
        """
        database_name = os.path.split(path_to_db)[1]

        if not database_name:
            return

        try:
            # The key lookup stays inside the try block: it fails when the
            # keys were never loaded or the database name is unknown.
            self._download_file(self._databases_public_keys[database_name],
                                path_to_db)

        # Exception (not BaseException) so that Ctrl+C and interpreter
        # shutdown are not swallowed and reported as a download error.
        except Exception as exception:
            self.__logger.error(
                self._exceptions_handler.get_error_message(exception),
                __name__)
            self._config[path_to_db] = 'error'

        else:
            self._config[path_to_db] = 'downloaded'

    def download_vector_model(self):
        """Download the vector model next to the databases and record the outcome."""
        self._path_service.set_path_to_vector_model(
            os.path.join(self._path_service.path_to_databases,
                         self._VECTOR_MODEL_NAME))

        try:
            self._download_file(self._vector_model_public_key,
                                self._path_service.path_to_vector_model)

        except Exception as exception:
            self.__logger.error(
                self._exceptions_handler.get_error_message(exception),
                __name__)
            self._config[self._VECTOR_MODEL_NAME] = 'error'

        else:
            self._config[self._VECTOR_MODEL_NAME] = 'downloaded'

    def configure_system(self):
        """Check every required file, download the missing ones, write the report."""
        self._config['datetime'] = str(datetime.now())

        for database in ['unigrams.db', 'bigrams.db', 'trigrams.db']:
            path_to_database = self._path_service.get_path_to_database(
                database)

            if path_to_database and os.path.exists(path_to_database):
                self._config[database] = 'exists'
                continue

            self.__logger.warning('Database not found: %s' % str(database),
                                  __name__)
            self.download_database(
                os.path.join(self._path_service.path_to_databases, database))

        path_to_vector_model = self._path_service.path_to_vector_model

        if path_to_vector_model and os.path.exists(path_to_vector_model):
            self._config[self._VECTOR_MODEL_NAME] = 'exists'
        else:
            self.__logger.warning('Vector model not found.', __name__)
            self.download_vector_model()

        self._create_config()

    def _create_config(self):
        """Write the collected report to ``Logs/config.json``."""
        # The directory may be missing on a fresh checkout; without it the
        # open() below fails with FileNotFoundError.
        os.makedirs('Logs', exist_ok=True)

        with open(os.path.join('Logs', 'config.json'), 'w',
                  encoding='utf-8') as config:
            json.dump(self._config, config, indent=4)

Пример #9

Показать файл

Файл: Lemmatizer.py Проект: vanavski/Text_tone_analyzer

class Lemmatizer:
    """Turn raw text into a cleaned string of normal-form words.

    ``get_text_initial_form`` is the entry point; it chains three steps:
    spell-check and drop latin/non-alphabetic words, replace every word with
    its normal form, and remove stop words and words whose part of speech is
    listed in ``_parts_of_speech_to_remove``.
    """

    def __init__(self):
        # Services
        self._spell_checker = SpellChecker()
        self.__logger = Logger()
        self._path_service = PathService()
        self._morph_analyzer = pymorphy2.MorphAnalyzer()

        # Data
        self._stop_words = self._read_stop_words()
        self._parts_of_speech_to_remove = ['NUMR', 'NPRO', 'PREP']

        self.__logger.info('Lemmatizer was successfully initialized.',
                           __name__)

    @staticmethod
    def _contains_latin_letter(word):
        """Return True when every character of ``word`` is an ASCII letter.

        Returns ``None`` for an empty word.

        NOTE(review): despite the name, a word that only partly consists of
        latin letters yields False - confirm whether ``any`` was intended.
        """
        if word:
            return all(char in ascii_letters for char in word)

    def _detect_part_of_speech(self, word):
        """Return the ``tag.POS`` of the first parse of ``word`` (None for an empty word)."""
        if word:
            return self._morph_analyzer.parse(word)[0].tag.POS

    def _is_stop_word(self, word):
        """Tell whether ``word`` occurs in any of the loaded stop-word groups.

        The word is padded with one space on each side and tested with ``in``
        against every value of the stop-words mapping (presumably each value
        is a space-delimited string of stop words - verify against the
        stop-words file).

        :return: True/False, or ``None`` (after a warning) for an empty word.
        """
        if not word:
            self.__logger.warning('Got empty word.', __name__)
            return

        padded_word = f' {word} '

        return any(padded_word in stop_words
                   for stop_words in self._stop_words.values())

    def _remove_words_without_emotions(self, text):
        """Drop stop words and words with an unwanted part of speech.

        :return: the remaining words joined with single spaces (possibly an
            empty string), or ``None`` (after a warning) for empty input.
        """
        if not text:
            self.__logger.warning('Got empty text.', __name__)
            return

        cleaned_text = [
            word for word in re.findall(r'\w+', text)
            if self._detect_part_of_speech(word) not in self._parts_of_speech_to_remove
            and not self._is_stop_word(word)
        ]

        return ' '.join(cleaned_text).strip()

    def _read_stop_words(self):
        """Load the stop-words mapping from the JSON file named by the path service.

        :return: the parsed JSON content, or an empty dict when the path is
            not set or the file does not exist, so that ``_is_stop_word`` can
            still iterate over ``.values()`` instead of failing on ``None``.
        """
        path_to_stop_words = self._path_service.path_to_stop_words

        if path_to_stop_words and os.path.exists(path_to_stop_words):
            with open(path_to_stop_words, 'r', encoding='utf-8') as file:
                return json.load(file)

        self.__logger.warning(
            'Stop words file was not found, stop words will not be removed.',
            __name__)
        return {}

    def _delete_words_contains_latin_letters(self, text):
        """Lower-case and spell-check ``text``, then keep only acceptable words.

        A word is kept when ``_contains_latin_letter`` is falsy for it and
        ``str.isalpha`` is true.

        :return: the kept words joined with single spaces, or ``None`` (after
            a warning) when nothing is left.
        """
        checked_text = self._spell_checker.check_spelling(text.lower())

        text = ' '.join([
            word for word in re.findall(r'\w+', checked_text)
            if not self._contains_latin_letter(word) and word.isalpha()
        ]).strip()

        if text:
            return text

        self.__logger.warning(
            'All words in document contain latin letters or all words are digits.',
            __name__)
        return None

    def _get_text_normal_form(self, text):
        """Replace every word of ``text`` with the normal form of its first parse.

        Words are joined with exactly one space.
        """
        return ' '.join(self._morph_analyzer.parse(word)[0].normal_form
                        for word in re.findall(r'\w+', text))

    def get_text_initial_form(self, text):
        """Run the whole cleaning pipeline on ``text``.

        :return: the lemmatized text, or ``None`` when the input is empty or
            any step leaves nothing behind.
        """
        if not text:
            self.__logger.warning('Got empty text.', __name__)
            return

        self.__logger.info(f'Start text: {text}', __name__)

        transformations = [
            self._delete_words_contains_latin_letters,
            self._get_text_normal_form, self._remove_words_without_emotions
        ]

        for transformation in transformations:
            text = transformation(text)

            # A step that produced nothing ends the pipeline early.
            if not text:
                return

        self.__logger.info(f'Lemmatized text: {text}', __name__)
        return text