Пример #1
0
    async def test_trigger_data_importer_validate_exception(self, monkeypatch):
        """Validation-only run on an empty data directory must record a failure.

        The directory handed to the importer holds no training files, so the
        first returned log is expected to carry the
        'Some training files are absent!' exception with a FAIL event status.
        """
        bot = 'test_events'
        user = '******'
        test_data_path = os.path.join(pytest.tmp_dir, str(datetime.utcnow()))
        os.mkdir(test_data_path)

        # Point the importer at the empty directory created above.
        monkeypatch.setattr(Utility, "get_latest_file",
                            lambda *args, **kwargs: test_data_path)

        files_received = REQUIREMENTS - {"http_actions"}
        DataImporterLogProcessor.add_log(bot,
                                         user,
                                         files_received=files_received)
        await EventsTrigger.trigger_data_importer(bot, user, False, False)

        logs = list(DataImporterLogProcessor.get_logs(bot))
        assert len(logs) == 2
        first_log = logs[0]
        # No per-component validation output is expected on this path.
        for section in ('intents', 'stories', 'utterances', 'http_actions',
                        'training_examples', 'domain', 'config'):
            assert not first_log.get(section).get('data')
        assert first_log.get('exception') == 'Some training files are absent!'
        assert first_log['is_data_uploaded']
        assert first_log['start_timestamp']
        assert first_log['end_timestamp']
        assert first_log['status'] == 'Failure'
        assert first_log['event_status'] == EVENT_STATUS.FAIL.value
Пример #2
0
    async def test_trigger_data_importer_validate_file_with_errors(
            self, monkeypatch):
        """Importing the 'intent_name_mismatch' fixture must report intent errors.

        The event itself completes (COMPLETED), but the validation status is
        'Failure' and only the 'intents' section carries error data.
        """
        bot = 'test_events'
        user = '******'
        test_data_path = os.path.join(pytest.tmp_dir, str(datetime.utcnow()))
        shutil.copytree('tests/testing_data/validator/intent_name_mismatch',
                        test_data_path)

        # Point the importer at the fixture copy created above.
        monkeypatch.setattr(Utility, "get_latest_file",
                            lambda *args, **kwargs: test_data_path)

        files_received = REQUIREMENTS - {"http_actions"}
        DataImporterLogProcessor.add_log(bot,
                                         user,
                                         files_received=files_received)
        await EventsTrigger.trigger_data_importer(bot, user, True, False)

        logs = list(DataImporterLogProcessor.get_logs(bot))
        assert len(logs) == 4
        first_log = logs[0]
        assert first_log.get('intents').get('data')
        for section in ('stories', 'utterances', 'http_actions',
                        'training_examples', 'domain', 'config'):
            assert not first_log.get(section).get('data')
        assert not first_log.get('exception')
        assert first_log['is_data_uploaded']
        assert first_log['start_timestamp']
        assert first_log['end_timestamp']
        assert first_log['status'] == 'Failure'
        assert first_log['event_status'] == EVENT_STATUS.COMPLETED.value
Пример #3
0
 def test_add_log_success(self):
     """A second add_log call with a final status must complete the log entry.

     The first call registers the received files; the second one sets the
     'Success' status and COMPLETED event status, after which the first
     returned log is expected to have an end timestamp.
     """
     bot = 'test'
     user = '******'
     files_received = list(REQUIREMENTS.copy())
     DataImporterLogProcessor.add_log(bot,
                                      user,
                                      files_received=files_received,
                                      is_data_uploaded=False)
     DataImporterLogProcessor.add_log(
         bot,
         user,
         status='Success',
         event_status=EVENT_STATUS.COMPLETED.value)

     first_log = list(DataImporterLogProcessor.get_logs(bot))[0]
     for section in ('intents', 'stories', 'utterances', 'http_actions',
                     'training_examples', 'domain', 'config'):
         assert not first_log.get(section).get('data')
     assert not first_log.get('exception')
     assert not first_log['is_data_uploaded']
     assert first_log['start_timestamp']
     assert first_log.get('end_timestamp')
     # Every required file type must have been stored on the log.
     for file_type in REQUIREMENTS:
         assert file_type in first_log['files_received']
     assert first_log.get('status') == 'Success'
     assert first_log['event_status'] == EVENT_STATUS.COMPLETED.value
Пример #4
0
    def test_is_event_in_progress_true(self):
        """An open log entry must be reported as an event in progress.

        With the second argument set to False the check returns a truthy
        value; with the default arguments it raises AppException instead.
        """
        bot = 'test'
        user = '******'
        DataImporterLogProcessor.add_log(bot, user, is_data_uploaded=False)

        in_progress = DataImporterLogProcessor.is_event_in_progress(bot, False)
        assert in_progress

        with pytest.raises(AppException):
            DataImporterLogProcessor.is_event_in_progress(bot)
Пример #5
0
    async def test_trigger_data_importer_rules_only(self, monkeypatch,
                                                    get_training_data):
        """Importing only a rules file on top of existing bot data must succeed.

        The bot is first seeded with the 'valid' fixture data, then an import
        event carrying just rules.yml is triggered in append mode
        (overwrite=False) and the stored data is counted afterwards.
        """
        bot = 'test_trigger_data_importer_rules_only'
        user = '******'
        test_data_path = os.path.join(pytest.tmp_dir, str(datetime.utcnow()))
        data_path = os.path.join(test_data_path, 'data')
        Utility.make_dirs(data_path)
        shutil.copy2('tests/testing_data/validator/valid/data/rules.yml',
                     data_path)

        # Seed the bot with existing data before the rules-only import.
        training_data = await get_training_data(
            'tests/testing_data/validator/valid')
        nlu, story_graph, domain, config, http_actions = training_data
        mongo_processor = MongoProcessor()
        mongo_processor.save_domain(domain, bot, user)
        mongo_processor.save_nlu(nlu, bot, user)
        config["bot"] = bot
        config["user"] = user
        config_obj = Configs._from_son(config)
        config_obj.save()
        mongo_processor.save_stories(story_graph.story_steps, bot, user)
        mongo_processor.save_http_action(http_actions, bot, user)

        # Point the importer at the directory holding only rules.yml.
        monkeypatch.setattr(Utility, "get_latest_file",
                            lambda *args, **kwargs: test_data_path)

        DataImporterLogProcessor.add_log(bot, user, files_received=["rules"])
        await EventsTrigger.trigger_data_importer(bot, user, True, False)

        logs = list(DataImporterLogProcessor.get_logs(bot))
        assert len(logs) == 1
        first_log = logs[0]
        for section in ('intents', 'stories', 'utterances', 'http_actions',
                        'training_examples', 'domain', 'config'):
            assert not first_log.get(section).get('data')
        assert not first_log.get('exception')
        assert first_log['is_data_uploaded']
        assert first_log['start_timestamp']
        assert first_log['end_timestamp']
        assert first_log['status'] == 'Success'
        assert first_log['event_status'] == EVENT_STATUS.COMPLETED.value

        assert len(mongo_processor.fetch_stories(bot)) == 2
        assert len(list(mongo_processor.fetch_training_examples(bot))) == 7
        assert len(list(mongo_processor.fetch_responses(bot))) == 2
        assert len(mongo_processor.fetch_actions(bot)) == 2
        assert len(mongo_processor.fetch_rule_block_names(bot)) == 3
Пример #6
0
 def test_add_log(self):
     """A freshly added log must be empty and in the INITIATED state.

     The stored ValidationLogs document is read back directly and checked
     for absent validation data, no end timestamp and no validation status.
     """
     bot = 'test'
     user = '******'
     DataImporterLogProcessor.add_log(bot, user, is_data_uploaded=False)

     document = ValidationLogs.objects(bot=bot).get()
     log = document.to_mongo().to_dict()
     for section in ('intents', 'stories', 'utterances', 'http_actions',
                     'training_examples', 'domain', 'config'):
         assert not log.get(section).get('data')
     assert not log.get('exception')
     assert not log['is_data_uploaded']
     assert log['start_timestamp']
     for unset_field in ('end_timestamp', 'validation_status'):
         assert not log.get(unset_field)
     assert log['event_status'] == EVENT_STATUS.INITIATED.value
Пример #7
0
    async def test_trigger_data_importer_validate_and_save_append(
            self, monkeypatch):
        """Importing the 'append' fixture in append mode must add to bot data.

        The event is triggered with save_data=True and overwrite=False; the
        first returned log must be a clean success and the expected intents
        and item counts must be present afterwards.
        """
        bot = 'test_events'
        user = '******'
        test_data_path = os.path.join(pytest.tmp_dir, str(datetime.utcnow()))
        shutil.copytree('tests/testing_data/validator/append', test_data_path)

        # Point the importer at the fixture copy created above.
        monkeypatch.setattr(Utility, "get_latest_file",
                            lambda *args, **kwargs: test_data_path)

        files_received = REQUIREMENTS - {"http_actions", "rules"}
        DataImporterLogProcessor.add_log(bot,
                                         user,
                                         files_received=files_received)
        await EventsTrigger.trigger_data_importer(bot, user, True, False)

        logs = list(DataImporterLogProcessor.get_logs(bot))
        assert len(logs) == 6
        first_log = logs[0]
        for section in ('intents', 'stories', 'utterances', 'http_actions',
                        'training_examples', 'domain', 'config'):
            assert not first_log.get(section).get('data')
        assert not first_log.get('exception')
        assert first_log['is_data_uploaded']
        assert first_log['start_timestamp']
        assert first_log['end_timestamp']
        assert first_log['status'] == 'Success'
        assert first_log['event_status'] == EVENT_STATUS.COMPLETED.value

        processor = MongoProcessor()
        for intent in ('greet', 'deny', 'location', 'affirm'):
            assert intent in processor.fetch_intents(bot)
        assert len(processor.fetch_stories(bot)) == 4
        assert len(list(processor.fetch_training_examples(bot))) == 13
        assert len(list(processor.fetch_responses(bot))) == 6
        assert len(processor.fetch_actions(bot)) == 4
        assert len(processor.fetch_rule_block_names(bot)) == 4
Пример #8
0
async def validate_training_data(
        background_tasks: BackgroundTasks,
        current_user: User = Depends(auth.get_current_user_and_bot),
):
    """
    Validates bot training data.

    Runs the limit and in-progress checks for the bot, creates a timestamped
    directory under ``training_data/<bot>``, records a new log entry and
    schedules the data importer as a background task with ``save_data`` and
    ``overwrite`` both False.
    """
    bot = current_user.get_bot()
    DataImporterLogProcessor.is_limit_exceeded(bot)
    DataImporterLogProcessor.is_event_in_progress(bot)

    event_dir = os.path.join("training_data", bot, str(datetime.utcnow()))
    Utility.make_dirs(event_dir)

    user = current_user.get_user()
    DataImporterLogProcessor.add_log(bot, user, is_data_uploaded=False)
    background_tasks.add_task(EventsTrigger.trigger_data_importer,
                              bot, user, False, False)
    return {"message": "Event triggered! Check logs."}
Пример #9
0
 def test_update_log(self):
     """update_summary must store the validation summary and close the log.

     A fresh log is created and checked to be empty and INITIATED. Then a
     summary with intent and config errors is written through
     update_summary, after which the first returned log must expose those
     errors, an end timestamp, the 'Failed' status and COMPLETED event
     status.
     """
     # Local import keeps this block self-contained.
     import copy

     bot = 'test'
     user = '******'
     sections = ('intents', 'stories', 'utterances', 'http_actions',
                 'training_examples', 'domain', 'config')

     DataImporterLogProcessor.add_log(bot, user, is_data_uploaded=False)
     log = next(DataImporterLogProcessor.get_logs(bot))
     for section in sections:
         assert not log.get(section).get('data')
     assert not log.get('exception')
     assert not log['is_data_uploaded']
     assert log['start_timestamp']
     assert not log.get('end_timestamp')
     assert not log.get('validation_status')
     assert log['event_status'] == EVENT_STATUS.INITIATED.value

     # Deep copy: a shallow .copy() shares the nested 'domain' dict, so the
     # assignment below would modify the module-level COMPONENT_COUNT and
     # leak into every other test that reads it.
     count = copy.deepcopy(COMPONENT_COUNT)
     count['http_action'] = 6
     count['domain']['intents'] = 12
     summary = {
         'intents': ['Intent not added to domain'],
         'config': ['Invalid component']
     }
     DataImporterLogProcessor.update_summary(bot, user, count, summary,
                                             'Failed', 'Completed')

     log = next(DataImporterLogProcessor.get_logs(bot))
     assert log.get('intents').get('data') == ['Intent not added to domain']
     assert not log.get('stories').get('data')
     assert not log.get('utterances').get('data')
     assert not log.get('http_actions').get('data')
     assert not log.get('training_examples').get('data')
     assert not log.get('domain').get('data')
     assert log.get('config').get('data') == ['Invalid component']
     assert not log.get('exception')
     assert not log['is_data_uploaded']
     assert log['start_timestamp']
     assert log.get('end_timestamp')
     assert log.get('status') == 'Failed'
     assert log['event_status'] == EVENT_STATUS.COMPLETED.value
Пример #10
0
    async def test_trigger_data_importer_import_with_intent_issues(
            self, monkeypatch):
        """Invalid intent data must block the import even in overwrite mode.

        The 'intent_name_mismatch' fixture is imported with save_data=True and
        overwrite=True; validation fails on intents and nothing may be stored
        for the bot.
        """
        bot = 'test_trigger_data_importer_import_with_intent_issues'
        user = '******'
        test_data_path = os.path.join(pytest.tmp_dir, str(datetime.utcnow()))
        shutil.copytree('tests/testing_data/validator/intent_name_mismatch',
                        test_data_path)

        # Point the importer at the fixture copy created above.
        monkeypatch.setattr(Utility, "get_latest_file",
                            lambda *args, **kwargs: test_data_path)
        BotSettings(ignore_utterances=True, bot=bot, user=user).save()

        files_received = ['nlu', 'stories', 'domain', 'config']
        DataImporterLogProcessor.add_log(bot,
                                         user,
                                         files_received=files_received)
        await EventsTrigger.trigger_data_importer(bot, user, True, True)

        logs = list(DataImporterLogProcessor.get_logs(bot))
        assert len(logs) == 1
        first_log = logs[0]
        assert first_log.get('intents').get('data')
        for section in ('stories', 'utterances', 'http_actions',
                        'training_examples', 'domain', 'config'):
            assert not first_log.get(section).get('data')
        assert not first_log.get('exception')
        assert first_log['is_data_uploaded']
        assert first_log['start_timestamp']
        assert first_log['end_timestamp']
        assert first_log['status'] == 'Failure'
        assert first_log['event_status'] == EVENT_STATUS.COMPLETED.value

        # Nothing must have been persisted for the bot.
        mongo_processor = MongoProcessor()
        assert len(mongo_processor.fetch_stories(bot)) == 0
        assert len(list(mongo_processor.fetch_training_examples(bot))) == 0
        assert len(list(mongo_processor.fetch_responses(bot))) == 0
        assert len(mongo_processor.fetch_actions(bot)) == 0
        assert len(mongo_processor.fetch_rule_block_names(bot)) == 0
Пример #11
0
 def test_add_log_exception(self):
     """A log added with an exception must be stored as a closed failure.

     The stored ValidationLogs document is read back directly and must hold
     the exception text, an end timestamp, the 'Failure' status and FAIL
     event status, with no validation data or received files.
     """
     bot = 'test'
     user = '******'
     DataImporterLogProcessor.add_log(
         bot,
         user,
         exception='Validation failed',
         status='Failure',
         event_status=EVENT_STATUS.FAIL.value)

     document = ValidationLogs.objects(bot=bot).get()
     log = document.to_mongo().to_dict()
     for section in ('intents', 'stories', 'utterances', 'http_actions',
                     'training_examples', 'domain', 'config'):
         assert not log.get(section).get('data')
     assert not log.get('files_received')
     assert log.get('exception') == 'Validation failed'
     assert not log['is_data_uploaded']
     assert log['start_timestamp']
     assert log.get('end_timestamp')
     assert log.get('status') == 'Failure'
     assert log['event_status'] == EVENT_STATUS.FAIL.value
Пример #12
0
    async def test_trigger_data_importer_validate_existing_data(
            self, monkeypatch, get_training_data):
        """An import event without uploaded files must succeed on stored data.

        The importer is pointed at an empty directory and the log is added
        without files_received; the first returned log must be a clean
        success and the bot's stored item counts are checked afterwards.
        """
        bot = 'test_trigger_data_importer_domain_only'
        user = '******'
        test_data_path = os.path.join(pytest.tmp_dir, str(datetime.utcnow()))
        Utility.make_dirs(test_data_path)

        # Point the importer at the empty directory created above.
        monkeypatch.setattr(Utility, "get_latest_file",
                            lambda *args, **kwargs: test_data_path)

        DataImporterLogProcessor.add_log(bot, user)
        await EventsTrigger.trigger_data_importer(bot, user, True, False)

        logs = list(DataImporterLogProcessor.get_logs(bot))
        assert len(logs) == 2
        first_log = logs[0]
        for section in ('intents', 'stories', 'utterances', 'http_actions',
                        'training_examples', 'domain', 'config'):
            assert not first_log.get(section).get('data')
        assert not first_log.get('exception')
        assert first_log['is_data_uploaded']
        assert first_log['start_timestamp']
        assert first_log['end_timestamp']
        assert first_log['status'] == 'Success'
        assert first_log['event_status'] == EVENT_STATUS.COMPLETED.value

        mongo_processor = MongoProcessor()
        assert len(mongo_processor.fetch_stories(bot)) == 2
        assert len(list(mongo_processor.fetch_training_examples(bot))) == 7
        assert len(list(mongo_processor.fetch_responses(bot))) == 2
        assert len(mongo_processor.fetch_actions(bot)) == 2
        assert len(mongo_processor.fetch_rule_block_names(bot)) == 3
Пример #13
0
    async def trigger_data_importer(bot: Text,
                                    user: Text,
                                    save_data: bool,
                                    overwrite: bool = True):
        """
        Triggers data importer event which validates and imports data into kairon.

        When an event url is configured for "DATA_IMPORTER", the work is handed
        off through an HTTP POST and the log is marked as TASKSPAWNED.
        Otherwise the latest directory under ``training_data/<bot>`` is
        validated in-process, the summary is stored on the log and
        ``import_data()`` is invoked only if validation reported no issues.

        Exceptions are not propagated to the caller: they are logged and
        recorded on the event log with the FAIL event status. The processed
        directory is removed in every case.

        @param bot: bot id.
        @param user: kairon username.
        @param save_data: Flag to import data into kairon. If set to false, then only validation is run.
                        Otherwise, both validation and import is done.
        @param overwrite: Overwrite existing data(if set to true) or append (if set to false).
        @return:
        """
        validation_status = 'Failure'
        path = None
        try:
            if Utility.get_event_url("DATA_IMPORTER"):
                import_flag = '--import-data' if save_data else ''
                overwrite_flag = '--overwrite' if overwrite else ''
                env_var = {
                    'BOT': bot,
                    'USER': user,
                    "IMPORT_DATA": import_flag,
                    "OVERWRITE": overwrite_flag
                }
                event_request = Utility.build_event_request(env_var)
                Utility.http_request(
                    "POST", Utility.environment['model']['data_importer'].get(
                        'event_url'), None, user, event_request)
                DataImporterLogProcessor.add_log(
                    bot, user, event_status=EVENT_STATUS.TASKSPAWNED.value)
            else:
                path = Utility.get_latest_file(
                    os.path.join('training_data', bot))
                files_received = DataImporterLogProcessor.get_files_received_for_latest_event(
                    bot)
                DataImporterLogProcessor.add_log(
                    bot, user, event_status=EVENT_STATUS.PARSE.value)
                data_importer = DataImporter(path, bot, user, files_received,
                                             save_data, overwrite)
                DataImporterLogProcessor.add_log(
                    bot, user, event_status=EVENT_STATUS.VALIDATING.value)

                summary, component_count = await data_importer.validate()
                # Data is valid only when every summary section is empty.
                is_data_valid = all(
                    not issues for issues in summary.values())
                validation_status = 'Success' if is_data_valid else 'Failure'
                DataImporterLogProcessor.update_summary(
                    bot,
                    user,
                    component_count,
                    summary,
                    status=validation_status,
                    event_status=EVENT_STATUS.SAVE.value)

                if is_data_valid:
                    data_importer.import_data()
                DataImporterLogProcessor.add_log(
                    bot, user, event_status=EVENT_STATUS.COMPLETED.value)
        except exceptions.ConnectionError as e:
            logger.error(str(e))
            DataImporterLogProcessor.add_log(
                bot,
                user,
                exception='Failed to trigger the event.',
                status=validation_status,
                event_status=EVENT_STATUS.FAIL.value)

        except Exception as e:
            logger.error(str(e))
            DataImporterLogProcessor.add_log(
                bot,
                user,
                exception=str(e),
                status=validation_status,
                event_status=EVENT_STATUS.FAIL.value)
        finally:
            # Clean up in `finally` so the directory is removed even when
            # writing the failure log raises or the task is cancelled.
            if path:
                Utility.delete_directory(path)