def test_update_log_create_new(self):
    """Verify that ``update_summary`` records the validation summary in the log.

    A summary with entries for ``intents`` and ``config`` is written and the
    first log returned by ``get_logs`` is inspected: only those two
    components may carry data, and the event must be marked completed.
    """
    import copy

    bot = 'test'
    user = '******'
    # Deep copy on purpose: a shallow ``.copy()`` shares the nested
    # ``domain`` mapping, so the assignment below would silently modify
    # the module-level COMPONENT_COUNT for every later user of it.
    count = copy.deepcopy(COMPONENT_COUNT)
    # NOTE(review): the log key asserted below is 'http_actions' (plural);
    # confirm whether 'http_action' here is intentional.
    count['http_action'] = 6
    count['domain']['intents'] = 12
    summary = {
        'intents': ['Intent not added to domain'],
        'config': ['Invalid component'],
    }

    DataImporterLogProcessor.update_summary(bot, user, count, summary)
    log = next(DataImporterLogProcessor.get_logs(bot))

    assert log.get('intents').get('data') == ['Intent not added to domain']
    for component in ('stories', 'utterances', 'http_actions',
                      'training_examples', 'domain'):
        assert not log.get(component).get('data')
    assert log.get('config').get('data') == ['Invalid component']
    assert not log.get('exception')
    assert not log['is_data_uploaded']
    assert log['start_timestamp']
    assert log.get('end_timestamp')
    assert not log.get('validation_status')
    assert log['event_status'] == EVENT_STATUS.COMPLETED.value
async def test_trigger_data_importer_validate_exception(self, monkeypatch):
    """Validation-only run against a freshly created, empty directory.

    ``Utility.get_latest_file`` is patched to return that directory, and the
    latest log is expected to carry the 'Some training files are absent!'
    exception with a failed event status.
    """
    bot = 'test_events'
    user = '******'
    upload_dir = os.path.join(pytest.tmp_dir, str(datetime.utcnow()))
    os.mkdir(upload_dir)

    # Whatever arguments the importer passes, hand back our directory.
    monkeypatch.setattr(Utility, "get_latest_file",
                        lambda *args, **kwargs: upload_dir)

    DataImporterLogProcessor.add_log(
        bot, user, files_received=REQUIREMENTS - {"http_actions"})
    await EventsTrigger.trigger_data_importer(bot, user, False, False)

    logs = list(DataImporterLogProcessor.get_logs(bot))
    assert len(logs) == 2
    latest = logs[0]
    for component in ('intents', 'stories', 'utterances', 'http_actions',
                      'training_examples', 'domain', 'config'):
        assert not latest.get(component).get('data')
    assert latest.get('exception') == 'Some training files are absent!'
    assert latest['is_data_uploaded']
    assert latest['start_timestamp']
    assert latest['end_timestamp']
    assert latest['status'] == 'Failure'
    assert latest['event_status'] == EVENT_STATUS.FAIL.value
async def test_trigger_data_importer_validate_file_with_errors(
        self, monkeypatch):
    """Import run on the ``intent_name_mismatch`` fixture data.

    The latest log must report data under ``intents`` only, no exception,
    status 'Failure' and a completed event.
    """
    bot = 'test_events'
    user = '******'
    upload_dir = os.path.join(pytest.tmp_dir, str(datetime.utcnow()))
    shutil.copytree('tests/testing_data/validator/intent_name_mismatch',
                    upload_dir)

    # Point the importer at the copied fixture directory.
    monkeypatch.setattr(Utility, "get_latest_file",
                        lambda *args, **kwargs: upload_dir)

    DataImporterLogProcessor.add_log(
        bot, user, files_received=REQUIREMENTS - {"http_actions"})
    await EventsTrigger.trigger_data_importer(bot, user, True, False)

    logs = list(DataImporterLogProcessor.get_logs(bot))
    assert len(logs) == 4
    latest = logs[0]
    assert latest.get('intents').get('data')
    for component in ('stories', 'utterances', 'http_actions',
                      'training_examples', 'domain', 'config'):
        assert not latest.get(component).get('data')
    assert not latest.get('exception')
    assert latest['is_data_uploaded']
    assert latest['start_timestamp']
    assert latest['end_timestamp']
    assert latest['status'] == 'Failure'
    assert latest['event_status'] == EVENT_STATUS.COMPLETED.value
def test_add_log_success(self):
    """Two consecutive ``add_log`` calls: one with files, one marking success.

    The first log returned afterwards must list every entry of REQUIREMENTS
    in ``files_received`` and carry status 'Success' with a completed event.
    """
    bot = 'test'
    user = '******'

    DataImporterLogProcessor.add_log(
        bot,
        user,
        files_received=list(REQUIREMENTS.copy()),
        is_data_uploaded=False,
    )
    DataImporterLogProcessor.add_log(
        bot,
        user,
        status='Success',
        event_status=EVENT_STATUS.COMPLETED.value,
    )

    latest = list(DataImporterLogProcessor.get_logs(bot))[0]
    for component in ('intents', 'stories', 'utterances', 'http_actions',
                      'training_examples', 'domain', 'config'):
        assert not latest.get(component).get('data')
    assert not latest.get('exception')
    assert not latest['is_data_uploaded']
    assert latest['start_timestamp']
    assert latest.get('end_timestamp')
    for required in REQUIREMENTS:
        assert required in latest['files_received']
    assert latest.get('status') == 'Success'
    assert latest['event_status'] == EVENT_STATUS.COMPLETED.value
def test_is_event_in_progress_true(self):
    """After adding a fresh log, an event is reported as in progress.

    Called with ``False`` as second argument the check returns a truthy
    value; called with the default it raises ``AppException``.
    """
    bot = 'test'
    user = '******'
    DataImporterLogProcessor.add_log(bot, user, is_data_uploaded=False)

    in_progress = DataImporterLogProcessor.is_event_in_progress(bot, False)
    assert in_progress

    with pytest.raises(AppException):
        DataImporterLogProcessor.is_event_in_progress(bot)
async def test_trigger_data_importer_rules_only(self, monkeypatch,
                                                get_training_data):
    """Import run where only ``rules.yml`` is uploaded.

    The bot is first seeded with the ``valid`` fixture data (domain, nlu,
    config, stories, http actions); the importer is then triggered with
    ``files_received=["rules"]`` and the stored counts are checked.
    """
    bot = 'test_trigger_data_importer_rules_only'
    user = '******'

    # Upload directory containing nothing but data/rules.yml.
    upload_dir = os.path.join(pytest.tmp_dir, str(datetime.utcnow()))
    rules_dir = os.path.join(upload_dir, 'data')
    Utility.make_dirs(rules_dir)
    shutil.copy2('tests/testing_data/validator/valid/data/rules.yml',
                 rules_dir)

    # Seed the bot with existing training data.
    nlu, story_graph, domain, config, http_actions = await get_training_data(
        'tests/testing_data/validator/valid')
    mongo_processor = MongoProcessor()
    mongo_processor.save_domain(domain, bot, user)
    mongo_processor.save_nlu(nlu, bot, user)
    config["bot"] = bot
    config["user"] = user
    Configs._from_son(config).save()
    mongo_processor.save_stories(story_graph.story_steps, bot, user)
    mongo_processor.save_http_action(http_actions, bot, user)

    monkeypatch.setattr(Utility, "get_latest_file",
                        lambda *args, **kwargs: upload_dir)

    DataImporterLogProcessor.add_log(bot, user, files_received=["rules"])
    await EventsTrigger.trigger_data_importer(bot, user, True, False)

    logs = list(DataImporterLogProcessor.get_logs(bot))
    assert len(logs) == 1
    latest = logs[0]
    for component in ('intents', 'stories', 'utterances', 'http_actions',
                      'training_examples', 'domain', 'config'):
        assert not latest.get(component).get('data')
    assert not latest.get('exception')
    assert latest['is_data_uploaded']
    assert latest['start_timestamp']
    assert latest['end_timestamp']
    assert latest['status'] == 'Success'
    assert latest['event_status'] == EVENT_STATUS.COMPLETED.value

    assert len(mongo_processor.fetch_stories(bot)) == 2
    assert len(list(mongo_processor.fetch_training_examples(bot))) == 7
    assert len(list(mongo_processor.fetch_responses(bot))) == 2
    assert len(mongo_processor.fetch_actions(bot)) == 2
    assert len(mongo_processor.fetch_rule_block_names(bot)) == 3
async def get_data_importer_logs(
        current_user: User = Depends(auth.get_current_user_and_bot)):
    """
    Get data importer event logs.

    Collects every log that ``DataImporterLogProcessor.get_logs`` yields for
    the current user's bot and returns them as the ``data`` of a ``Response``.
    """
    bot = current_user.get_bot()
    return Response(data=list(DataImporterLogProcessor.get_logs(bot)))
async def test_trigger_data_importer_event_connection_error(
        self, monkeypatch):
    """Trigger with an event URL configured but no mocked endpoint behind it.

    The single log for the bot must record 'Failed to trigger the event.'
    with status 'Failure' and a failed event status.
    """
    bot = 'test_events_bot_1'
    user = '******'
    event_url = "http://url.event4"
    importer_settings = Utility.environment['model']['data_importer']
    monkeypatch.setitem(importer_settings, "event_url", event_url)

    await EventsTrigger.trigger_data_importer(bot, user, False, False)

    logs = list(DataImporterLogProcessor.get_logs(bot))
    assert len(logs) == 1
    latest = logs[0]
    for component in ('intents', 'stories', 'utterances', 'http_actions',
                      'training_examples', 'domain', 'config'):
        assert not latest.get(component).get('data')
    assert latest.get('exception') == 'Failed to trigger the event.'
    assert latest['is_data_uploaded']
    assert latest['start_timestamp']
    assert latest['end_timestamp']
    assert latest['status'] == 'Failure'
    assert latest['event_status'] == EVENT_STATUS.FAIL.value
def test_add_log(self):
    """A freshly added log is empty apart from its start time and status.

    The stored ``ValidationLogs`` document is read back directly and must
    show no component data, no end timestamp and the INITIATED event status.
    """
    bot = 'test'
    user = '******'
    DataImporterLogProcessor.add_log(bot, user, is_data_uploaded=False)

    document = ValidationLogs.objects(bot=bot).get()
    log = document.to_mongo().to_dict()

    for component in ('intents', 'stories', 'utterances', 'http_actions',
                      'training_examples', 'domain', 'config'):
        assert not log.get(component).get('data')
    assert not log.get('exception')
    assert not log['is_data_uploaded']
    assert log['start_timestamp']
    assert not log.get('end_timestamp')
    assert not log.get('validation_status')
    assert log['event_status'] == EVENT_STATUS.INITIATED.value
async def test_trigger_data_importer_validate_and_save_append(
        self, monkeypatch):
    """Import run on the ``append`` fixture with ``overwrite=False``.

    Expects a successful, completed log and checks the intents and the
    component counts stored for the bot afterwards.
    """
    bot = 'test_events'
    user = '******'
    upload_dir = os.path.join(pytest.tmp_dir, str(datetime.utcnow()))
    shutil.copytree('tests/testing_data/validator/append', upload_dir)

    monkeypatch.setattr(Utility, "get_latest_file",
                        lambda *args, **kwargs: upload_dir)

    DataImporterLogProcessor.add_log(
        bot, user, files_received=REQUIREMENTS - {"http_actions", "rules"})
    await EventsTrigger.trigger_data_importer(bot, user, True, False)

    logs = list(DataImporterLogProcessor.get_logs(bot))
    assert len(logs) == 6
    latest = logs[0]
    for component in ('intents', 'stories', 'utterances', 'http_actions',
                      'training_examples', 'domain', 'config'):
        assert not latest.get(component).get('data')
    assert not latest.get('exception')
    assert latest['is_data_uploaded']
    assert latest['start_timestamp']
    assert latest['end_timestamp']
    assert latest['status'] == 'Success'
    assert latest['event_status'] == EVENT_STATUS.COMPLETED.value

    processor = MongoProcessor()
    # Intents are re-fetched for each name, one lookup per assertion.
    for intent in ('greet', 'deny', 'location', 'affirm'):
        assert intent in processor.fetch_intents(bot)
    assert len(processor.fetch_stories(bot)) == 4
    assert len(list(processor.fetch_training_examples(bot))) == 13
    assert len(list(processor.fetch_responses(bot))) == 6
    assert len(processor.fetch_actions(bot)) == 4
    assert len(processor.fetch_rule_block_names(bot)) == 4
async def test_trigger_data_importer_import_with_intent_issues(
        self, monkeypatch):
    """Overwriting import on the ``intent_name_mismatch`` fixture.

    The log must report data under ``intents`` with status 'Failure', and
    nothing may be stored for the bot afterwards.
    """
    bot = 'test_trigger_data_importer_import_with_intent_issues'
    user = '******'
    upload_dir = os.path.join(pytest.tmp_dir, str(datetime.utcnow()))
    shutil.copytree('tests/testing_data/validator/intent_name_mismatch',
                    upload_dir)

    monkeypatch.setattr(Utility, "get_latest_file",
                        lambda *args, **kwargs: upload_dir)

    BotSettings(ignore_utterances=True, bot=bot, user=user).save()
    DataImporterLogProcessor.add_log(
        bot, user, files_received=['nlu', 'stories', 'domain', 'config'])
    await EventsTrigger.trigger_data_importer(bot, user, True, True)

    logs = list(DataImporterLogProcessor.get_logs(bot))
    assert len(logs) == 1
    latest = logs[0]
    assert latest.get('intents').get('data')
    for component in ('stories', 'utterances', 'http_actions',
                      'training_examples', 'domain', 'config'):
        assert not latest.get(component).get('data')
    assert not latest.get('exception')
    assert latest['is_data_uploaded']
    assert latest['start_timestamp']
    assert latest['end_timestamp']
    assert latest['status'] == 'Failure'
    assert latest['event_status'] == EVENT_STATUS.COMPLETED.value

    # Invalid data must not have been saved.
    mongo_processor = MongoProcessor()
    assert len(mongo_processor.fetch_stories(bot)) == 0
    assert len(list(mongo_processor.fetch_training_examples(bot))) == 0
    assert len(list(mongo_processor.fetch_responses(bot))) == 0
    assert len(mongo_processor.fetch_actions(bot)) == 0
    assert len(mongo_processor.fetch_rule_block_names(bot)) == 0
def test_add_log_exception(self):
    """A log added with an exception stores the message and failure state.

    The stored ``ValidationLogs`` document is read back directly and must
    hold the exception text, status 'Failure' and the FAIL event status.
    """
    bot = 'test'
    user = '******'
    DataImporterLogProcessor.add_log(
        bot,
        user,
        exception='Validation failed',
        status='Failure',
        event_status=EVENT_STATUS.FAIL.value,
    )

    document = ValidationLogs.objects(bot=bot).get()
    log = document.to_mongo().to_dict()

    for component in ('intents', 'stories', 'utterances', 'http_actions',
                      'training_examples', 'domain', 'config'):
        assert not log.get(component).get('data')
    assert not log.get('files_received')
    assert log.get('exception') == 'Validation failed'
    assert not log['is_data_uploaded']
    assert log['start_timestamp']
    assert log.get('end_timestamp')
    assert log.get('status') == 'Failure'
    assert log['event_status'] == EVENT_STATUS.FAIL.value
async def test_trigger_data_importer_validate_and_save_event_overwrite(
        self, monkeypatch):
    """Trigger an overwriting import through a mocked event endpoint.

    The endpoint registered with ``responses`` only matches a JSON body
    carrying BOT, USER, ``--import-data`` and ``--overwrite``. The log must
    afterwards show the TASKSPAWNED status with no end timestamp and no
    validation status.
    """
    bot = 'test_events_bot_1'
    user = '******'
    event_url = "http://url.event2"
    monkeypatch.setitem(Utility.environment['model']['data_importer'],
                        "event_url", event_url)
    responses.add(
        "POST",
        event_url,
        json={"message": "Event triggered successfully!"},
        status=200,
        match=[
            responses.json_params_matcher([
                {'name': 'BOT', 'value': bot},
                {'name': 'USER', 'value': user},
                {'name': 'IMPORT_DATA', 'value': '--import-data'},
                {'name': 'OVERWRITE', 'value': '--overwrite'},
            ])
        ],
    )

    responses.start()
    try:
        await EventsTrigger.trigger_data_importer(bot, user, True, True)
    finally:
        # Always remove the HTTP mock so a failure here cannot leak the
        # patched transport into other tests.
        responses.stop()

    # The value was never used; the expression is kept so the test still
    # requires a second recorded call with a JSON-decodable body.
    json.loads(responses.calls[1].request.body)

    logs = list(DataImporterLogProcessor.get_logs(bot))
    assert len(logs) == 1
    latest = logs[0]
    for component in ('intents', 'stories', 'utterances', 'http_actions',
                      'training_examples', 'domain', 'config'):
        assert not latest.get(component).get('data')
    assert not latest.get('exception')
    assert latest['is_data_uploaded']
    assert latest['start_timestamp']
    assert not latest.get('end_timestamp')
    assert not latest.get('status')
    assert latest['event_status'] == EVENT_STATUS.TASKSPAWNED.value
async def test_trigger_data_importer_validate_existing_data(
        self, monkeypatch, get_training_data):
    """Import run with an empty upload directory and no files received.

    Expects a successful, completed log and checks the component counts
    stored for the bot afterwards.
    """
    bot = 'test_trigger_data_importer_domain_only'
    user = '******'
    upload_dir = os.path.join(pytest.tmp_dir, str(datetime.utcnow()))
    Utility.make_dirs(upload_dir)

    monkeypatch.setattr(Utility, "get_latest_file",
                        lambda *args, **kwargs: upload_dir)

    DataImporterLogProcessor.add_log(bot, user)
    await EventsTrigger.trigger_data_importer(bot, user, True, False)

    logs = list(DataImporterLogProcessor.get_logs(bot))
    assert len(logs) == 2
    latest = logs[0]
    for component in ('intents', 'stories', 'utterances', 'http_actions',
                      'training_examples', 'domain', 'config'):
        assert not latest.get(component).get('data')
    assert not latest.get('exception')
    assert latest['is_data_uploaded']
    assert latest['start_timestamp']
    assert latest['end_timestamp']
    assert latest['status'] == 'Success'
    assert latest['event_status'] == EVENT_STATUS.COMPLETED.value

    mongo_processor = MongoProcessor()
    assert len(mongo_processor.fetch_stories(bot)) == 2
    assert len(list(mongo_processor.fetch_training_examples(bot))) == 7
    assert len(list(mongo_processor.fetch_responses(bot))) == 2
    assert len(mongo_processor.fetch_actions(bot)) == 2
    assert len(mongo_processor.fetch_rule_block_names(bot)) == 3
async def validate_training_data(
        background_tasks: BackgroundTasks,
        current_user: User = Depends(auth.get_current_user_and_bot),
):
    """
    Validates bot training data.

    Runs the limit and in-progress checks, creates a timestamped directory
    under ``training_data/<bot>``, adds a log entry and schedules the data
    importer as a background task with saving and overwriting disabled.
    """
    bot = current_user.get_bot()
    DataImporterLogProcessor.is_limit_exceeded(bot)
    DataImporterLogProcessor.is_event_in_progress(bot)

    target_dir = os.path.join("training_data", bot, str(datetime.utcnow()))
    Utility.make_dirs(target_dir)

    user = current_user.get_user()
    DataImporterLogProcessor.add_log(bot, user, is_data_uploaded=False)
    background_tasks.add_task(
        EventsTrigger.trigger_data_importer, bot, user, False, False)
    return {"message": "Event triggered! Check logs."}
async def trigger_data_importer(bot: Text,
                                user: Text,
                                save_data: bool,
                                overwrite: bool = True):
    """
    Triggers data importer event which validates and imports data into kairon.

    When an event url is configured for the data importer, the request is
    posted to that url and the log is marked as task spawned. Otherwise the
    latest upload under ``training_data/<bot>`` is validated in-process and,
    if the summary is empty for every component, imported.

    Failures are not raised to the caller; they are recorded through
    ``DataImporterLogProcessor.add_log`` with the FAIL event status.

    @param bot: bot id.
    @param user: kairon username.
    @param save_data: Flag to import data into kairon. If set to false, then only validation is run.
                      Otherwise, both validation and import is done.
    @param overwrite: Overwrite existing data(if set to true) or append (if set to false).
    @return:
    """
    validation_status = 'Failure'
    path = None
    try:
        if Utility.get_event_url("DATA_IMPORTER"):
            # Remote mode: hand the work to the configured event service.
            env_var = {
                'BOT': bot,
                'USER': user,
                "IMPORT_DATA": '--import-data' if save_data else '',
                "OVERWRITE": '--overwrite' if overwrite else '',
            }
            event_request = Utility.build_event_request(env_var)
            Utility.http_request(
                "POST",
                Utility.environment['model']['data_importer'].get('event_url'),
                None,
                user,
                event_request,
            )
            DataImporterLogProcessor.add_log(
                bot, user, event_status=EVENT_STATUS.TASKSPAWNED.value)
        else:
            # Local mode: validate (and optionally import) in this process.
            path = Utility.get_latest_file(os.path.join('training_data', bot))
            files_received = (
                DataImporterLogProcessor.get_files_received_for_latest_event(bot)
            )
            DataImporterLogProcessor.add_log(
                bot, user, event_status=EVENT_STATUS.PARSE.value)
            data_importer = DataImporter(path, bot, user, files_received,
                                         save_data, overwrite)
            DataImporterLogProcessor.add_log(
                bot, user, event_status=EVENT_STATUS.VALIDATING.value)

            summary, component_count = await data_importer.validate()
            # Data is valid only when no component reported anything.
            is_data_valid = not any(summary.values())
            validation_status = 'Success' if is_data_valid else 'Failure'
            DataImporterLogProcessor.update_summary(
                bot,
                user,
                component_count,
                summary,
                status=validation_status,
                event_status=EVENT_STATUS.SAVE.value,
            )
            if is_data_valid:
                data_importer.import_data()
            DataImporterLogProcessor.add_log(
                bot, user, event_status=EVENT_STATUS.COMPLETED.value)
    except exceptions.ConnectionError as e:
        logger.error(str(e))
        DataImporterLogProcessor.add_log(
            bot,
            user,
            exception='Failed to trigger the event.',
            status=validation_status,
            event_status=EVENT_STATUS.FAIL.value,
        )
    except Exception as e:
        # Top-level boundary: record the failure in the log instead of
        # propagating it to the caller.
        logger.error(str(e))
        DataImporterLogProcessor.add_log(
            bot,
            user,
            exception=str(e),
            status=validation_status,
            event_status=EVENT_STATUS.FAIL.value,
        )
    finally:
        # Always remove the processed upload directory, even if logging the
        # failure itself raised or the task was cancelled.
        if path:
            Utility.delete_directory(path)
def test_is_event_in_progress_false(self):
    """``is_event_in_progress`` returns a falsy value for the bot 'test'."""
    bot = 'test'
    in_progress = DataImporterLogProcessor.is_event_in_progress(bot)
    assert not in_progress
def test_get_logs(self):
    """``get_logs`` yields exactly three logs for the bot 'test'."""
    bot = 'test'
    fetched = [entry for entry in DataImporterLogProcessor.get_logs(bot)]
    assert len(fetched) == 3
def test_get_files_received(self):
    """The files recorded for the latest event are a set equal to REQUIREMENTS."""
    bot = 'test'
    received = DataImporterLogProcessor.get_files_received_for_latest_event(bot)
    assert isinstance(received, set)
    assert received == REQUIREMENTS
def test_is_limit_exceeded_false(self, monkeypatch):
    """With ``limit_per_day`` patched to 6 the limit check returns falsy."""
    importer_settings = Utility.environment['model']['data_importer']
    monkeypatch.setitem(importer_settings, "limit_per_day", 6)
    bot = 'test'
    exceeded = DataImporterLogProcessor.is_limit_exceeded(bot)
    assert not exceeded
def test_is_limit_exceeded_exception(self, monkeypatch):
    """With ``limit_per_day`` patched to 3 the limit check raises AppException."""
    monkeypatch.setitem(Utility.environment['model']['data_importer'],
                        "limit_per_day", 3)
    bot = 'test'
    with pytest.raises(AppException):
        # No ``assert`` here: the call is expected to raise. Asserting on its
        # return value would turn a "did not raise" failure into a
        # misleading AssertionError.
        DataImporterLogProcessor.is_limit_exceeded(bot)