def _word_suggestion(text, language, n_words_to_generate):
    """Ask the NLU worker for word suggestions for ``text``.

    Args:
        text: Non-empty ``str`` to generate suggestions for.
        language: Language code; checked with ``language_validation``.
        n_words_to_generate: Exact ``int`` between 1 and 50 (inclusive).

    Returns:
        The Celery task result, updated in place with the echoed ``text``,
        ``language`` and ``n_words_to_generate``.

    Raises:
        ValidationError: If ``text`` or ``n_words_to_generate`` is invalid.
        CeleryTimeoutException: If ``TimeLimitExceeded`` is raised while the
            task is sent or awaited.
    """
    language_validation(language)

    if not text or type(text) is not str:
        raise ValidationError("Invalid text")

    count_is_invalid = (
        not n_words_to_generate
        or type(n_words_to_generate) is not int
        or n_words_to_generate <= 0
        or n_words_to_generate > 50
    )
    if count_is_invalid:
        raise ValidationError("Invalid number of words to generate")

    try:
        pending = celery_app.send_task(
            TASK_NLU_WORD_SUGGESTION_TEXT,
            args=[text, n_words_to_generate],
            queue=queue_name(language, ACTION_WORD_SUGGESTION, "SPACY"),
        )
        pending.wait()
        suggestion = pending.result
    except TimeLimitExceeded:
        raise CeleryTimeoutException()

    # Echo the request parameters back alongside the worker's answer.
    suggestion.update(
        {
            "text": text,
            "language": language,
            "n_words_to_generate": n_words_to_generate,
        }
    )
    return suggestion
def _words_distribution(authorization, language, repository_version=None):
    """Run the words-distribution NLU task for a repository version.

    Args:
        authorization: Raw authorization value, resolved through
            ``repository_authorization_validation``.
        language: Language code; checked with ``language_validation``.
        repository_version: Optional version forwarded to the backend.

    Returns:
        The result of the ``TASK_NLU_WORDS_DISTRIBUTION`` Celery task.

    Raises:
        CeleryTimeoutException: If ``TimeLimitExceeded`` is raised while the
            task is sent or awaited.
    """
    language_validation(language)
    repository_authorization = repository_authorization_validation(authorization)

    current_update = backend().request_backend_train(
        repository_authorization, language, repository_version
    )

    try:
        task_args = [
            current_update.get("current_version_id"),
            language,
            repository_authorization,
        ]
        pending = celery_app.send_task(
            TASK_NLU_WORDS_DISTRIBUTION,
            args=task_args,
            queue=queue_name(language, ACTION_WORDS_DISTIRBUTION),
        )
        pending.wait()
        distribution = pending.result
    except TimeLimitExceeded:
        raise CeleryTimeoutException()

    return distribution
def check_language_priority(language, repository_authorization, repository_version):
    """Fetch the parse repository for ``language``, honouring priority fallbacks.

    A truthy ``language`` is lower-cased, cut at the first ``-`` or ``_``
    (e.g. ``"pt_BR"`` becomes ``"pt"``) and validated. If the resulting code
    is a key of ``DEFAULT_LANGS_PRIORITY``, each language in its priority list
    is queried in order until one reports ``total_training_end``; otherwise
    the language itself is queried once.

    Backend errors are swallowed and treated as an empty repository.

    Returns:
        The repository returned by the backend, or ``{}`` when the lookup
        failed. For a priority list this is the last repository fetched,
        whether or not it reports ``total_training_end``.
    """
    if language:
        language = re.split(r"[-_]", str(language).lower())[0]
        language_validation(language)

    def _fetch(lang):
        # Best-effort lookup: any backend failure counts as "no repository".
        try:
            return backend().request_backend_parse(
                repository_authorization, lang, repository_version
            )
        except Exception:
            return {}

    # No hard-coded priority list: fall back to the generic ('LANG' only) repo.
    if language not in DEFAULT_LANGS_PRIORITY:
        return _fetch(language)

    # Hard-coded exceptions: walk the priority-ordered languages.
    repository = {}
    for candidate in DEFAULT_LANGS_PRIORITY.get(language):
        repository = _fetch(candidate)
        if repository.get("total_training_end"):
            break
    return repository
def qa_handler(
    authorization,
    knowledge_base_id,
    question,
    language,
    from_backend=False,
    user_agent=None,
):
    """Answer ``question`` against a knowledge base and log the exchange.

    The knowledge-base text is fetched from the backend, sent with the
    question to ``request_torchserve``, and the first answer (if any) is
    logged to the backend from a separate thread.

    Args:
        authorization: Raw authorization value, resolved through
            ``repository_authorization_validation``.
        knowledge_base_id: Knowledge base identifier; converted with ``int``
            for the log entry.
        question: Non-empty ``str`` no longer than
            ``BOTHUB_NLP_API_QA_QUESTION_LIMIT``.
        language: Language code; checked with ``language_validation``.
        from_backend: Flag copied into the log entry.
        user_agent: Value copied into the log entry.

    Returns:
        The unmodified result of ``request_torchserve``.

    Raises:
        EmptyInputException: ``question`` is empty or not a ``str``.
        LargeQuestionException: ``question`` exceeds the question limit.
        EmptyBaseException: The knowledge base has no text.
        LargeContextException: The knowledge-base text exceeds the text limit.
    """
    language_validation(language)
    user_base_authorization = repository_authorization_validation(authorization)

    if not question or type(question) is not str:
        raise EmptyInputException()
    if len(question) > BOTHUB_NLP_API_QA_QUESTION_LIMIT:
        raise LargeQuestionException(
            len(question), limit=BOTHUB_NLP_API_QA_QUESTION_LIMIT
        )

    request = backend().request_backend_knowledge_bases(
        user_base_authorization, knowledge_base_id, language
    )
    text = request.get("text")
    if not text:
        raise EmptyBaseException()
    if len(text) > BOTHUB_NLP_API_QA_TEXT_LIMIT:
        raise LargeContextException(len(text), limit=BOTHUB_NLP_API_QA_TEXT_LIMIT)

    result = request_torchserve(text, question, language)

    # Only the first returned answer is logged; none means an empty answer.
    answers = result["answers"]
    if len(answers) > 0:
        top_answer = answers[0]
        answer = top_answer["text"]
        confidence = float(top_answer["confidence"])
    else:
        answer, confidence = "", 0.0

    # Resolve the logging callable before building the payload so the
    # evaluation order matches a single Thread(target=..., kwargs=...) call.
    log_target = backend().send_log_qa_nlp_parse
    log_payload = {
        "answer": answer,
        "confidence": confidence,
        "question": question,
        "user_agent": user_agent,
        "nlp_log": json.dumps(result),
        "user": str(user_base_authorization),
        "knowledge_base": int(knowledge_base_id),
        "language": language,
        "from_backend": from_backend,
    }
    threading.Thread(target=log_target, kwargs={"data": log_payload}).start()

    return result
def evaluate_handler(authorization, language, repository_version=None):
    """Dispatch a (non cross-validation) evaluation task for a repository.

    Args:
        authorization: Raw authorization value, resolved through
            ``repository_authorization_validation``.
        language: Language code; checked with ``language_validation``.
        repository_version: Optional version forwarded to the backend and to
            the evaluation task.

    Returns:
        A report dict. On success it carries ``language``, ``status``
        (``EVALUATE_STATUS_PROCESSING``), ``repository_version``,
        ``evaluate_id``, ``evaluate_version`` and ``cross_validation``.
        If anything other than a time-limit error is raised while the task is
        sent or awaited, it carries only ``status``
        (``EVALUATE_STATUS_FAILED``) and ``error``.

    Raises:
        ValidationError: The backend lookup failed or reported no ``update``.
        CeleryTimeoutException: ``TimeLimitExceeded`` was raised by the task.
    """
    repository_authorization = repository_authorization_validation(authorization)
    language_validation(language)

    # A failing backend lookup is treated the same as an untrained repository.
    try:
        repository = backend().request_backend_evaluate(
            repository_authorization, language, repository_version
        )
    except Exception:
        repository = {}

    if not repository.get("update"):
        raise ValidationError("This repository has never been trained")

    model = get_language_model(repository)
    cross_validation = False

    try:
        task = celery_app.send_task(
            TASK_NLU_EVALUATE_UPDATE,
            args=[
                repository_version,
                repository.get("repository_version"),  # repository_version_language_id
                repository_authorization,
                cross_validation,
                repository.get("language"),
            ],
            queue=queue_name(repository.get("language"), ACTION_EVALUATE, model),
        )
        task.wait()
        evaluate = task.result

        if evaluate is None:
            evaluate_id = evaluate_version = None
        else:
            evaluate_id = evaluate.get("id")
            evaluate_version = evaluate.get("version")

        return {
            "language": language,
            "status": EVALUATE_STATUS_PROCESSING,
            "repository_version": repository.get("repository_version"),
            "evaluate_id": evaluate_id,
            "evaluate_version": evaluate_version,
            "cross_validation": cross_validation,
        }
    except TimeLimitExceeded:
        raise CeleryTimeoutException()
    except Exception as error:
        return {"status": EVALUATE_STATUS_FAILED, "error": str(error)}
def _intent_sentence_suggestion(
    authorization,
    language,
    intent,
    n_sentences_to_generate,
    percentage_to_replace,
    repository_version=None,
):
    """Ask the NLU worker for sentence suggestions for an intent.

    Args:
        authorization: Raw authorization value, resolved through
            ``repository_authorization_validation``.
        language: Language code; checked with ``language_validation``.
        intent: Non-empty ``str`` naming the intent.
        n_sentences_to_generate: Exact ``int`` between 1 and 50 (inclusive).
        percentage_to_replace: Exact ``float`` in the interval ``(0, 1]``.
        repository_version: Optional version forwarded to the backend.

    Returns:
        The Celery task result, updated in place with the echoed
        ``language``, ``n_sentences_to_generate``, ``percentage_to_replace``
        and ``intent``.

    Raises:
        ValidationError: If any argument fails validation.
        CeleryTimeoutException: If ``TimeLimitExceeded`` is raised while the
            task is sent or awaited.
    """
    repository_authorization = repository_authorization_validation(authorization)
    language_validation(language)

    if not intent or type(intent) != str:
        raise ValidationError("Invalid intent")

    # Exact type checks (not isinstance) deliberately keep rejecting bool.
    if type(n_sentences_to_generate) is not int or not (
        0 < n_sentences_to_generate <= 50
    ):
        raise ValidationError("Invalid number of sentences to generate")

    # Written as a positive range test so NaN is rejected: every comparison
    # with NaN is False, which let it slip past `p <= 0 or p > 1`.
    if type(percentage_to_replace) is not float or not (
        0 < percentage_to_replace <= 1
    ):
        raise ValidationError("Invalid percentage to replace")

    # Best-effort lookup: on failure the task receives a None version id.
    try:
        update = backend().request_backend_parse(
            repository_authorization, language, repository_version
        )
    except Exception:
        update = {}

    try:
        answer_task = celery_app.send_task(
            TASK_NLU_INTENT_SENTENCE_SUGGESTION_TEXT,
            args=[
                update.get("repository_version"),
                repository_authorization,
                intent,
                percentage_to_replace,
                n_sentences_to_generate,
            ],
            queue=queue_name(language, ACTION_INTENT_SENTENCE_SUGGESTION, "SPACY"),
        )
        answer_task.wait()
        answer = answer_task.result
    except TimeLimitExceeded:
        raise CeleryTimeoutException()

    answer.update(
        {
            "language": language,
            "n_sentences_to_generate": n_sentences_to_generate,
            "percentage_to_replace": percentage_to_replace,
            "intent": intent,
        }
    )
    return answer
def _debug_parse(authorization, text, language, repository_version=None):
    """Run the debug-parse NLU task for ``text``.

    The repository update for ``language`` is looked up first; if it has no
    ``version``, the languages listed for it in ``DEFAULT_LANGS_PRIORITY`` are
    tried in order until one does.

    Args:
        authorization: Raw authorization value, resolved through
            ``repository_authorization_validation``.
        text: Non-empty ``str`` to parse.
        language: Language code; checked with ``language_validation``.
        repository_version: Optional version forwarded to the backend.

    Returns:
        The Celery task result, updated in place with ``text`` and the
        ``repository_version`` and ``language`` of the update that was used.

    Raises:
        ValidationError: ``text`` is invalid, or no trained update was found.
        CeleryTimeoutException: If ``TimeLimitExceeded`` is raised while the
            task is sent or awaited.
    """
    # Kept as a function-level import, as in the original
    # (presumably to avoid an import cycle - TODO confirm).
    from ..utils import DEFAULT_LANGS_PRIORITY

    language_validation(language)
    repository_authorization = repository_authorization_validation(authorization)

    if type(text) != str or not text:
        raise ValidationError("Text required.")

    def _fetch_update(lang):
        # Best-effort lookup: a backend failure counts as "no update".
        try:
            return backend().request_backend_parse(
                repository_authorization, lang, repository_version
            )
        except Exception:
            return {}

    update = _fetch_update(language)

    if not update.get("version"):
        # Fallback lookups are guarded like the primary one, so a backend
        # error for one fallback language no longer aborts the request
        # before the remaining languages are tried.
        for next_language in DEFAULT_LANGS_PRIORITY.get(language, []):
            update = _fetch_update(next_language)
            if update.get("version"):
                break

    if not update.get("version"):
        raise ValidationError("This repository has never been trained")

    model = get_language_model(update)

    try:
        answer_task = celery_app.send_task(
            TASK_NLU_DEBUG_PARSE_TEXT,
            args=[
                update.get("repository_version"),
                repository_authorization,
                text,
            ],
            queue=queue_name(update.get("language"), ACTION_DEBUG_PARSE, model),
        )
        answer_task.wait()
        answer = answer_task.result
    except TimeLimitExceeded:
        raise CeleryTimeoutException()

    answer.update(
        {
            "text": text,
            "repository_version": update.get("repository_version"),
            "language": update.get("language"),
        }
    )
    return answer
def crossvalidation_evaluate_handler(authorization, language, repository_version=None):
    """Submit a cross-validation evaluation job to the AI platform.

    Args:
        authorization: Raw authorization value, resolved through
            ``repository_authorization_validation``.
        language: Language code; checked with ``language_validation``.
        repository_version: Optional version forwarded to the backend.

    Returns:
        A report dict. On success it carries ``language``, ``status``
        (``EVALUATE_STATUS_PROCESSING``), ``repository_version``,
        ``evaluate_id`` / ``evaluate_version`` (both ``None``) and
        ``cross_validation`` (``True``). If submitting the job or saving its
        id raises, it carries only ``status`` (``EVALUATE_STATUS_FAILED``)
        and ``error``.

    Raises:
        ValidationError: The backend lookup failed or did not report
            ``can_run_automatic_evaluate``.
    """
    repository_authorization = repository_authorization_validation(authorization)
    language_validation(language)

    # A failing backend lookup is treated as "cannot run automatic evaluate".
    try:
        repository = backend().request_backend_start_automatic_evaluate(
            repository_authorization, repository_version, language
        )
    except Exception:
        repository = {}

    if not repository.get("can_run_automatic_evaluate"):
        raise ValidationError("Validation error")

    model = get_language_model(repository)

    try:
        version_language_id = repository.get("repository_version_language_id")
        authorization_text = str(repository_authorization)

        # Job id embeds environment, version-language id, language and the
        # current Unix timestamp in whole seconds.
        job_id = f'bothub_{settings.ENVIRONMENT}_evaluate_{version_language_id}_{language}_{str(int(time.time()))}'

        send_job_train_ai_platform(
            jobId=job_id,
            repository_version=str(version_language_id),
            by_id=str(repository.get("user_id")),
            repository_authorization=authorization_text,
            language=language,
            type_model=model,
            operation="evaluate",
        )
        backend().request_backend_save_queue_id(
            update_id=str(version_language_id),
            repository_authorization=authorization_text,
            task_id=job_id,
            from_queue=0,
            type_processing=2,
        )
        return {
            "language": language,
            "status": EVALUATE_STATUS_PROCESSING,
            "repository_version": version_language_id,
            "evaluate_id": None,
            "evaluate_version": None,
            "cross_validation": True,
        }
    except Exception as error:
        return {"status": EVALUATE_STATUS_FAILED, "error": str(error)}
def _sentence_suggestion(
    text, language, n_sentences_to_generate, percentage_to_replace
):
    """Ask the NLU worker for sentence suggestions derived from ``text``.

    Args:
        text: Non-empty ``str`` to derive suggestions from.
        language: Language code; checked with ``language_validation``.
        n_sentences_to_generate: Exact ``int`` between 1 and 50 (inclusive).
        percentage_to_replace: Exact ``float`` in the interval ``(0, 1]``.

    Returns:
        The Celery task result, updated in place with the echoed ``text``,
        ``language``, ``n_sentences_to_generate`` and
        ``percentage_to_replace``.

    Raises:
        ValidationError: If any argument fails validation.
        CeleryTimeoutException: If ``TimeLimitExceeded`` is raised while the
            task is sent or awaited.
    """
    language_validation(language)

    if not text or type(text) != str:
        raise ValidationError("Invalid text")

    # Exact type checks (not isinstance) deliberately keep rejecting bool.
    if type(n_sentences_to_generate) is not int or not (
        0 < n_sentences_to_generate <= 50
    ):
        raise ValidationError("Invalid number of sentences to generate")

    # Written as a positive range test so NaN is rejected: every comparison
    # with NaN is False, which let it slip past `p <= 0 or p > 1`.
    if type(percentage_to_replace) is not float or not (
        0 < percentage_to_replace <= 1
    ):
        raise ValidationError("Invalid percentage to replace")

    try:
        answer_task = celery_app.send_task(
            TASK_NLU_SENTENCE_SUGGESTION_TEXT,
            args=[text, percentage_to_replace, n_sentences_to_generate],
            queue=queue_name(language, ACTION_SENTENCE_SUGGESTION, "SPACY"),
        )
        answer_task.wait()
        answer = answer_task.result
    except TimeLimitExceeded:
        raise CeleryTimeoutException()

    answer.update(
        {
            "text": text,
            "language": language,
            "n_sentences_to_generate": n_sentences_to_generate,
            "percentage_to_replace": percentage_to_replace,
        }
    )
    return answer
def train_handler(authorization, repository_version=None, language=None):
    """Start training for every language of a repository that is ready.

    With ``language`` given, only that language is considered (and only if
    the backend reports ``ready_for_train``); otherwise the backend is asked
    for all ready-to-train languages. Each one is dispatched according to
    ``settings.BOTHUB_SERVICE_TRAIN``: a Celery task for ``"celery"``, an AI
    platform job (whose id is then saved to the backend) for
    ``"ai-platform"``. Training is not awaited.

    Args:
        authorization: Raw authorization value, resolved through
            ``repository_authorization_validation``.
        repository_version: Optional version forwarded to the backend.
        language: Optional language code; validated when given.

    Returns:
        A dict with ``SUPPORTED_LANGUAGES`` (the keys of
        ``settings.SUPPORTED_LANGUAGES``) and ``languages_report``, mapping
        each processed language to ``{"status": TRAIN_STATUS_PROCESSING}``.
    """
    repository_authorization = repository_authorization_validation(authorization)

    if language:
        language_validation(language)
        language_status = backend().request_backend_train(
            repository_authorization, language, repository_version
        )
        ready_to_train_languages = (
            [language_status] if language_status.get("ready_for_train") else []
        )
    else:
        ready_to_train_languages = backend().request_all_readytotrain_languages(
            repository_authorization, repository_version
        )

    languages_report = {}
    for repository in ready_to_train_languages:
        model = get_language_model(repository)

        if settings.BOTHUB_SERVICE_TRAIN == "celery":
            # The task is sent and not awaited. The previous code collected
            # the task handles in a list that was never read, so the handle
            # is simply not kept.
            celery_app.send_task(
                TASK_NLU_TRAIN_UPDATE,
                args=[
                    repository.get("current_version_id"),
                    repository.get("repository_authorization_user_id"),
                    repository_authorization,
                ],
                queue=queue_name(repository.get("language"), ACTION_TRAIN, model),
            )
        elif settings.BOTHUB_SERVICE_TRAIN == "ai-platform":
            # Job id embeds environment, version id, language and the
            # current Unix timestamp in whole seconds.
            job_id = f'bothub_{settings.ENVIRONMENT}_train_{str(repository.get("current_version_id"))}_{repository.get("language")}_{str(int(time.time()))}'
            send_job_train_ai_platform(
                jobId=job_id,
                repository_version=str(repository.get("current_version_id")),
                by_id=str(repository.get("repository_authorization_user_id")),
                repository_authorization=str(repository_authorization),
                language=repository.get("language"),
                type_model=model,
                operation="train",
            )
            backend().request_backend_save_queue_id(
                update_id=str(repository.get("current_version_id")),
                repository_authorization=str(repository_authorization),
                task_id=job_id,
                from_queue=0,
                type_processing=0,
            )

        languages_report[repository.get("language")] = {
            "status": TRAIN_STATUS_PROCESSING
        }

    return {
        "SUPPORTED_LANGUAGES": list(settings.SUPPORTED_LANGUAGES.keys()),
        "languages_report": languages_report,
    }