def test_qa_interface_check_engines_docs_se():
    """Building a QAInterface with a plain string as ``docs_engine`` must raise SystemExit."""
    # Every argument except ``docs_engine`` is one of the project's own classes.
    interface_kwargs = {
        "detector": AnswerDetector,
        "question_engine": QuestionSearchEngine,
        "faq_engine": FAQSearchEngine,
        "docs_engine": "not a SearchEngine",
    }
    with pytest.raises(SystemExit):
        QAInterface(**interface_kwargs)
def test_qa_interface_check_detector():
    """Building a QAInterface with a plain string as ``detector`` must raise SystemExit."""
    # Every argument except ``detector`` is one of the project's own classes.
    interface_kwargs = {
        "detector": "not an AnswerDetector",
        "question_engine": QuestionSearchEngine,
        "faq_engine": FAQSearchEngine,
        "docs_engine": SearchEngine,
    }
    with pytest.raises(SystemExit):
        QAInterface(**interface_kwargs)
def __init__(self, model=None, db_name="data_storage", num_answers_inf=1):
    """Build the answer detector, the search engines and the QA interface.

    Args:
        model: Optional model name. When truthy it is passed to
            ``check_model_availability`` and then used in place of the
            default ``distilbert-base-cased-distilled-squad``.
        db_name: Database name without extension; ``.db`` is appended
            before it is handed to ``Database``.
        num_answers_inf: Forwarded to ``AnswerDetector`` as
            ``num_answers_to_predict``.
    """
    default_model = "distilbert-base-cased-distilled-squad"
    self.model = default_model
    if model:
        check_model_availability(model)
        self.model = model

    # Device index handed to the detector: 0 when CUDA is available, -1 otherwise.
    device_index = -1
    if torch.cuda.is_available():
        device_index = 0

    self.answer_detector = AnswerDetector(
        model=self.model,
        device=device_index,
        num_answers_to_predict=num_answers_inf,
    )

    storage = Database(f"{db_name}.db")
    engines = setup_search_engines(db=storage)
    faq_engine, docs_engine, question_engine = engines

    self.qa_interface = QAInterface(
        detector=self.answer_detector,
        question_engine=question_engine,
        faq_engine=faq_engine,
        docs_engine=docs_engine,
    )
def __init__(self, model=None, db_name="data_storage", num_answers_to_predict=3):
    """Build the answer detector, the search engines and the QA interface.

    Args:
        model: Optional model name. When truthy it is passed to
            ``check_model_availability`` and then used in place of the
            default ``distilbert-base-cased-distilled-squad``.
        db_name: Database name without extension; stored on the instance
            and suffixed with ``.db`` before it is handed to ``Database``.
        num_answers_to_predict: Forwarded unchanged to ``AnswerDetector``.
    """
    default_model = "distilbert-base-cased-distilled-squad"
    self.model = default_model
    if model:
        check_model_availability(model)
        self.model = model
    self.db_name = db_name

    # Device index handed to the detector: 0 when CUDA is available, -1 otherwise.
    # NOTE (original author): running inference on CPU only may be preferable.
    device_index = -1
    if torch.cuda.is_available():
        device_index = 0

    self.answer_detector = AnswerDetector(
        model=self.model,
        device=device_index,
        num_answers_to_predict=num_answers_to_predict,
    )

    storage = Database(f"{self.db_name}.db")
    engines = setup_search_engines(db=storage)
    faq_engine, docs_engine, question_engine = engines

    self.qa_interface = QAInterface(
        detector=self.answer_detector,
        question_engine=question_engine,
        faq_engine=faq_engine,
        docs_engine=docs_engine,
    )

    # The thread that initialises the DonkeyBot instance will not use the
    # database again, so the connection is released here.
    storage.close_connection()
def main():
    """Run the interactive DonkeyBot question-answering loop from the command line.

    Command-line options select the model, the database name, whether answers
    are stored, the answers table name and the number of answers predicted
    per document. After the answer detector, search engines and QA interface
    are loaded, the user is repeatedly asked for a question and for how many
    answers to return; the answers are printed and, when ``--store_answers``
    is set, inserted into the answers table.

    Raises:
        SystemExit: with the message ``"\\nExiting..."`` when the user presses
            CTRL+C, or from argparse when the command line is invalid.
    """
    # Parse cli arguments
    parser = argparse.ArgumentParser(
        description="""Use this script to ask DonkeyBot!"""
    )
    optional = parser.add_argument_group("optional arguments")
    optional.add_argument(
        "-m",
        "--model",
        default="distilbert-base-cased-distilled-squad",
        help="BERT/DistilBERT model used to inference answers. (default is distilbert-base-cased-distilled-squad)",
    )
    optional.add_argument(
        "-db",
        "--db_name",
        default="data_storage",
        help="Name of database where all data is stored. (default is data_storage)",
    )
    optional.add_argument(
        "-s",
        "--store_answers",
        type=str2bool,
        nargs="?",
        const=True,
        default=False,
        help="Store the answers on the '--answers_table' table. (default is False)",
    )
    optional.add_argument(
        "-n",
        "--num_answers_predicted_per_document",
        # Let argparse validate the value so a bad one yields a usage error
        # instead of an uncaught ValueError later on.
        type=int,
        default=3,
        help="Number of answers predicted per document. (default is 3)",
    )
    optional.add_argument(
        "--answers_table",
        default="answers",
        help="Name of the answers table. (default is 'answers')",
    )

    args = parser.parse_args()
    db_name = args.db_name
    model = args.model
    answers_table = args.answers_table
    store_answers = args.store_answers
    num_answers_inf = args.num_answers_predicted_per_document

    check_model_availability(model)

    # prepare data_storage
    data_storage = Database(f"{db_name}.db")
    try:
        # check for the answers table
        tables_in_db = [table[0] for table in data_storage.get_tables()]
        if answers_table not in tables_in_db:
            print(f"Creating '{answers_table}' table in {db_name}.db")
            data_storage.create_answers_table(table_name=answers_table)

        # load answer detector
        print("Loading AnswerDetector...")
        gpu = 0 if torch.cuda.is_available() else -1
        answer_detector = AnswerDetector(
            model=model, device=gpu, num_answers_to_predict=num_answers_inf
        )

        # load search engines
        faq_se, docs_se, question_se = setup_search_engines(db=data_storage)

        # load interface
        qa_interface = QAInterface(
            detector=answer_detector,
            question_engine=question_se,
            faq_engine=faq_se,
            docs_engine=docs_se,
        )

        # Main Loop
        print("DonkeyBot ready to be asked!")
        while True:
            print("\nCTRL+C to exit donkeybot")
            query = input("ask question: ")

            # Keep asking until a valid integer is given, so a typo does not
            # end the whole session.
            while True:
                try:
                    top_k = int(input("how many answers: "))
                    break
                except ValueError:
                    print("Please enter a whole number.")

            start_time = time.time()
            answers = qa_interface.get_answers(query, top_k=top_k)
            print(
                f"Total inference time: {round(time.time() - start_time, 2)} seconds"
            )
            print_answers(answers)

            if store_answers:
                for answer in answers:
                    data_storage.insert_answer(answer, table_name=answers_table)
    except KeyboardInterrupt:
        sys.exit("\nExiting...")
    finally:
        # Release the connection on every exit path (CTRL+C, EOF, errors),
        # not only on KeyboardInterrupt.
        data_storage.close_connection()