import codecs
import sys
import gl
import query
import similarity
import time
import mention_id
import knowledge_base

# Python 2 only: reload(sys) brings back sys.setdefaultencoding (removed from
# the sys module at interpreter start-up) so the process-wide default string
# encoding can be forced to UTF-8.
reload(sys)
sys.setdefaultencoding('utf-8')

# Script entry point: loads the knowledge base, the mention -> id table and
# the query file, in that order.  Each stage announces itself on stdout.
# (A single parenthesised string prints the same as the Python 2 print
# statement.)
if __name__ == '__main__':
    # Stage 1: knowledge base.
    print('begin to load kb')
    kb = knowledge_base.KnowledgeBase()
    kb.load_knowledge_base()
    print('finish loading kb')

    # Stage 2: mention -> id mapping.
    print('begin to load mention2id')
    mid = mention_id.MentionID()
    mid.load_mention_2_id()
    print('finish loading mention2id')

    # Stage 3: questions.
    # NOTE(review): the original comment labelled this the "training phase",
    # yet the file that is read is gl.testing_data_split_file_name -- confirm
    # which data split is intended.
    print('begin to load questions')
    query_list = query.QueryList()
    query_list.read_query_file(gl.testing_data_split_file_name)
import sys
import cherrypy

# Extend the import path so the modules below can be found.
# Note that list.insert(-1, x) places x *before* the current last entry of
# sys.path rather than appending it.
sys.path.insert(-1, '../nlp/')
sys.path.insert(-1, '../logic_component/')

import tagger
import knowledge_base
from rulebases import rb_sports

# Module-level knowledge base, constructed with the SportsClaim class.
db = knowledge_base.KnowledgeBase(rb_sports.SportsClaim)


class Server(object):
    """Fact-checking front end: tags text and checks extracted relations."""

    def __init__(self):
        # The tagger is built once and reused by every factcheck() call.
        self.tgr = tagger.initialize_tagger()

    def factcheck(self, txt):
        """Tokenize and tag ``txt``, then check every relation found in it.

        Each extractor is run over the tags; whenever one yields a truthy
        relation, the relation and the verdict from ``db.trueOrFalse`` are
        printed to stdout.
        """
        tags = tagger.get_tags(self.tgr, tagger.tokenize(txt))
        # NOTE(review): never populated in the visible code -- presumably
        # filled in by code that is not shown here; confirm.
        answers = []
        extractors = (
            tagger.extract_player_relation,
            tagger.extract_age_relation,
        )
        for extract in extractors:
            relation = extract(tags)
            if not relation:
                continue
            print('found relation %s' % (relation,))
            # First element becomes the claim's first argument, the rest are
            # passed along as a tuple.
            claim = rb_sports.SportsClaim(relation[0], tuple(relation[1:]))
            ans = db.trueOrFalse(claim)
            print(ans)
def response_message(self, message):
    """Produce the bot's reply to ``message`` and log the exchange.

    The intent of the message is predicted first.  Intent index legend
    (taken from the original comments):
        0: Fragments, 1: Statement, 2: Question, 3: Command,
        4: Rhetorical question, 5: Rhetorical command,
        6: Intonation-dependent utterance.

    Message-type tags stored alongside each utterance: the original comments
    define 'CN' (Client Normal) and 'BF' (Bot Feature, a question asked to
    extract a feature).  The tags actually written here are 'CQ', 'CC', 'CA'
    and '' for the user, and 'BA', 'BQ', 'BF' for the bot -- presumably
    client question / command / answer and bot answer / quibble; confirm.

    Returns whatever the selected reply helper returns.
    """
    intent_index = self.intent_analyzer.predict(self.mecab, message)
    print('intent_index : ' + str(intent_index))

    # Tag and store the user's utterance before building any reply.
    if intent_index == 2:
        user_type = 'CQ'
    elif intent_index == 3:
        user_type = 'CC'
    else:
        user_type = ''
        history = self.conversation_list
        # Treated as an answer only when the last logged entry is a 'BF'.
        if history and history[-1]['type'] == 'BF':
            user_type = 'CA'
    self.save_conversation_list(message, user_type, '')

    if intent_index == 2:
        # Question: knowledge base first, then rule matching, then a quibble.
        # NOTE(review): nesting was reconstructed from whitespace-collapsed
        # source; the type is taken to be 'BA' unless both lookups come back
        # empty -- confirm against the original layout.
        reply_type = 'BA'
        kb = knowledge_base.KnowledgeBase()
        reply = kb.response_question(self.mecab, message)
        if len(reply) == 0:
            reply = self.find_matching_rule(message)
            if len(reply) == 0:
                reply_type = 'BQ'
                reply = self.response_quibble_message(message)
        self.save_conversation_list(reply, reply_type, '')
        return reply

    if intent_index == 3:
        # Command: the processor hands back a dict with 'command' and
        # 'message' entries.
        processor = command_processor.CommandProcessor()
        outcome = processor.process_command(self.mecab, message)
        if outcome['command'] == 'recommend':
            # 'recommend' is not self-contained like the other commands: it
            # goes through the bot's own feature/gambit flow.
            reply_type = 'BF'
            self.find_feature_candidate(message)
            reply = self.print_gambit_message(
                message, intent_index, reply_type, '')
        else:
            reply_type = 'BA'
            # NOTE(review): the quibble fallback is assumed to belong to this
            # branch only (collapsed source is ambiguous) -- confirm.
            reply = (outcome['message']
                     or self.response_quibble_message(message))
        self.save_conversation_list(reply, reply_type, '')
        return reply

    # Every other intent follows the regular flow, driven by the type and
    # feature key of the bot's most recent message.
    prior = self.get_prior_bot_message_type()
    return self.print_gambit_message(
        message, intent_index,
        prior['message_type'], prior['feature_key'])