Пример #1
0
def set_input_interface(func):
    """
    Replace the system's input interface.

    The callable is stored in ``GlobalVariable`` under the key ``'INPUT'``.

    :param func: callable that takes no arguments and returns a value
    :return: None
    """
    GlobalVariable.set_value('INPUT', func)
Пример #2
0
def set_output_interface(func):
    """
    Replace the system's output interface.

    The callable is stored in ``GlobalVariable`` under the key ``'OUTPUT'``,
    which is the key the dialog code reads back with
    ``GlobalVariable.get_value('OUTPUT')``.

    :param func: callable that takes one argument (the text to emit); its
        return value is not used
    :return: None
    """
    # The key must be spelled 'OUTPUT': consumers look the interface up under
    # exactly that name, so a misspelled key would never be read back.
    GlobalVariable.set_value('OUTPUT', func)
Пример #3
0
def init_system():
    """
    Initialise the system; this is the first function that needs to run.

    Prints a progress line and then calls ``GlobalVariable._init()``.

    :return: None
    """
    print('init global variable---------')
    GlobalVariable._init()
Пример #4
0
    def create_dialog(self):
        """
        Open a dialog: greet the user, read the first sentence and hand it
        to the dialog tree and the dialog policy.

        :return: None
        """
        # Greeting goes out through the registered output interface
        emit = GlobalVariable.get_value('OUTPUT')
        emit("您好,有什么能帮助您的?")

        # The opening sentence comes from the registered input interface
        read_input = GlobalVariable.get_value('INPUT')
        init_sentence = read_input()

        # Add a new dialog branch tagged 'FAQ' for the opening sentence
        self.dialog_tree.add_dialog_branch(init_sentence, 'FAQ')

        # Start the dialog decision process
        self.dialog_policy.input_query(init_sentence)
Пример #5
0
 def __init__(self):
     """
     Build the BERT sequence-classification model, its sentence processor
     and tokenizer from the settings stored under ``'BERT_ARGS'``, then load
     the weights stored under ``'NEW_STATE_DICT'``.
     """
     self.args = GlobalVariable.get_value('BERT_ARGS')

     # Pretrained model identifier and a cache directory keyed by local_rank
     bert_model = self.args.get('bert_model')
     rank_cache_dir = PYTORCH_PRETRAINED_BERT_CACHE / 'distributed_{}'.format(
         self.args.get('local_rank'))
     self.model = BertForSequenceClassification.from_pretrained(
         bert_model, cache_dir=rank_cache_dir)

     # Only one processor is registered; instantiate it and read its labels
     self.processors = {'SentencePro': sentencePro}
     processor_cls = self.processors['SentencePro']
     self.processor = processor_cls()
     self.label_list = self.processor.get_labels()

     # CUDA is selected only when available and not disabled via 'no_cuda'
     use_cuda = torch.cuda.is_available() and not self.args.get('no_cuda')
     self.device = torch.device("cuda" if use_cuda else "cpu")

     self.tokenizer = BertTokenizer.from_pretrained(bert_model)

     # Replace the pretrained weights with the stored state dict
     state_dict = GlobalVariable.get_value('NEW_STATE_DICT')
     self.model.load_state_dict(state_dict)
Пример #6
0
    def type_provide(self, query, answer_id):
        """
        Ask the user whether the category of the FAQ entry ``answer_id`` is
        the one they meant, then dispatch to ``correct`` or ``error``.

        :param query: the user's question
        :param answer_id: key of the FAQ entry whose category is proposed
        :return: None
        """
        # Category of the proposed FAQ entry
        faq_dict = GlobalVariable.get_value('FAQ_DATA')
        query_type = faq_dict[answer_id]["专业"]

        # Ask for a yes/no confirmation of that category
        ask = GlobalVariable.get_value('OUTPUT')
        ask('您想询问的是否是[%s]相关问题?(是/否)' % query_type)

        reply = GlobalVariable.get_value('INPUT')()
        if reply != '是':
            # Anything other than an exact "是" counts as a rejection
            self.error(query)
            return
        self.correct(query, query_type)
Пример #7
0
    def candidate_selecting(self, query, query_type):
        """
        Show up to five candidate questions of the given category and let
        the user pick one by its number.

        A valid number prints the answer of the chosen candidate and calls
        ``self.selecting()``. Any other reply, including a number outside
        the displayed range, calls ``self.error(query)``.

        :param query: the user's question
        :param query_type: category used to restrict the candidate search
        :return: None
        """
        # Ask the model for the top-5 candidate answer ids in this category;
        # a single query is passed, so only the first result row is used.
        candidate_ids = dssm_model_infer([query],
                                         model_name='AttentionDSSM',
                                         top_k=5,
                                         query_type=query_type)[0]

        faq_dict = GlobalVariable.get_value('FAQ_DATA')

        # List the candidates, numbered from 1
        for number, faq_id in enumerate(candidate_ids, start=1):
            GlobalVariable.get_value('OUTPUT')(
                str(number) + '.' + faq_dict[faq_id]["问题"])

        # Ask the user to choose
        GlobalVariable.get_value('OUTPUT')(
            '上述问题是否包含您想问的问题,如果是,请返回相应问题序号,如果不是,请回[否]')
        respond = GlobalVariable.get_value('INPUT')()

        # isdecimal() (unlike isdigit()) only accepts characters int() can
        # parse. The range check keeps "0" from wrapping to the last
        # candidate via index -1 and too-large numbers from raising
        # IndexError.
        if respond.isdecimal() and 1 <= int(respond) <= len(candidate_ids):
            chosen_id = candidate_ids[int(respond) - 1]
            GlobalVariable.get_value('OUTPUT')(faq_dict[chosen_id]["答案"])
            self.selecting()
        else:
            self.error(query)
Пример #8
0
def dssm_model_infer(queries,
                     model_name='MultiGruDSSM',
                     top_k=1,
                     threshold=0.):
    """
    Run a preloaded DSSM inference model over the given queries and return,
    for each query, the FAQ ids of its top-k matches.

    For every query the ranked results are walked in order and collection
    stops at the first result whose score is ``<= threshold``.

    :param queries: list of questions
    :param model_name: name of the model; ``model_name + '_INFER'`` is the
        key looked up under ``MODEL['DSSM']``
    :param top_k: number of candidates requested from the model
    :param threshold: similarity threshold
    :param query_type: key into ``FAQ_INDEX`` selecting the entries to match
    :return: list (one item per query) of lists of answer ids
    """
    # Convert each query with list(); for a str this gives its characters
    query_set = [list(query) for query in queries]

    # Ids of the FAQ entries that belong to the requested type
    index_list = GlobalVariable.get_value('FAQ_INDEX')[query_type]

    # Stored feature vector of every entry to match against
    faq_data = GlobalVariable.get_value('FAQ_DATA')
    t_set = [faq_data[index]['embedding'] for index in index_list]

    # Feed the shared model object and run inference
    dssm = GlobalVariable.get_value('MODEL')['DSSM'][model_name + '_INFER']
    dssm.q_set = query_set
    dssm.t_set = t_set
    dssm.init_model_parameters()
    dssm.generate_data_set()
    result_prob_list, result_id_list = dssm.inference(top_k)

    # The model returns positions into t_set; map them back to FAQ ids
    answer_id_list = []
    for row, ranked_positions in enumerate(result_id_list):
        kept_ids = []
        for col, position in enumerate(ranked_positions):
            if result_prob_list[row][col] <= threshold:
                break
            kept_ids.append(index_list[position])
        answer_id_list.append(kept_ids)

    return answer_id_list
Пример #9
0
def dssm_model_extract_t_pre(model_name='MultiGruDSSM'):
    """
    Extract feature vectors for all FAQ questions with the given DSSM model
    and write them to one JSON file per question group.

    Questions are grouped by their '专业' value: '百科' goes to
    'Encyclopedia', '闲聊' to 'Gossip', everything else to 'Domain'.

    :param model_name: key into ``dssm_model`` and name of the output
        sub-directory
    :return: None
    """
    # Group the questions; the dict's insertion order fixes processing order
    bucket_of = {'百科': 'Encyclopedia', '闲聊': 'Gossip'}
    query_dict = {'Domain': [], 'Encyclopedia': [], 'Gossip': []}
    faq_dict = GlobalVariable.get_value('FAQ_DATA')
    for faq_id in faq_dict:
        entry = faq_dict[faq_id]
        bucket = bucket_of.get(entry['专业'], 'Domain')
        query_dict[bucket].append(list(entry['问题']))

    # Character-embedding table: token -> row index, plus the vectors in
    # the same order
    embedding_dict = GlobalVariable.get_value('Word2Vec_CHARACTER_EMBEDDING')
    word_dict = {token: row for row, token in enumerate(embedding_dict)}
    vec_set = [embedding_dict[token][0] for token in embedding_dict]

    for bucket_name, questions in query_dict.items():
        # Build a model in extraction mode for this group and run it
        dssm = dssm_model[model_name](t_set=questions,
                                      dict_set=word_dict,
                                      vec_set=vec_set,
                                      is_extract=True)
        dssm.init_model_parameters()
        dssm.generate_data_set()
        dssm.build_graph()
        t_state = dssm.extract_t_pre()

        # Convert every vector to plain floats so it can be JSON-serialised
        t_pre_dict = {
            row: list(map(float, list(vector)))
            for row, vector in enumerate(t_state)
        }

        out_path = ('./KnowledgeMemory/Embedding/DSSM/' + model_name + '/' +
                    bucket_name + 'Embedding.json')
        with open(out_path, 'w', encoding='utf-8') as file_object:
            json.dump(t_pre_dict, file_object, ensure_ascii=False, indent=2)
Пример #10
0
def dssm_model_train(model_name='MultiGruDSSM'):
    """
    Train the named DSSM model on the question/answer pairs in ``FAQ_DATA``.

    Questions and answers are both converted with ``list()`` and the
    character embeddings come from ``Word2Vec_CHARACTER_EMBEDDING``. The
    batch size is half the number of questions (integer division).

    :param model_name: key into ``dssm_model``
    :return: None
    """
    # Training pairs: question i is matched with answer i
    faq_dict = GlobalVariable.get_value('FAQ_DATA')
    query_set = [list(faq_dict[faq_id]['问题']) for faq_id in faq_dict]
    answer_set = [list(faq_dict[faq_id]['答案']) for faq_id in faq_dict]

    # NOTE: an augmentation step that also added each question reversed
    # (pointing at the same answer) existed here and is disabled.

    # Character-embedding table: token -> row index, plus the vectors in
    # the same order
    embedding_dict = GlobalVariable.get_value('Word2Vec_CHARACTER_EMBEDDING')
    word_dict = {token: row for row, token in enumerate(embedding_dict)}
    vec_set = [embedding_dict[token][0] for token in embedding_dict]

    # Build and train the model
    half_batch = len(query_set) // 2
    dssm = dssm_model[model_name](q_set=query_set,
                                  t_set=answer_set,
                                  dict_set=word_dict,
                                  vec_set=vec_set,
                                  batch_size=half_batch)
    dssm.init_model_parameters()
    dssm.generate_data_set()
    dssm.build_graph()
    dssm.train()
Пример #11
0
    def types_selecting(self, query):
        """
        List the distinct question categories found in ``FAQ_DATA`` and let
        the user pick one by its number.

        A valid number calls ``self.selecting(query, chosen_type)``. Any
        other reply, including a number outside the displayed range, calls
        ``self.error(query)``.

        :param query: the user's question
        :return: None
        """
        faq_data = GlobalVariable.get_value('FAQ_DATA')
        # FAQ_DATA may be an id -> entry mapping; iterating a dict directly
        # would yield the ids rather than the entries, so take its values
        # in that case. A plain sequence of entries is used as it is.
        entries = faq_data.values() if isinstance(faq_data, dict) else faq_data

        # Distinct categories in first-seen order
        query_types = list(dict.fromkeys(entry['专业'] for entry in entries))

        # List the categories, numbered from 1
        GlobalVariable.get_value('OUTPUT')('目前已有的问题类型有:')
        for number, type_name in enumerate(query_types, start=1):
            GlobalVariable.get_value('OUTPUT')(str(number) + '.' + type_name)

        # Ask the user to choose
        GlobalVariable.get_value('OUTPUT')(
            '上述类型是否包含您想问的问题类型,如果是,请返回相应类型序号,如果不是,请回[否]')
        respond = GlobalVariable.get_value('INPUT')()

        # isdecimal() (unlike isdigit()) only accepts characters int() can
        # parse. The range check keeps "0" from wrapping to the last
        # category via index -1 and too-large numbers from raising
        # IndexError.
        if respond.isdecimal() and 1 <= int(respond) <= len(query_types):
            query_type = query_types[int(respond) - 1]
            self.selecting(query, query_type)
        else:
            self.error(query)
Пример #12
0
def get_answer(queries, model_name='MultiGruModel', top_k=1, threshold=0.):
    """
    Single-turn lookup: for each input question, fetch the top-k matching
    FAQ entries from the given model and return their questions and answers.

    :param queries: list of questions
    :param model_name: name of the model to call
    :param top_k: number of matches requested per question
    :param threshold: similarity threshold
    :return: tuple ``(query_set, answer_set)``; each is a list with one
        inner list per input question, holding the matched FAQ questions
        and the matched FAQ answers respectively
    """
    # NOTE(review): the default 'MultiGruModel' differs from the
    # 'MultiGruDSSM' default of dssm_model_infer -- confirm it is a
    # registered model name.
    print('get answer---------')

    # Ask the model for the top-k answer ids of every question
    answer_id_list = dssm_model_infer(queries,
                                      model_name=model_name,
                                      top_k=top_k,
                                      threshold=threshold)

    faq_dict = GlobalVariable.get_value('FAQ_DATA')

    # Resolve the ids to FAQ question texts and to FAQ answer texts
    query_set = [[faq_dict[faq_id]["问题"] for faq_id in matched_ids]
                 for matched_ids in answer_id_list]
    answer_set = [[faq_dict[faq_id]["答案"] for faq_id in matched_ids]
                  for matched_ids in answer_id_list]

    return query_set, answer_set
Пример #13
0
    def answer_matching(self, query):
        """
        Show the single best-matching FAQ answer for ``query`` and ask the
        user whether it is correct, then dispatch to ``correct`` or
        ``error``.

        :param query: the user's question
        :return: None
        """
        # Ask the model for the single best answer id
        answer_id_list = dssm_model_infer([query],
                                          model_name='AttentionDSSM',
                                          top_k=1)

        faq_dict = GlobalVariable.get_value('FAQ_DATA')

        # Show the best match
        show = GlobalVariable.get_value('OUTPUT')
        # NOTE(review): indexing [0][0] assumes the model returned at least
        # one id for the query -- confirm against dssm_model_infer.
        best_id = answer_id_list[0][0]
        show(faq_dict[best_id]["答案"])

        # Ask for a yes/no confirmation
        ask = GlobalVariable.get_value('OUTPUT')
        ask('是否是正确答案?(是/否)')
        reply = GlobalVariable.get_value('INPUT')()
        if reply != '是':
            # Anything other than an exact "是" counts as a rejection
            self.error(query, best_id)
            return
        self.correct()
Пример #14
0
# @Function :

from KnowledgeExtraction.QuestionClassificationBert.Args import BertArgs
from KnowledgeExtraction.QuestionClassificationBert.TrainClassificationModel import BertForClassification
from UtilArea import GlobalVariable

if __name__ == '__main__':

    # Set to True to train the classifier before the prediction step
    do_train = False
    if do_train:
        # Step 1: train the model.
        # BERT arguments for training mode (train + eval, CUDA not disabled)
        training_args = BertArgs(do_train=True, do_eval=True, no_cuda=False)
        # Build the training wrapper
        trainer = BertForClassification(training_args)

        print('---------Start Training-------------')
        trainer.train()
        print('---------Finish Training------------')

    # Step 2: load the trained model and predict.
    # TODO: (1) switch to intent recognition (2) standardise the interface
    # Read one sentence and run question classification / intent recognition
    GlobalVariable._init()
    predict_model = GlobalVariable.get_value('QUESTION_CLASSIFICATION_MODEL')
    input_sentence = input('请输入问题:')
    prediction = predict_model.test(input_sentence)

    # Earlier call form, kept for reference:
    # test(model, processor, args, label_list, tokenizer, device, input_sentence)
Пример #15
0
 def end_process(self):
     """Send the closing message through the registered output interface."""
     say = GlobalVariable.get_value('OUTPUT')
     say('感谢为您解答!')