Exemplo n.º 1
0
import numpy as np
import pandas as pd
import urllib.request
import urllib.parse
import tensorflow as tf
# 连接数据库
from Data.load_dbdata import upload_data
from global_config import Logger

from run_similarity import BertSim
# 模块导入 https://blog.csdn.net/xiongchengluo1129/article/details/80453599

# Module-level logger; all QA-test activity goes to recommend_articles.log.
loginfo = Logger("recommend_articles.log", "info")
# Test set: tab-separated lines (question, entity, attribute, answer, NER prediction).
# NOTE: "file" shadows a builtin name (Python 2 legacy); kept for compatibility.
file = "./Data/NER_Data/q_t_a_testing_predict.txt"

# BERT sentence-similarity model, loaded once at import time in inference-only mode.
bs = BertSim()
bs.set_mode(tf.estimator.ModeKeys.PREDICT)


def dataset_test():
    '''
    Use the entity + attribute pairs from the training QA data to query the
    knowledge base, measuring the upper bound of QA accuracy.
    :return:
    '''
    with open(file) as f:
        # Counters for the evaluation; their update logic is in the part of
        # the loop body not visible here (function is truncated in this view).
        total = 0    # presumably: number of examples processed — TODO confirm
        recall = 0   # presumably: examples where the KB returned candidates — TODO confirm
        correct = 0  # presumably: examples answered correctly — TODO confirm

        for line in f:
            # Each line: question \t entity \t attribute \t answer \t predicted NER.
            # NOTE(review): no rstrip before split — the last field keeps its
            # trailing newline; verify downstream code tolerates that.
            question, entity, attribute, answer, ner = line.split("\t")
Exemplo n.º 2
0
'''
@Author  :   Bruce
 
@Software:   PyCharm
 
@File    :   test.py
 
@Time    :   2020-6-28
 
@Desc    :  本代码的目的是请用户在输入端输入问题,模型将与输入问题最相近的问题输出,由用户来选择是否该问题正确;
            若正确,则返回答案;若不正确,则提示用户新的问题;如果连续5个问题用户还不能确认提示的问题,则转接人工服务
'''

from run_similarity import BertSim
import tensorflow as tf

sim = BertSim()  # BERT sentence-similarity model

if True:  # placeholder guard — always executes
    # Inference-only mode: no training graph is built.
    sim.set_mode(tf.estimator.ModeKeys.PREDICT)
    # Fixed set of candidate FAQ questions to match the user's input against.
    sentence2_set = [
        '认证时需要上传什么东西?', '如何进行认证?', '在哪里可以找到认证的页面?', '哪里可以认证?', '认证中要经历哪些过程?'
    ]
    print('\n' + '请输入您的问题')
    question = input('输入问题:')
    # Score the user's question against every candidate;
    # predict(...)[0][1] is taken as the "similar" class probability.
    Q_sim = []
    for sen in sentence2_set:
        predict = sim.predict(question, sen)
        Q_sim.append((sen, predict[0][1]))
    # Rank candidates from most to least similar.
    Q_sim = sorted(Q_sim, key=lambda x: x[1], reverse=True)
    # print('----------------------------------------------------------------------')
    print('输入句子为【' + question + '】')
    # NOTE(review): the rest of this script is truncated in this view.
Exemplo n.º 3
0
'''
@Desc    :   本代码的目的是将问答数据进行导入

@Version :      7/14
                运行代码后,首先加载模型,然后等待用户端输入问题。一轮会话结束后,等待用户输入下一个问题。
'''
import os
import pathlib
from run_similarity import BertSim
import tensorflow as tf
import time


# Run on GPU: 50 matching tasks complete within one second.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"  # restrict visible devices to GPU 0 (out of 0, 1)
sim = BertSim()
sim.set_mode(tf.estimator.ModeKeys.PREDICT)  # inference-only mode

file_path = os.path.abspath(__file__)
# Directory containing this script, e.g.
# /home/lixh/works/text/chatbot_project/Chatbot_Retrieval/Chatbot_Retrieval_model/Bert_sim
basepath = str(pathlib.Path(file_path).parent)
# print(basepath)

class similarity():
    # NOTE(review): lowercase class name violates PEP 8 (PascalCase); kept as-is
    # because external callers may reference it by this name.
    def __init__(self, path):
        """Remember where the dialogue/QA data lives; no I/O happens here."""
        self.path = path  # filesystem path to the dialogue data

    def mainProcess(self):
        """
        找出与输入问题
        :return:
Exemplo n.º 4
0
from terminal_predict import predict_service
from Data.load_dbdata import upload_data
from datetime import time, timedelta, datetime
from kbqa_test import estimate_answer
import pandas as pd
from run_similarity import BertSim
import tensorflow as tf
from global_config import Logger

loginfo = Logger("recommend_articles.log", "info")
bs = BertSim()  # BERT sentence-similarity model
bs.set_mode(tf.estimator.ModeKeys.PREDICT)  # inference-only mode

# Interactive KBQA loop: NER on the question -> exact entity lookup in the
# nlpccQA table -> fuzzy LIKE fallback -> (continuation truncated in this view).
while True:
    choice = {}
    question = input("question:")
    start1 = datetime.now()  # timing start; its use is not visible in this view
    # Extract the entity mention from the raw question.
    ner = predict_service(question)
    print("识别出的实体:{}".format(ner))
    # Exact entity match, shortest entity names first.
    # NOTE(review): SQL built by string concatenation — injection risk;
    # prefer DB-API parameterized queries ("... where entity = %s").
    sql_e1 = "select * from nlpccQA where entity ='" + ner + "' order by length(entity) asc "
    result_e1 = list(upload_data(sql_e1))
    print("从数据库中精确找到实体{}个".format(len(result_e1)))
    result = result_e1
    if len(result_e1) == 0:
        print("精确查找没有查找到实体,采用模糊查找")
        # Fuzzy fallback: substring match when the exact lookup finds nothing.
        # NOTE(review): same injection risk as above; '%' wildcards in user
        # input are also not escaped.
        sql_e0 = "select * from nlpccQA where entity like '%" + ner + "%' order by length(entity) asc "
        result_e0 = list(upload_data(sql_e0))
        print(result_e0)
        if len(result_e0) == 0:
            print("这个问题我也不知道呀~~")
            continue
Exemplo n.º 5
0
def predict_online():
    """
    do online prediction. each time make prediction for one instance.
    you can change to a batch if you want.

    Pipeline per user input: tokenize -> NER (BiLSTM/BERT session run) ->
    extract entities -> query knowledge graph for the entity's relations ->
    rank relations by BERT similarity to the question -> look up the answer.

    NOTE(review): depends on many module-level names not defined in this block
    (convert_single_example, label_list, FLAGS, tokenizer, batch_size, graph,
    sess, id2label, pred_ids, *_p placeholders, driver, Klg, BertSim, ...).

    :param line: a list. element is: [dummy_label,text_a,text_b]
    :return:
    """

    #driver = GraphDatabase.driver("bolt://localhost:7687", auth=("neo4j", "Nic180319"))
    def convert(line):
        # Convert one tokenized sentence into the fixed-shape id/mask/segment/
        # label arrays the NER graph expects (shape: batch_size x max_seq_length).
        feature = convert_single_example(0, line, label_list,
                                         FLAGS.max_seq_length, tokenizer, 'p')
        input_ids = np.reshape([feature.input_ids],
                               (batch_size, FLAGS.max_seq_length))
        input_mask = np.reshape([feature.input_mask],
                                (batch_size, FLAGS.max_seq_length))
        segment_ids = np.reshape([feature.segment_ids],
                                 (batch_size, FLAGS.max_seq_length))
        label_ids = np.reshape([feature.label_ids],
                               (batch_size, FLAGS.max_seq_length))
        return input_ids, input_mask, segment_ids, label_ids

    global graph
    with graph.as_default():
        print(id2label)
        # Interactive REPL: one question per iteration, loops forever.
        while True:
            print('input the test sentence:')
            sentence_l = input()
            sentence = str(sentence_l)
            start = datetime.now()
            # Skip degenerate inputs (fewer than 2 characters).
            if len(sentence) < 2:
                print(sentence)
                continue
            sentence = tokenizer.tokenize(sentence)
            # print('your input is:{}'.format(sentence))
            input_ids, input_mask, segment_ids, label_ids = convert(sentence)

            feed_dict = {
                input_ids_p: input_ids,
                input_mask_p: input_mask,
                segment_ids_p: segment_ids,
                label_ids_p: label_ids
            }
            # run session get current feed_dict result
            pred_ids_result = sess.run([pred_ids], feed_dict)
            pred_label_result = convert_id_to_label(pred_ids_result, id2label)
            print(pred_label_result)
            # TODO: combination strategy for merging predicted spans
            result = strage_combined_link_org_loc(sentence,
                                                  pred_label_result[0], True)
            print('识别的实体有:{}'.format(' '.join(result)))
            #print('Time used: {} sec'.format((datetime.now() - start).seconds))

            #   yueuu
            #driver = GraphDatabase.driver("bolt://localhost:7687", auth=("neo4j", "Nic180319"))
            # NOTE(review): result[0] raises IndexError when NER finds no
            # entity — an empty-result guard is missing here.
            hd_graph = Klg()
            all_rel = hd_graph.old_allrel(driver, name=result[0])
            print('知识图谱中跟实体关联的所有关系为:', all_rel)

            time.sleep(5)

            # NOTE(review): a fresh BertSim is constructed on every iteration;
            # hoisting it out of the loop would avoid repeated model loading.
            sim = BertSim()
            sim.set_mode(tf.estimator.ModeKeys.PREDICT)
            # Score each candidate relation against the raw question;
            # predict(...)[0][1] is taken as the similarity score.
            sim_score = []
            for j in range(len(all_rel)):
                sim_score.append(sim.predict(sentence_l, all_rel[j])[0][1])
            for j in range(len(sim_score)):
                print(all_rel[j], f'similarity:{sim_score[j]}')
            # NOTE(review): max() on an empty sim_score raises ValueError if
            # the entity has no relations.
            max_idx = sim_score.index(max(sim_score))
            print('相似度最高的关系为:', all_rel[max_idx])

            # Fetch the answer node for (entity, best relation) from the graph.
            answer = hd_graph.find(driver, result[0], all_rel[max_idx])
            if answer:
                print("answer:", answer)
            else:
                print("不知道")