Example #1
    def get_process_data(self, file):
        test_data = ChangeDataType.json_to_dict(rootPath + "\\testdata\\uidata\\" + file)
        # print(test_data)
        # # Iterate over the items in item_process.json and each item's consultation content
        # for key in test_data:
        #     print(key + ':' + str(test_data[key]))
        return test_data
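
ChangeDataType.json_to_dict is not shown in these examples. A minimal sketch of what it presumably does, assuming it simply reads a UTF-8 JSON file and returns the parsed dict (the implementation below is an assumption, not the original helper):

import json

class ChangeDataType:
    @staticmethod
    def json_to_dict(file_path):
        # Hypothetical implementation: load a UTF-8 JSON file into a dict.
        with open(file_path, "r", encoding="utf-8") as f:
            return json.load(f)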
Example #2
    def get_eye_intent(self, api_url, target_file, test_data_file,
                       result_file):
        """
        通过抽取测试集的数据,调用意图接口,得出的测试结果,在调用函数获取每个target的准确率,召回率,F1
        :param target_file: 储存target的文件
        :param data_file: 储存接口结果数据的文件
        """
        # 获取测试集的data
        test_data = ChangeDataType.csv_to_dict(rootPath +
                                               "\\testdata\\apidata\\" +
                                               test_data_file)
        score_list = []
        re_intent_list = []
        bz_intent_list = []
        tf_list = []
        re_intent = ""
        tf = ""
        # Iterate over each sentence and its expected intent
        for idx, temp in test_data.iterrows():
            intent = temp["intention"]
            sentence = temp["sentence"]
            # Send the request
            url = api_url.format(sentence)
            try:
                r = requests.get(url, timeout=50)
                result = r.json()
                re_intent = result["data"]["intent"]  # intent from the returned data
                print(re_intent)
                score = result["data"]["intent_probability"]  # score from the returned data
                tf = CommonFunction.get_tf(intent, re_intent)
            except Exception as e:
                score = "bad request"
                print(e)
            # self.logging.info("Sentence: " + sentence + " --- expected intent: " + intent
            #                   + " --- actual intent: " + re_intent + " --- match: " + tf)
            # Collect the result data
            score_list.append(score)
            bz_intent_list.append(intent)
            re_intent_list.append(re_intent)
            tf_list.append(tf)

        test_data["re_intent"] = re_intent_list
        test_data["score"] = score_list
        # Call the helper to merge tf_list into test_data
        test_data = CommonFunction.get_collections(test_data, tf_list)
        now = time.strftime('%y_%m_%d-%H_%M_%S')
        # Export the results to Excel
        test_data.to_excel(rootPath + '\\testresults\\resultfile\\' + now +
                           result_file,
                           index=False,
                           encoding="utf-8")
        GetIntent.get_intent_result(self, target_file, bz_intent_list,
                                    re_intent_list)
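
GetIntent.get_intent_result is not included here; according to the docstring it derives precision, recall and F1 per target from the expected and returned intent lists. A minimal sketch of that calculation in plain Python (the function name and signature are illustrative, not the original code):

def intent_metrics(targets, expected, returned):
    # Per-target precision, recall and F1 from paired expected/returned intent lists.
    metrics = {}
    for target in targets:
        tp = sum(1 for e, r in zip(expected, returned) if e == target and r == target)
        fp = sum(1 for e, r in zip(expected, returned) if e != target and r == target)
        fn = sum(1 for e, r in zip(expected, returned) if e == target and r != target)
        precision = tp / (tp + fp) if tp + fp else 0.0
        recall = tp / (tp + fn) if tp + fn else 0.0
        f1 = 2 * precision * recall / (precision + recall) if precision + recall else 0.0
        metrics[target] = {"precision": precision, "recall": recall, "f1": f1}
    return metrics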
Example #3
    def get_pro_intent(self, api_url, target_file, test_data_file,
                       result_file):
        """
        通过抽取测试集的数据,调用意图接口,得出的测试结果,在调用函数获取每个target的准确率,召回率,F1
        :param target_file: 储存target的文件
        :param data_file: 储存接口结果数据的文件
        """
        # 获取测试集的data
        test_data = ChangeDataType.csv_to_dict(
            rootPath + "\\testdata\\apidata\\intent\\" + test_data_file)
        score_list = []
        re_intent_list = []
        exp_intent_list = []
        tf_list = []
        re_intent = ""
        tf = ""
        # Iterate over each sentence and its expected intent
        for idx, temp in test_data.iterrows():
            intent = temp["intent"]
            sentence = temp["sentence"]
            headers = {
                'Authorization':
                "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJjb21wYW55X2lkIjoxLCJyb2JvdF9pZCI6MSwiZXhwIjoxNTg5MzMyNTIzfQ.Durc3V9XA99BejXc2ZOzspPU-JJCY1nUUjceICwBWNE",
                'Content-Type': 'application/x-www-form-urlencoded'
            }
            data = {
                'dialog': sentence,
                'client_id': "zltest",
            }
            url = api_url.format(sentence)  # build the API request URL
            try:
                r = requests.post(url, data=data, headers=headers, timeout=50)
                result = r.json()
                re_intent = result["ner_result"]["ner"]["intent"]["Value"][0][
                    "Value"]  # 获取返回data的intent
                print(re_intent)
                tf = CommonFunction.get_tf(intent, re_intent)
            except Exception as e:
                score = "bad request"
                print(e)
            exp_intent_list.append(intent)
            re_intent_list.append(re_intent)
            tf_list.append(tf)

        test_data["re_intent"] = re_intent_list
        # Call the helper to merge tf_list into test_data
        test_data = CommonFunction.get_collections(test_data, tf_list)
        now = time.strftime('%y_%m_%d-%H_%M_%S')
        # Export the results to Excel
        test_data.to_excel(rootPath + '\\testresults\\resultfile\\' + now +
                           result_file,
                           index=False,
                           encoding="utf-8")
        GetIntent.get_intent_result(self, target_file, exp_intent_list,
                                    re_intent_list)
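
CommonFunction.get_tf is used throughout these examples but not shown. Judging from the tf_list.count("TRUE") / count("FALSE") calls in Example #4, it presumably compares the expected and actual values and returns a string flag; a minimal sketch under that assumption:

def get_tf(expected, actual):
    # Hypothetical helper: "TRUE" if the expected and actual values match, else "FALSE".
    return "TRUE" if expected == actual else "FALSE"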
Example #4
    def get_pro_symptom(self, api_url, test_data_file, result_file):
        test_data = ChangeDataType.json_to_dict(rootPath + "\\testdata\\apidata\\" + test_data_file)
        key_list = list(test_data.keys())
        value_list = list(test_data.values())
        result_value_list = []
        tf_list = []
        tf = ""
        for key, value in test_data.items():
            url = api_url.format(key)
            try:
                r = requests.get(url, timeout=50)
                result = r.json()
                result_value = result["norm_symptoms"]
                tf = CommonFunction.get_tf(result_value, value[0])
            except Exception as e:
                print(e)
            # self.logging.info("Colloquial symptom: " + key + " --- expected standard symptom: " + value[0]
            #                   + " --- actual standard symptom: " + result_value + " --- match: " + tf)
            result_value_list.append(result_value)
            tf_list.append(tf)

        # Binary.binary_plot_curve(value_list, result_value_list)
        now = time.strftime('%y_%m_%d-%H_%M_%S')
        workbook = xlwt.Workbook()
        sheet1 = workbook.add_sheet('sheet1', cell_overwrite_ok=True)
        sheet1.write(0, 0, "口语症状")
        sheet1.write(0, 1, "预期标准症状")
        sheet1.write(0, 2, "实际标准症状")
        sheet1.write(0, 3, "是否一致")
        sheet1.write(0, 4, "总数")
        sheet1.write(1, 4, len(key_list))
        sheet1.write(0, 5, "一致数")
        sheet1.write(1, 5, tf_list.count("TRUE"))
        sheet1.write(0, 6, "不一致数")
        sheet1.write(1, 6, tf_list.count("FALSE"))
        sheet1.write(0, 7, "一致率")
        sheet1.write(1, 7, "{:.2f}%".format(tf_list.count("TRUE") / len(key_list) * 100))
        sheet1.write(0, 8, "不一致率")
        sheet1.write(1, 8, "{:.2f}%".format(tf_list.count("FALSE") / len(key_list) * 100))
        print("总数:", len(tf_list), " 一致数:", tf_list.count("TRUE"), " 不一致数:", tf_list.count("FALSE"), " 一致率:",
              "{:.2f}%".format(tf_list.count("TRUE") / len(tf_list) * 100), " 不一致率:",
              "{:.2f}%".format(tf_list.count("FALSE") / len(tf_list) * 100))
        for i in range(0, len(key_list)):
            sheet1.write(i + 1, 0, key_list[i])
            sheet1.write(i + 1, 1, value_list[i][0])  # expected standard symptom
            sheet1.write(i + 1, 2, result_value_list[i])
            sheet1.write(i + 1, 3, tf_list[i])
        workbook.save(rootPath + '\\testresults\\resultfile\\' + now + result_file)
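
The consistency-rate cells above compute the percentage inline; the same arithmetic as a small standalone helper (illustrative only, not part of the original code):

def match_rate(flags, flag="TRUE"):
    # Share of entries equal to `flag`, formatted as a percentage string.
    return "{:.2f}%".format(flags.count(flag) / len(flags) * 100)

# match_rate(["TRUE", "FALSE", "TRUE", "TRUE"]) -> '75.00%'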
Example #5
    def get_sentence_similarity(self, test_data_file, result_file,
                                compare_score_file):
        # self.logging = Logging()
        test_data = ChangeDataType.csv_to_dict(rootPath +
                                               "\\testdata\\apidata\\" +
                                               test_data_file)
        score_list = []
        re_score_list = []
        tf_list = []
        re_score = ""
        tf = ""
        lb_list = []
        for idx, temp in test_data.iterrows():
            label = int(temp["label"])
            str1 = temp["症状a"]
            str2 = temp["症状b"]
            url = "http://192.168.1.74:8233/bert_similarity/v2?str1={}&str2={}&model".format(
                str1, str2)
            try:
                r = requests.get(url, timeout=50)
                result = r.json()
                score = result["data"]["score"]
                compare_score_list = CommonFunction.get_target(
                    self, compare_score_file)
                for i in range(0, len(compare_score_list)):
                    re_score = CommonFunction.get_re_score(
                        score, float(compare_score_list[i]))
                    tf = CommonFunction.get_tf(re_score, label)
            except Exception as e:
                score = "bad request"
                print(score)
            # self.logging.info("Symptom 1: " + str1 + " --- Symptom 2: " + str2 + " --- expected label: "
            #                   + str(label) + " --- actual score: " + str(re_score) + " --- match: " + tf)
            score_list.append(score)
            re_score_list.append(re_score)
            tf_list.append(tf)
            lb_list.append(label)
        test_data["score"] = score_list
        test_data["re_score"] = re_score_list
        test_data = CommonFunction.get_collections(test_data, tf_list)
        Binary.binary_plot_curve(lb_list, re_score_list)
        now = time.strftime('%y_%m_%d-%H_%M_%S')
        test_data.to_excel(rootPath + '\\testresults\\resultfile\\' + now +
                           result_file,
                           index=False,
                           encoding="utf-8")
Example #6
    def get_answer(self, file):
        """
       查看answer中的有效数据,sentence,ner_title,ner_context并做储备,后期做调用
       :param file:answer文件
       """
        sentence_list = []
        each_ner_content = []
        each_ner_title = []
        each_ner_t_list = []
        each_ner_c_list = []
        test_data = ChangeDataType.csv_to_dict(rootPath +
                                               "\\testdata\\pregnant\\" + file)
        for idx, temp in test_data.iterrows():
            # Get the sentence, entity, entity type, etc.
            sentence = temp["句子"]
            ner = temp["实体"]
            ner_type = temp["实体类别"]
            # All entity types for this sentence; multiple types are separated by "$"
            ner_type_l = ner_type.split("$")
            # Check whether there are multiple entity types
            if len(ner_type_l) >= 2:
                # Split the entities on "$"
                ner_l = ner.split("$")
                # Walk through each entity and split it into type and content
                for i in range(1, len(ner_l)):
                    each_ner = ner_l[i].split(":")
                    # Collect each entity's type and content
                    if i == 1:
                        each_ner_title.append(str(each_ner[0]))
                        each_ner_content.append(str(each_ner[1]))
                    if i >= 2:
                        each_ner_title.append("," + str(each_ner[0]))
                        each_ner_content.append("," + str(each_ner[1]))
                # Record sentence, each_ner_content and each_ner_title once per sentence
                sentence_list.append(sentence)
                each_ner_c_list.append(each_ner_content)
                each_ner_t_list.append(each_ner_title)
            # Only a single entity
            else:
                # Split the entity into type and content
                each_ner = ner.split("$")[1].split(":")
                # Collect the entity's type and content
                each_ner_title.append(str(each_ner[0]))
                each_ner_content.append(str(each_ner[1]))
                # Record sentence, each_ner_content and each_ner_title
                sentence_list.append(sentence)
                each_ner_c_list.append(each_ner_content)
                each_ner_t_list.append(each_ner_title)
            # Reset each_ner_content and each_ner_title
            each_ner_content = []
            each_ner_title = []
        print(sentence_list)
        print(each_ner_c_list)
        print(each_ner_t_list)

        # Build a spreadsheet and record the three lists: sentence_list, each_ner_c_list, each_ner_t_list
        workbook = xlwt.Workbook()
        # Name the sheet
        sheet1 = workbook.add_sheet('answer统计结果', cell_overwrite_ok=True)
        # Write the three column headers
        sheet1.write(0, 0, "sentence")
        sheet1.write(0, 1, "ner")
        sheet1.write(0, 2, "ner_context")
        # Fill in the three columns row by row
        for i in range(0, len(sentence_list)):
            sheet1.write(i + 1, 0, sentence_list[i])
            # join the collected pieces; xlwt cannot write a list into a cell
            sheet1.write(i + 1, 1, "".join(each_ner_c_list[i]))
            sheet1.write(i + 1, 2, "".join(each_ner_t_list[i]))
        # Save the workbook
        workbook.save(rootPath + '\\testresults\\resultfile\\' +
                      "answer_collection_result.xls")

        return sentence_list, each_ner_c_list, each_ner_t_list
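
The entity column parsed above appears to use the form "$type1:content1$type2:content2" (inferred from the split logic, not documented in the source). The same parsing step as a standalone sketch:

def parse_entities(ner):
    # The annotation starts with "$", so the first split piece is empty and is skipped.
    titles, contents = [], []
    for part in ner.split("$")[1:]:
        ner_type, ner_content = part.split(":", 1)
        titles.append(ner_type)
        contents.append(ner_content)
    return titles, contents

# parse_entities("$symptom:headache$duration:two days") -> (["symptom", "duration"], ["headache", "two days"])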
Example #7
    def get_question(self, file):
        """
        查看question中的有效数据,sentence,ner_title,ner_context,intent,并做储备,后期做调用
        :param file:question文件
        """
        sentence_list = []
        intent_list = []
        each_intent_list = []
        each_ner_content = []
        each_ner_title = []
        each_ner_t_list = []
        each_ner_c_list = []
        test_data = ChangeDataType.csv_to_dict(rootPath +
                                               "\\testdata\\pregnant\\" + file)
        for idx, temp in test_data.iterrows():
            # Get the sentence, entity, entity type and intent
            sentence = temp["句子"]
            ner = temp["实体"]
            ner_type = temp["实体类别"]
            intent = temp["意图"]
            # All entity types for this sentence; multiple types are separated by "$"
            ner_type_l = ner_type.split("$")
            # Keep rows whose intent is in the target file and whose entity type is not "无" (none)
            if intent in GetRelation.get_intent_target(
                    self, "intent_target.txt") and ner_type != "无":
                # Check whether there are multiple entity types
                if len(ner_type_l) >= 2:
                    sentence_list.append(sentence)
                    # Record the intent
                    each_intent_list.append(intent)
                    # Split the entities on "$"
                    ner_l = ner.split("$")
                    # Walk through each entity and split it into type and content
                    for i in range(1, len(ner_l)):
                        each_ner = ner_l[i].split(":")
                        # Collect each entity's type and content
                        if i == 1:
                            each_ner_title.append(str(each_ner[0]))
                            each_ner_content.append(str(each_ner[1]))
                        if i >= 2:
                            each_ner_title.append("," + str(each_ner[0]))
                            each_ner_content.append("," + str(each_ner[1]))
                    intent_list.append(each_intent_list)
                    each_ner_c_list.append(each_ner_content)
                    each_ner_t_list.append(each_ner_title)
                # Only a single entity
                else:
                    # ner_l = ner.split("$")
                    sentence_list.append(sentence)
                    # Record the intent
                    each_intent_list.append(intent)
                    # Split the entity into type and content
                    each_ner = ner.split("$")[1].split(":")
                    # Collect the entity's type and content
                    each_ner_title.append(str(each_ner[0]))
                    each_ner_content.append(str(each_ner[1]))
                    intent_list.append(each_intent_list)
                    each_ner_c_list.append(each_ner_content)
                    each_ner_t_list.append(each_ner_title)
            # Record sentence, each_intent_list, each_ner_content and each_ner_title

            # Reset each_ner_content, each_ner_title and each_intent_list
            each_ner_content = []
            each_ner_title = []
            each_intent_list = []

        print(sentence_list)
        print(each_ner_c_list)
        print(each_ner_t_list)
        print(intent_list)

        # Build a spreadsheet and record the four lists: sentence_list, each_ner_c_list, each_ner_t_list, intent_list
        workbook = xlwt.Workbook()
        # Name the sheet
        sheet1 = workbook.add_sheet('question统计结果', cell_overwrite_ok=True)
        # Write the four column headers
        sheet1.write(0, 0, "sentence")
        sheet1.write(0, 1, "ner")
        sheet1.write(0, 2, "ner_context")
        sheet1.write(0, 3, "intent")
        # Fill in the four columns row by row
        for i in range(0, len(sentence_list)):
            sheet1.write(i + 1, 0, sentence_list[i])
            # join the collected pieces; xlwt cannot write a list into a cell
            sheet1.write(i + 1, 1, "".join(each_ner_c_list[i]))
            sheet1.write(i + 1, 2, "".join(each_ner_t_list[i]))
            sheet1.write(i + 1, 3, "".join(intent_list[i]))
        # Save the workbook
        workbook.save(rootPath + '\\testresults\\resultfile\\' +
                      "question_collection_result.xls")
        return sentence_list, each_ner_c_list, each_ner_t_list, intent_list
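
GetRelation.get_intent_target filters rows by intent against intent_target.txt but is not shown. A minimal sketch, assuming the file simply holds one target intent per line (the function below is an assumption, not the original helper):

def get_intent_target(file_path):
    # Hypothetical helper: read one intent label per line, skipping blank lines.
    with open(file_path, "r", encoding="utf-8") as f:
        return [line.strip() for line in f if line.strip()]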