class ComplexMatch:
    """Match an input text against the question templates in ``complex_qa.xlsx``.

    Each template is a ``(qa_type, question)`` pair. The text is scored against
    every template question by ``self.predictor`` and the ``qa_type`` of the
    best-scoring template is reported when its score is high enough.
    """

    # Minimum score the best candidate must reach for its qa_type to be reported.
    CONFIDENCE_THRESHOLD = 0.97
    # How many of the best candidates are written to the debug log.
    TOP_K = 5

    def __init__(self):
        self.data, self.questions = self.load_complex_qa()
        self.predictor = Predictor(Path.relation_match_model)

    def predict(self, text):
        """Return ``(qa_type, confidence)`` for the best-matching template.

        Args:
            text: The input text, passed to the predictor as ``text_a_s``.

        Returns:
            A ``(qa_type, confidence)`` tuple. ``qa_type`` is ``''`` when the
            best score is below ``CONFIDENCE_THRESHOLD``. When there is no
            candidate at all, ``('', 0.0)`` is returned instead of raising
            ``IndexError``.
        """
        if not self.questions:
            # Nothing to compare against: skip the model call entirely.
            return '', 0.0

        result = self.predictor.predict_texts(text_a_s=text, text_b_s=self.questions)
        # A 'No' label is converted to 1 - confidence so that all candidates are
        # ranked on one scale (presumably the score of a positive match -- the
        # predictor's exact output contract is not visible here).
        results = [
            (qa_type, question, 1 - confidence if label == 'No' else confidence)
            for (qa_type, question), (label, confidence) in zip(self.data, result)
        ]
        if not results:
            # The predictor produced no output to pair with the templates.
            return '', 0.0

        # sorted() is stable, so equally scored candidates keep template order.
        top_results = sorted(results, key=lambda item: item[2], reverse=True)[:self.TOP_K]
        for qa_type, question, confidence in top_results:
            logger.debug('{} {} {}'.format(qa_type, question, confidence))

        top_qa_type, _, top_confidence = top_results[0]
        if top_confidence < self.CONFIDENCE_THRESHOLD:
            top_qa_type = ''
        return top_qa_type, top_confidence

    def load_complex_qa(self):
        """Read ``complex_qa.xlsx`` from ``Path.data_path``.

        Returns:
            ``(data, questions)``: ``data`` is a list of ``(qa_type, question)``
            tuples in sheet order and ``questions`` is the list of the question
            values in the same order.
        """
        df = pd.read_excel(os.path.join(Path.data_path, 'complex_qa.xlsx'))
        data = [(row['qa_type'], row['question']) for _, row in df.iterrows()]
        questions = [question for _, question in data]
        return data, questions
class RelationClassifier:
    """Relation classification: text classification with a BERT model.

    The label predicted by ``self.predictor`` is reported only when its
    confidence reaches ``CONFIDENCE_THRESHOLD``.
    """

    # Predictions with a confidence below this value are reported as '' (no
    # label). Kept in one place so the log line and the cutoff cannot drift apart.
    CONFIDENCE_THRESHOLD = 0.6

    def __init__(self):
        self.predictor = Predictor(Path.relation_classifier_model)

    def predict(self, text):
        """Classify ``text`` and return ``(label, confidence)``.

        Args:
            text: The input text, passed to the predictor as ``text_a``.

        Returns:
            A ``(label, confidence)`` tuple. ``label`` is replaced by ``''``
            when ``confidence`` is below ``CONFIDENCE_THRESHOLD``; the
            confidence is returned unchanged either way.
        """
        threshold = self.CONFIDENCE_THRESHOLD
        label, confidence = self.predictor.predict_text(text_a=text)
        logger.debug('{} {} {}'.format(label, confidence, confidence >= threshold))
        if confidence < threshold:
            label = ''
        return label, confidence
class RelationMatch:
    """Relation matching using a BERT text-similarity model.

    The input text is scored against every predicate word loaded from the
    dictionary ``config.txt`` and the best-scoring predicate is reported when
    its score is high enough.
    """

    # Minimum score the best candidate must reach for it to be reported.
    CONFIDENCE_THRESHOLD = 0.97
    # How many of the best candidates are written to the debug log.
    TOP_K = 5

    def __init__(self):
        self.predicates = self.load_predictes()
        self.predictor = Predictor(Path.relation_match_model)

    def predict(self, text):
        """Return ``(predicate, confidence)`` for the best-matching predicate.

        Args:
            text: The input text, passed to the predictor as ``text_a_s``.

        Returns:
            A ``(predicate, confidence)`` tuple. ``predicate`` is ``''`` when
            the best score is below ``CONFIDENCE_THRESHOLD``. When there is no
            candidate at all, ``('', 0.0)`` is returned instead of raising
            ``IndexError``.
        """
        if not self.predicates:
            # Nothing to compare against: skip the model call entirely.
            return '', 0.0

        result = self.predictor.predict_texts(text_a_s=text, text_b_s=self.predicates)
        # A 'No' label is converted to 1 - confidence so that all candidates are
        # ranked on one scale (presumably the score of a positive match -- the
        # predictor's exact output contract is not visible here).
        results = [
            (predicate, 1 - confidence if label == 'No' else confidence)
            for predicate, (label, confidence) in zip(self.predicates, result)
        ]
        if not results:
            # The predictor produced no output to pair with the predicates.
            return '', 0.0

        # sorted() is stable, so equally scored candidates keep dictionary order.
        top_results = sorted(results, key=lambda item: item[1], reverse=True)[:self.TOP_K]
        for label, confidence in top_results:
            logger.debug('{} {}'.format(label, confidence))

        top_label, top_confidence = top_results[0]
        if top_confidence < self.CONFIDENCE_THRESHOLD:
            top_label = ''
        return top_label, top_confidence

    def load_predictes(self):
        """Load the predicate words from ``config.txt`` under ``Path.dictionary``.

        Each non-empty line is tab-separated: the first field (an IRI) is
        ignored and every following non-empty field is collected as a predicate.

        Returns:
            The predicate words as a flat list, in file order.
        """
        predicates = []
        config_path = os.path.join(Path.dictionary, 'config.txt')
        # NOTE(review): no explicit encoding, so the platform default is used;
        # confirm the file's encoding and pass it explicitly if it is fixed.
        with open(config_path, 'r') as r_f:
            for line in r_f:
                line = line.rstrip('\n')
                if not line:
                    continue
                _iri, *words = line.split('\t')
                # Trailing or doubled tabs yield '' fields; an empty predicate
                # would be indistinguishable from predict()'s '' "no match" value.
                predicates.extend(word for word in words if word)
        return predicates
import os import sys import pandas as pd from sklearn import metrics base_path = os.path.join(os.path.dirname(__file__), '..') sys.path.append(base_path) from model.text_classification import Predictor predictor = Predictor(os.path.join(base_path, 'carbot_data/model/kbqapc')) examples = predictor.processor.get_dev_examples(os.path.join(base_path, 'data/train/kbqa/predicate_classification/')) data_df = [] for example in examples: pred, confidence = predictor.predict_text(example.text_a, example.text_b) text = example.text_a true = example.label data_df.append({ 'text': text, 'true': true, 'pred': pred, '是否正确': '是' if true == pred else '否' }) df = pd.DataFrame(data_df, columns=['text', 'true', 'pred', '是否正确']) trues = df['true'] preds = df['pred'] labels = sorted(set(trues)) precision, recall, f1, num = metrics.precision_recall_fscore_support(y_true=trues, y_pred=preds, labels=labels, average=None) precision_total, recall_total, f1_total, num_total = metrics.precision_recall_fscore_support(y_true=trues, y_pred=preds,
def __init__(self):
    """Store the result of ``load_predictes()`` and build the predictor.

    ``self.predicates`` is set first, then ``self.predictor`` is created
    from ``Path.relation_match_model``.
    """
    candidate_predicates = self.load_predictes()
    self.predicates = candidate_predicates

    model_location = Path.relation_match_model
    self.predictor = Predictor(model_location)
def __init__(self):
    """Create ``self.predictor`` from ``Path.relation_classifier_model``."""
    model_location = Path.relation_classifier_model
    self.predictor = Predictor(model_location)
def __init__(self):
    """Store the result of ``load_complex_qa()`` and build the predictor.

    The two values returned by ``load_complex_qa()`` are kept as
    ``self.data`` and ``self.questions``; ``self.predictor`` is then created
    from ``Path.relation_match_model``.
    """
    qa_pairs, question_texts = self.load_complex_qa()
    self.data = qa_pairs
    self.questions = question_texts

    model_location = Path.relation_match_model
    self.predictor = Predictor(model_location)