Example #1
def pred_input(path_hyper_parameter=path_hyper_parameters):
    # Interactive prediction on user input
    # Load hyperparameters
    hyper_parameters = load_json(path_hyper_parameter)
    pt = PreprocessTextMulti(path_model_dir)
    # Initialize the graph and load the trained model
    graph = Graph(hyper_parameters)
    graph.load_model()
    ra_ed = graph.word_embedding
    ques = '我要打王者荣耀'  # sample query: "I want to play Honor of Kings"
    # str to token
    ques_embed = ra_ed.sentence2idx(ques)
    if hyper_parameters['embedding_type'] in ['bert', 'albert']:
        x_val_1 = np.array([ques_embed[0]])
        x_val_2 = np.array([ques_embed[1]])
        x_val = [x_val_1, x_val_2]
    else:
        x_val = ques_embed
    # Predict
    pred = graph.predict(x_val)
    print(pred)
    # Map predicted ids back to labels and probabilities
    pre = pt.prereocess_idx(pred[0])
    # Keep every label whose predicted probability is at least 0.5
    ls_nulti = []
    for ls in pre[0]:
        if ls[1] >= 0.5:
            ls_nulti.append(ls)
    print(pre[0])
    print(ls_nulti)
    while True:
        print("Please input: ")
        ques = input()
        ques_embed = ra_ed.sentence2idx(ques)
        print(ques_embed)
        if hyper_parameters['embedding_type'] in ['bert', 'albert']:
            x_val_1 = np.array([ques_embed[0]])
            x_val_2 = np.array([ques_embed[1]])
            x_val = [x_val_1, x_val_2]
        else:
            x_val = ques_embed
        pred = graph.predict(x_val)
        pre = pt.prereocess_idx(pred[0])
        ls_nulti = []
        for ls in pre[0]:
            if ls[1] >= 0.5:
                ls_nulti.append(ls)
        print(pre[0])
        print(ls_nulti)
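
The `embedding_type` branching above (token ids plus segment ids for BERT-style encoders, the raw `sentence2idx` output otherwise) recurs in every example on this page. Below is a minimal sketch that factors it out, assuming the shapes shown in these examples; the helper name is hypothetical and not part of the original code.

import numpy as np

def build_model_input(ques_embed, embedding_type):
    # BERT/ALBERT tokenization yields (token_ids, segment_ids); XLNet adds a third
    # array (see Example #3 below). Each array is wrapped in a batch dimension of 1.
    if embedding_type in ['bert', 'albert']:
        return [np.array([ques_embed[0]]), np.array([ques_embed[1]])]
    if embedding_type == 'xlnet':
        return [np.array([ques_embed[0]]), np.array([ques_embed[1]]),
                np.array([ques_embed[2]])]
    # 'random' / 'word2vec' embeddings: sentence2idx already returns the model input.
    return ques_embed
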
Example #2
def pred_input(str_input, path_hyper_parameter=path_hyper_parameters):
    # Predict on the given input string
    # Load hyperparameters
    hyper_parameters = load_json(path_hyper_parameter)
    pt = PreprocessTextMulti()
    # Initialize the graph and load the trained model
    graph = Graph(hyper_parameters)
    graph.load_model()
    ra_ed = graph.word_embedding
    ques = str_input
    # str to token
    ques_embed = ra_ed.sentence2idx(ques)
    if hyper_parameters['embedding_type'] == 'bert':
        x_val_1 = np.array([ques_embed[0]])
        x_val_2 = np.array([ques_embed[1]])
        x_val = [x_val_1, x_val_2]
    else:
        x_val = ques_embed
    # Predict
    pred = graph.predict(x_val)
    print(pred)
    # Map predicted ids back to labels and probabilities
    pre = pt.prereocess_idx(pred[0])
    # Keep every label whose predicted probability is at least 0.73
    ls_nulti = []
    for ls in pre[0]:
        if ls[1] >= 0.73:
            ls_nulti.append(ls)
    print(str_input)
    print(pre[0])
    print(ls_nulti)
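
The filtered `ls_nulti` list above holds (label, probability) pairs. A small hypothetical helper (not in the original code) that turns such a list into the comma-separated label string the evaluation examples further down compare against with `val[index].split(",")`:

def labels_to_string(ls_multi):
    # ls_multi is a list of (label, probability) pairs kept by the threshold filter,
    # e.g. [('label_a', 0.91), ('label_b', 0.80)]; only the label names are joined.
    return ",".join(label for label, _prob in ls_multi)
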
Example #3
def pred_tet(path_hyper_parameter=path_hyper_parameters,
             path_test=None,
             rate=1.0):
    # Accuracy on the test set
    hyper_parameters = load_json(path_hyper_parameter)
    if path_test:  # use an externally supplied test data path
        hyper_parameters['data']['val_data'] = path_test
    time_start = time.time()
    # Initialize the graph
    graph = Graph(hyper_parameters)
    print("graph init ok!")
    graph.load_model()
    print("graph load ok!")
    ra_ed = graph.word_embedding
    # Data preprocessing
    pt = PreprocessText()
    y, x = read_and_process(hyper_parameters['data']['val_data'])
    # Evaluate on only the first `rate` fraction of the dataset (the first row is skipped)
    len_rate = int(len(y) * rate)
    x = x[1:len_rate]
    y = y[1:len_rate]
    y_pred = []
    count = 0
    for x_one in x:
        count += 1
        ques_embed = ra_ed.sentence2idx(x_one)

        if hyper_parameters['embedding_type'] in ['bert', 'albert']:
            x_val_1 = np.array([ques_embed[0]])
            x_val_2 = np.array([ques_embed[1]])
            x_val = [x_val_1, x_val_2]
        elif hyper_parameters['embedding_type'] == 'xlnet':
            x_val_1 = np.array([ques_embed[0]])
            x_val_2 = np.array([ques_embed[1]])
            x_val_3 = np.array([ques_embed[2]])
            x_val = [x_val_1, x_val_2, x_val_3]
        else:
            x_val = ques_embed
        # Predict
        pred = graph.predict(x_val)
        pre = pt.prereocess_idx(pred[0])
        label_pred = pre[0][0][0]
        if count % 1000 == 0:
            print(label_pred)
        y_pred.append(label_pred)

    print("data pred ok!")
    # Convert true and predicted labels to integer indices
    index_y = [pt.l2i_i2l['l2i'][i] for i in y]
    index_pred = [pt.l2i_i2l['l2i'][i] for i in y_pred]
    target_names = [
        pt.l2i_i2l['i2l'][str(i)] for i in list(set((index_pred + index_y)))
    ]
    # Evaluate
    report_predict = classification_report(index_y,
                                           index_pred,
                                           target_names=target_names,
                                           digits=9)
    print(report_predict)
    print("Elapsed time: " + str(time.time() - time_start))
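
`classification_report` here is scikit-learn's; a self-contained toy illustration of the same call, with made-up class names, shows what `report_predict` looks like:

from sklearn.metrics import classification_report

# Toy integer labels only; the class names are invented for illustration.
y_true = [0, 1, 1, 2, 2, 2]
y_pred = [0, 1, 2, 2, 2, 1]
print(classification_report(y_true, y_pred,
                            target_names=['sports', 'games', 'finance'],
                            digits=4))
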
Example #4
def pred_input(path_hyper_parameter=path_hyper_parameters):
    # Interactive prediction on user input
    # Load hyperparameters
    hyper_parameters = load_json(path_hyper_parameter)
    pt = PreprocessText()
    # Initialize the graph and load the trained model
    graph = Graph(hyper_parameters)
    graph.load_model()
    ra_ed = graph.word_embedding
    ques = '我要打王者荣耀'  # sample query: "I want to play Honor of Kings"
    # str to token
    ques_embed = ra_ed.sentence2idx(ques)
    if hyper_parameters['embedding_type'] == 'bert':
        x_val_1 = np.array([ques_embed[0]])
        x_val_2 = np.array([ques_embed[1]])
        x_val = [x_val_1, x_val_2]
    else:
        x_val = ques_embed
    # Predict
    pred = graph.predict(x_val)
    # Map predicted ids back to labels and probabilities
    pre = pt.prereocess_idx(pred[0])
    print(pre)
    while True:
        print("Please input: ")
        ques = input()
        ques_embed = ra_ed.sentence2idx(ques)
        print(ques_embed)
        if hyper_parameters['embedding_type'] == 'bert':
            x_val_1 = np.array([ques_embed[0]])
            x_val_2 = np.array([ques_embed[1]])
            x_val = [x_val_1, x_val_2]
        else:
            x_val = ques_embed
        pred = graph.predict(x_val)
        pre = pt.prereocess_idx(pred[0])
        print(pre)
Example #5
def evaluate(path_hyper_parameter=path_hyper_parameters, rate=1.0):
    # Evaluate on the validation set
    # Load hyperparameters
    hyper_parameters = load_json(path_hyper_parameter)
    pt = PreprocessTextMulti()
    # Initialize the graph and load the trained model
    graph = Graph(hyper_parameters)
    graph.load_model()
    ra_ed = graph.word_embedding
    # get validation data
    ques_list, val_list, que, val = pt.preprocess_label_ques_to_idx(
        hyper_parameters['embedding_type'],
        hyper_parameters['data']['val_data'],
        ra_ed,
        rate=rate,
        shuffle=True)
    print(len(ques_list))
    print("que:", len(que))
    # print(val)

    # str to token
    ques_embed_list = []
    count = 0
    acc_count = 0
    not_none_count = 0
    not_none_acc_count = 0
    sum_iou = 0
    sum_all_iou = 0
    for index, que___ in enumerate(que):
        # print("原句 ", index, que[index])
        # print("真实分类 ", index, val[index])
        # print("ques: ", ques)
        ques_embed = ra_ed.sentence2idx(que[index])
        # print("ques_embed: ", ques_embed)
        if hyper_parameters['embedding_type'] in ['bert', 'albert']:
            x_val_1 = np.array([ques_embed[0]])
            x_val_2 = np.array([ques_embed[1]])
            x_val = [x_val_1, x_val_2]
        else:
            x_val = ques_embed
        # print("x_val", x_val)
        ques_embed_list.append(x_val)
        # Predict
        pred = graph.predict(x_val)
        # print(pred)
        # Map predicted ids back to labels and probabilities
        pre = pt.prereocess_idx(pred[0])
        # print("pre",pre)
        ls_nulti = []
        # Keep the top-1 label and any label scoring above the threshold; labels whose
        # score is within 25% of the last kept score are also kept.
        threshold = 0.44
        top_threshold = 0
        for i, ls in enumerate(pre[0]):
            if i == 0 or ls[1] > threshold:
                ls_nulti.append(ls)
                top_threshold = ls[1]
            elif abs(ls[1] - top_threshold) < top_threshold / 4.0:
                ls_nulti.append(ls)
        # print("预测结果", index, pre[0])
        # print(ls_nulti)
        res = cal_acc(ls_nulti, val[index].split(","))
        res_iou, res_all_iou = cal_iou(ls_nulti, val[index].split(","))
        sum_iou += res_iou
        sum_all_iou += res_all_iou
        if res:
            if val[index] != "无":
                not_none_acc_count += 1
            acc_count += 1
        else:
            print("original sentence ", index, que[index])
            print("true labels ", index, val[index])
            print("pre ", pre)
            print("iou ", res_iou)
        count += 1
        if val[index] != "无":  # "无" means "none"
            not_none_count += 1
    print("acc: ", acc_count / count)
    print("not none acc: ", not_none_acc_count / not_none_count)
    print("average iou: ", sum_iou / sum_all_iou)
    # log
    append_log(hyper_parameters, acc_count / count,
               not_none_acc_count / not_none_count, threshold)
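
`cal_acc` and `cal_iou` are not shown in this listing. Below is a plausible set-based sketch, consistent with how they are called above but an assumption rather than the original implementation:

def cal_acc(ls_multi, true_labels):
    # Assumed semantics: exact match between the predicted and true label sets.
    pred_labels = {label for label, _prob in ls_multi}
    return pred_labels == set(true_labels)

def cal_iou(ls_multi, true_labels):
    # Assumed semantics: intersection and union sizes of the two label sets.
    # Example #5 sums both values and reports sum_iou / sum_all_iou;
    # Example #8 instead uses a single per-sample ratio.
    pred_labels = {label for label, _prob in ls_multi}
    true_set = set(true_labels)
    return len(pred_labels & true_set), len(pred_labels | true_set)
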
Example #6
                                     'lr': 1e-4,
                                     'l2': 1e-9,
                                     'activate_classify': 'softmax',
                                     'embedding_type': 'random',  # options: 'random', 'bert' or 'word2vec'
                                     'is_training': False,
                                     'model_path': path_model_fast_text_baiduqa_2019,},
                         'embedding':{ 'embedding_type': 'random',
                                      'corpus_path': path_embedding_random_char,
                                      'level_type': 'char',
                                      'embed_size': 300,
                                      'len_max': 50,},
                         }
    # ns = np.array([1,2,3,4])
    # print(type(ns))
    pt = PreprocessText()  # instantiate the preprocessor
    graph = Graph(hyper_parameters)
    graph.load_model()
    ra_ed = graph.word_embedding
    ques = '你好呀'
    ques_embed = ra_ed.sentence2idx(ques)
    pred = graph.predict(np.array([ques_embed]))
    pre = pt.prereocess_idx(pred[0])
    print(pre)
    while True:
        print("Please input: ")
        ques = input()
        ques_embed = ra_ed.sentence2idx(ques)
        pred = graph.predict(np.array([ques_embed]))
        pre = pt.prereocess_idx(pred[0])
        print(pre)
Example #7
pt = PreprocessText(path_model_dir)
# Initialize the graph and load the trained model
graph = Graph(hyper_parameters)
graph.load_model()
ra_ed = graph.word_embedding
ques = '我要打王者荣耀'  # sample query: "I want to play Honor of Kings"
# str to token
ques_embed = ra_ed.sentence2idx(ques)
if hyper_parameters['embedding_type'] in ['bert', 'albert']:
    x_val_1 = np.array([ques_embed[0]])
    x_val_2 = np.array([ques_embed[1]])
    x_val = [x_val_1, x_val_2]
else:
    x_val = ques_embed
# Predict
pred = graph.predict(x_val)
# Map predicted ids back to labels and probabilities
pre = pt.prereocess_idx(pred[0])


@app.route("/nlp/textcnn/predict", methods=["GET", "POST"])
def predict():
    ques = request.args.get("text", "")
    ques_embed = ra_ed.sentence2idx(ques)
    print(ques_embed)
    if hyper_parameters['embedding_type'] in ['bert', 'albert']:
        x_val_1 = np.array([ques_embed[0]])
        x_val_2 = np.array([ques_embed[1]])
        x_val = [x_val_1, x_val_2]
    else:
        x_val = ques_embed
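    # Continuation sketch (assumed, not from the original snippet): predict and return
    # the (label, probability) pairs as JSON, following the prediction pattern of the
    # earlier examples. The response shape is an assumption, and `jsonify` is presumed
    # to be imported from flask alongside `request`.
    pred = graph.predict(x_val)
    pre = pt.prereocess_idx(pred[0])
    labels = [[label, float(prob)] for label, prob in pre[0]]
    return jsonify({"text": ques, "labels": labels})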
Example #8
def evaluate(path_hyper_parameter=path_hyper_parameters, rate=1.0):
    # Evaluate on the test set
    # Load hyperparameters
    hyper_parameters = load_json(path_hyper_parameter)
    pt = PreprocessTextMulti()
    # Initialize the graph and load the trained model
    graph = Graph(hyper_parameters)
    graph.load_model()
    ra_ed = graph.word_embedding
    print(hyper_parameters['data']['test_data'])

    # init confusion table
    dict_all = initConfusion()

    # get validation data
    ques_list, val_list, que, val = pt.preprocess_label_ques_to_idx(hyper_parameters['embedding_type'],
                                                                    hyper_parameters['data']['test_data'],
                                                                    ra_ed, rate=rate, shuffle=True)
    print(len(ques_list))
    print("que:",len(que))
    # print(val)

    # str to token
    ques_embed_list = []
    count = 0
    acc_count = 0
    not_none_count = 0
    not_none_acc_count = 0
    sum_iou = 0
    density_correct = 0
    sum_all_iou = 0
    for index, que___ in enumerate(que):
        # print("原句 ", index, que[index])
        # print("真实分类 ", index, val[index])
        # print("ques: ", ques)
        ques_embed = ra_ed.sentence2idx(que[index])
        # print("ques_embed: ", ques_embed)
        if hyper_parameters['embedding_type'] in ['bert', 'albert']:
            x_val_1 = np.array([ques_embed[0]])
            x_val_2 = np.array([ques_embed[1]])
            x_val = [x_val_1, x_val_2]
        else:
            x_val = ques_embed
        # print("x_val", x_val)
        ques_embed_list.append(x_val)
        # Predict
        pred = graph.predict(x_val)
        # print(pred)
        # Map predicted ids back to labels and probabilities
        pre = pt.prereocess_idx(pred[0])
        # print("pre",pre)
        ls_nulti = []
        threshold = 0.60
        top_threshold = 0
        has_scope = False  # whether a scope label ('多发', '散发', '无') has been kept yet
        for i, ls in enumerate(pre[0]):
            if ls[0] in ['多发', '散发', '无']:
                # Scope labels: only the first (highest-scoring) one is kept.
                if not has_scope:
                    has_scope = True
                    ls_nulti.append(ls)
                    if ls[0] in val[index].split(","):
                        dict_all[ls[0]]['TP'] += 1
                    else:
                        dict_all[ls[0]]['FP'] += 1
                else:
                    if ls[0] in val[index].split(","):
                        dict_all[ls[0]]['FN'] += 1
                    else:
                        dict_all[ls[0]]['TN'] += 1
            else:
                # Other labels are kept whenever their probability exceeds the threshold.
                if ls[1] > threshold:
                    ls_nulti.append(ls)
                    if ls[0] in val[index].split(","):
                        dict_all[ls[0]]['TP'] += 1
                    else:
                        dict_all[ls[0]]['FP'] += 1
                else:
                    if ls[0] in val[index].split(","):
                        dict_all[ls[0]]['FN'] += 1
                    else:
                        dict_all[ls[0]]['TN'] += 1

        # print("预测结果", index, pre[0])
        # print(ls_nulti)
        res = cal_acc(ls_nulti, val[index].split(","))
        res_iou = cal_iou(ls_nulti, val[index].split(","))
        sum_iou += res_iou
        # sum_all_iou+=res_all_iou
        density_res = checkDensity(ls_nulti, val[index].split(","))
        if density_res:
            density_correct += 1
        if res:
            acc_count += 1
        else:
            print("original sentence ", index, que[index])
            print("true labels ", index, val[index])
            print("pre ", pre)
            print("iou ", res_iou)
            print(ls_nulti)
        count += 1
        if val[index] != "无":  # "无" means "none"
            not_none_count += 1
        # print("softmax count",softmax_count)
    print("acc: ", acc_count / count)
    # print("not none acc: ", not_none_acc_count / not_none_count)
    print("average iou: ", sum_iou / count)
    print('density correct', density_correct/count)
    print(dict_all)
    import prettytable
    tb = prettytable.PrettyTable()
    tb.field_names = [" ", "Recall", "Precision", "TP", "FP", "TN", "FN"]
    for item in dict_all:
        if dict_all[item]['TP'] + dict_all[item]['FN'] == 0:
            recall = 1
        else:
            recall = dict_all[item]['TP'] / (dict_all[item]['TP'] + dict_all[item]['FN'])
        if dict_all[item]['TP'] + dict_all[item]['FP'] == 0:
            precision = 1
        else:
            precision = dict_all[item]['TP'] / (dict_all[item]['TP'] + dict_all[item]['FP'])
        # print(item, recall, precision)
        tb.add_row([item, recall, precision, dict_all[item]['TP'], dict_all[item]['FP'],
                    dict_all[item]['TN'], dict_all[item]['FN']])
    print(tb)
    # log
    append_log(hyper_parameters, acc_count / count, not_none_acc_count / not_none_count, threshold)
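
`initConfusion` and `checkDensity` are not included in the listing. A hedged sketch of what they might look like, inferred from how `dict_all` and `density_res` are used above; the label set and the bodies are assumptions:

def initConfusion(labels=('多发', '散发', '无')):
    # One TP/FP/TN/FN counter per label; the real label set presumably also
    # contains the non-scope labels that are scored against the 0.60 threshold.
    return {label: {'TP': 0, 'FP': 0, 'TN': 0, 'FN': 0} for label in labels}

def checkDensity(ls_multi, true_labels, density_labels=('多发', '散发', '无')):
    # Assumed semantics: the predicted density/scope labels match the true ones.
    pred = {label for label, _prob in ls_multi if label in density_labels}
    true = {label for label in true_labels if label in density_labels}
    return pred == true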