#!/usr/bin/python
# -*- coding: utf-8 -*-
"""Match the newest user question against the disease vocabulary and answer it.

Scans the latest question for a known disease name; if none is found, falls
back to earlier questions in the dialogue history, and finally asks the user
to type the disease name directly.
"""
from yujv_process import process_line
from load_mysql import load_dataset
from GBDT_cidai import dafu_prediction

# Data matching
disease_list = load_dataset("疾病库")  # disease vocabulary table
question_list = []  # dialogue history — presumably populated elsewhere at runtime; TODO confirm

# Guard: the original indexed question_list[-1] unconditionally, which raises
# IndexError when the history is empty.
if question_list:
    line = process_line(question_list[-1])  # tokenize/preprocess the newest question

    found = False
    for token in line:
        if token in disease_list:
            # The question itself names a disease — answer it directly.
            leibie, dafu = dafu_prediction(line)
            print(leibie + '\n' + dafu)
            found = True
            break

    if not found:
        # Fall back to earlier questions, newest first.
        question_list = question_list[:-1]
        for earlier in question_list[::-1]:
            earlier_tokens = process_line(earlier)
            for token in earlier_tokens:
                if token in disease_list:
                    # Bug fix: list.append returns None; the original passed
                    # that None to dafu_prediction. Append, then reuse `line`.
                    line.append(token)
                    leibie, dafu = dafu_prediction(line)
                    print(leibie + '\n' + dafu)
                    found = True
                    break
            if found:
                break
        else:
            # No disease name anywhere in the history — ask the user for it.
            print("不好意思没有找到您所说的疾病名称")
            disease_name = input("请输入您描述的疾病名称:")
            # Same append-returns-None fix as above.
            line.append(disease_name)
            leibie, dafu = dafu_prediction(line)
            print(leibie + '\n' + dafu)
#!/usr/bin/python # -*- coding: utf-8 -*- #导入常用的函数包 import random from sklearn.model_selection import train_test_split from preprocess import preprocess from preprocess import preprocess1 from classifierGBDT import TextClassifier from load_mysql import load_dataset from load_mysql import processing_null from cos import ComputerNearestNeighbor #读取数据并预处理 df_bingyin_list = load_dataset('bingyin') df_zhenduan_list = load_dataset('zhenduan') df_zhengzhuang_list = load_dataset('zhengzhuang') df_zhiliao_list = load_dataset('zhiliao') #对各个类别数据进行空值符处理 df_bingyin_word = processing_null(df_bingyin_list)[0:1000] # print(len(df_bingyin_word)) df_zhenduan_word = processing_null(df_zhenduan_list)[0:1000] df_zhengzhuang_word = processing_null(df_zhengzhuang_list)[0:1000] df_zhiliao_word = processing_null(df_zhiliao_list)[0:1000] bingyin = df_bingyin_word.values.tolist() zhenduan = df_zhenduan_word.values.tolist() zhengzhuang = df_zhengzhuang_word.values.tolist() zhiliao = df_zhiliao_word.values.tolist() #分别把各个类别数据整理成一个列表形式 sentences = []
# Corpus preparation: load stop words and merge the four answer categories
# into one DataFrame (gensim/jieba imports suggest a word-embedding pipeline
# continues below this chunk — not visible here).
from load_mysql import load_dataset
from load_mysql import processing_null
import pandas as pd
import gensim
import jieba
from random import shuffle
import multiprocessing

# Load the stop-word list: one word per line, read as a single 'stopword'
# column; quoting=3 (QUOTE_NONE) keeps literal quote characters intact.
stopwords = pd.read_csv('data/stopwords.txt', index_col=False, quoting=3,
                        sep="\t", names=['stopword'], encoding='utf-8')
stopwords = stopwords['stopword'].values  # column as an array for membership tests

# Read data and preprocess — same four tables as the classifier script:
# bingyin = cause, zhenduan = diagnosis, zhengzhuang = symptoms, zhiliao = treatment.
df_bingyin = load_dataset('bingyin')
df_zhenduan = load_dataset('zhenduan')
df_zhengzhuang = load_dataset('zhengzhuang')
df_zhiliao = load_dataset('zhiliao')

# Strip null/placeholder entries, capping each category at its first 1000 rows.
df_bingyin = processing_null(df_bingyin)[0:1000]
df_zhenduan = processing_null(df_zhenduan)[0:1000]
df_zhengzhuang = processing_null(df_zhengzhuang)[0:1000]
df_zhiliao = processing_null(df_zhiliao)[0:1000]

# Stack all four categories vertically into one DataFrame for the corpus.
frames = [df_bingyin, df_zhenduan, df_zhengzhuang, df_zhiliao]
df = pd.concat(frames, axis=0, join='outer')
# print(len(df))
# print(df.head())
# for line in df:
#     print(line)