def get_ans(query1, depart):
    """Rank disease records against a query and return the best matches.

    The disease table is read from CSV, its ``symptoms`` column is renamed
    to ``content`` and the rows are restricted by the ``subject`` column.
    A BM25 retriever is fitted on the remaining rows, each retrieval score
    is multiplied by the row's ``count`` value, and the five highest
    weighted rows are returned.

    Args:
        query1: Free-text query. It is tokenised with ``stop_word`` and
            re-joined with spaces before being passed to the retriever.
        depart: ``subject`` value to restrict the search to. The sentinel
            "없음" means no department was given; in that case the subjects
            returned by ``classification_(query1)`` are used instead.

    Returns:
        A DataFrame with the first three columns of the filtered table
        (after ``reset_index``) for at most five rows, plus a ``score``
        column holding each row's percentage share of the summed top
        scores. When the scores cannot be normalised (best score is 0 or
        the sum is 0) the rows share 100 equally. The frame is empty when
        the retriever returns no candidates.
    """
    df = pd.read_csv(
        "./data/total_disease2_count_FINAL.csv", encoding='cp949'
    ).fillna('Null')
    df.rename(columns={'symptoms': 'content'}, inplace=True)

    # classification_ is only consulted when no department was supplied.
    subjects = [depart] if depart != "없음" else classification_(query1)
    df = df[df["subject"].isin(subjects)].reset_index()

    retriever = BM25Retriever(lowercase=False, tokenizer=stop_word)
    retriever.fit(df)
    query1 = " ".join(stop_word(query1))
    raw_scores = retriever.predict(query1)

    # Weight every retrieval score by the row's 'count' value.
    # NOTE(review): assumes predict() keys are positional row numbers of df
    # (they are used with iloc) -- confirm against the retriever.
    weighted = {
        idx: float(score) * df.iloc[idx]['count']
        for idx, score in raw_scores.items()
    }
    ranked = sorted(
        weighted.items(), key=lambda item: item[1], reverse=True
    )[:5]
    top_rows = [idx for idx, _ in ranked]
    top_scores = [float(score) for _, score in ranked]

    total = sum(top_scores)
    if top_scores and top_scores[0] != 0 and total != 0:
        # Round after scaling so values such as 28.999999999999996 cannot
        # leak out of the percentage column.
        percent_score = [
            float(round(score / total * 100)) for score in top_scores
        ]
    elif top_scores:
        # No usable signal: split 100 evenly over the rows actually found
        # (20 each for the usual five rows).
        percent_score = [100 // len(top_scores)] * len(top_scores)
    else:
        percent_score = []

    # Copy so that adding the column never writes into a view of df.
    top_5_df = df.iloc[top_rows, [0, 1, 2]].copy()
    top_5_df["score"] = percent_score
    return top_5_df
def Pos_extract(Data):
    """Join the lemmas of selected morphemes from an analysis response.

    ``Data.data`` is decoded as UTF-8 and parsed as JSON. For every entry
    under ``return_object -> sentence`` the ``morp`` list is scanned and the
    ``lemma`` of each morpheme whose ``type`` is 'NNG', 'NNP' or 'VV' is
    kept (presumably common nouns, proper nouns and verbs -- confirm against
    the analyser's tag set).

    Returns:
        The kept lemmas in encounter order, separated by single spaces.
    """
    payload = json.loads(str(Data.data, "utf-8"))
    sentences = payload['return_object']['sentence']
    wanted_tags = ('NNG', 'NNP', 'VV')
    lemmas = [
        morpheme['lemma']
        for sentence in sentences
        for morpheme in dict(sentence)['morp']
        if morpheme['type'] in wanted_tags
    ]
    return " ".join(lemmas)


# Document collection; the 'paragraphs' column is stored as a Python literal.
df = pd.read_csv(
    'data/bnpp_newsroom_v1.1/jungchat_result_191031.csv',
    converters={'paragraphs': literal_eval},
)

# Both retrievers are built with the same BM25 settings.
_BM25_OPTIONS = dict(ngram_range=(1, 2), max_df=1.00, min_df=1, stop_words=None)
retriever = BM25Retriever(**_BM25_OPTIONS)
retriever_temp = BM25Retriever(**_BM25_OPTIONS)
# A third retriever (retriever_doc) with the same settings is disabled here.

# The retriever is fitted on the unfiltered frame; filtering happens after.
retriever.fit(df)
df = filter_paragraphs(df, min_length=10)
cdqa_pipeline = QAPipeline(reader='models/bert_qa_korquad_vCPU.joblib')
best_idx_scores = ''

# Interactive prompt: keep reading queries until the user types 'quit'.
while True:
    query = input('입력창:')
    if query == 'quit':
        break
from cdqa.utils.evaluation import f1_score, exact_match_score
from cdqa.retriever import TfidfRetriever, BM25Retriever

# Dataset
df = pd.read_csv(
    './data/data_augmentation.csv',
    converters={'paragraphs': literal_eval},
    encoding='utf-8',
)

# Redefine paragraphs as title + original paragraphs, so element 0 of every
# 'paragraphs' list is the title and element 1 is the first original paragraph.
df['paragraphs_old'] = df['paragraphs']
df['paragraphs'] = df.apply(
    lambda row: [row['title']] + row['paragraphs_old'], axis=1
).copy()

# Same file loaded without the literal_eval converter; 100 rows are sampled
# with a fixed seed.
data = pd.read_csv('./data/data_augmentation.csv', encoding='utf-8')
data_sampling = data.sample(100, random_state=66)

cdqa_pipeline = QAPipeline(
    reader='bert_qa_multi_epoch3.joblib',
    retrieve_by_doc=True,
    retriever='bm25',
)
cdqa_pipeline.fit_retriever(df=df)
cdqa_pipeline.cuda()

retriever = BM25Retriever(
    ngram_range=(1, 2),
    max_df=0.8,
    min_df=3,
    stop_words=None,
    lowercase=True,
    top_n=5,
)
retriever.fit(df=df)


def f1(dataframe, dataframe2):
    """Return the best-retrieved paragraph for up to 100 questions.

    For each of the first 100 rows of ``dataframe2`` (fewer if the frame is
    shorter) the value in the third column is used as the question, the
    module-level ``retriever`` ranks the rows of the module-level ``df``,
    and element 1 of the first-ranked row's ``paragraphs`` list is taken as
    the prediction, with non-breaking spaces (U+00A0) removed.

    Args:
        dataframe: Unused; kept so existing callers keep working.
        dataframe2: Frame whose third column (position 2) holds questions.

    Returns:
        List of predicted paragraph strings, one per processed question.

    Raises:
        IndexError: If the retriever returns no candidates for a question.
    """
    answer_list = []
    # Cap at 100 questions as before, but never read past the end of a
    # shorter frame.
    for number in range(min(100, len(dataframe2))):
        question = dataframe2.iloc[number, 2]
        best_idx_scores = retriever.predict(question)
        # Only the first-ranked row is used, so only that row is looked up;
        # lower-ranked rows with short paragraph lists can no longer fail.
        top_idx = list(best_idx_scores.keys())[0]
        prediction = df.loc[top_idx, 'paragraphs'][1].replace(u'\xa0', u'')
        answer_list.append(prediction)
    return answer_list
from flask import Flask, request, jsonify from ast import literal_eval import pandas as pd from cdqa.utils.filters import filter_paragraphs from cdqa.utils.download import download_model, download_bnpp_data from cdqa.pipeline.cdqa_sklearn import QAPipeline from cdqa.retriever import BM25Retriever from ETRI import * import time from khaiii_def import * app = Flask(__name__) df = pd.read_csv('jungchat_result_191102.csv',converters={'paragraphs': literal_eval}) cdqa_pipeline = QAPipeline(reader='bert_qa_korquad_vCPU.joblib')#모델을 불러온다 retriever = BM25Retriever(ngram_range=(1, 2), max_df=1.00,min_df=1, stop_words=None)#문서와의 유사도를 구하기위한 리트리버 retriever_temp= BM25Retriever(ngram_range=(1, 2), max_df=1.00,min_df=1, stop_words=None)#문장과의 유사도를 구하기 위한 리트리버 retriever.fit(df)#모든 문서의 내용을 담는다 df = filter_paragraphs(df) best_idx_scores = '' def text_tranform(text) : return '\n'.join(text.split(', ')) def make_query(text) : dataSend = { "version": "2.0", "template": { "outputs": [{ "simpleText":{ "text" : text} }] }