Example #1
def create_answers(deck_id, query):
    decks_collection = db["decks"]
    slides_collection = db["slides"]
    decks = decks_collection.find({"_id": deck_id})
    target_deck = decks[0]
    author_id = target_deck["user"]
    author_slides = slides_collection.find({"user": author_id})

    df = pd.DataFrame()

    for slide in author_slides:
        revision = slide["revisions"][-1]
        usages = revision["usage"]
        for usage in usages:
            if usage["id"] == deck_id:
                content = html2text.html2text(revision["content"])
                paragraphs = content.split('\n\n')
                df = df.append(
                    {
                        "date": revision["timestamp"],
                        "title": revision["title"],
                        "category": "Infromation",
                        "link": "",
                        "abstract": "",
                        "paragraphs": paragraphs,
                        "revision_id": revision["id"],
                        "slide_id": slide["_id"]
                    },
                    ignore_index=True)
                break

    download_model(model='bert-squad_1.1', dir='./models')

    # df = filter_paragraphs(df)
    cdqa_pipeline = QAPipeline(reader='models/bert_qa.joblib',
                               max_df=0.95,
                               min_df=3)
    cdqa_pipeline.fit_retriever(df)

    predictions = cdqa_pipeline.predict(
        query, n_predictions=5)  #retriever_score_weight=0.99

    answers = []
    for prediction in predictions:
        slide_id = df.loc[df["title"] == prediction[1]].iloc[0]['slide_id']
        revision_id = df.loc[df["title"] ==
                             prediction[1]].iloc[0]['revision_id']
        answers.append({
            "slide_id": int(slide_id),
            "revision_id": int(revision_id),
            "answer": prediction[0],
            "title": prediction[1],
            "paragraph": prediction[2],
            "score": prediction[3]
        })

    return answers
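
For context, a minimal sketch of how a caller might consume the helper above (the deck id and question are hypothetical; db, the model download, and the cdQA imports are assumed to be set up as in the function):

answers = create_answers(deck_id=42, query='What is the main topic of this deck?')  # hypothetical deck id and question
for a in answers:
    # each entry pairs the reader's answer with enough metadata to link back to the slide
    print(a['score'], a['answer'], '(slide', a['slide_id'], ')')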
Example #2
def question(text, query):
    print(text)
    test = []
    for i in sent_tokenize(text):
        if len(i) > 2:
            test.append(i)

    n = 4
    # using list comprehension
    final = [test[i * n:(i + 1) * n] for i in range((len(test) + n - 1) // n)]
    title_s = []
    for j in range(len(final)):
        title_s.append(f'Title{j}')

    data = [title_s, final]
    df3 = pd.DataFrame(data=data)
    df3 = df3.transpose()
    df3.columns = ['title', 'paragraphs']
    print(df3)
    #st.text('Hold on this will take some time')

    from ast import literal_eval

    from cdqa.utils.filters import filter_paragraphs
    from cdqa.utils.download import download_model, download_bnpp_data
    from cdqa.pipeline.cdqa_sklearn import QAPipeline

    # Download data and models
    #download_bnpp_data(dir='./data/bnpp_newsroom_v1.1/')
    #download_model(model='bert-squad_1.1', dir='./models')

    # Loading data and filtering / preprocessing the documents
    # (note: this df is never used below; the retriever is fitted on df3)
    df = pd.read_csv(
        'D:/devjams/Machine-Learning-Web-Apps-master/NLPIffy_NLP_Based_SpaCy_Flask_App&_API/cdQA/data/bnpp_newsroom_v1.1/bnpp_newsroom-v1.1.csv',
        converters={'paragraphs': literal_eval})
    df = filter_paragraphs(df)
    #st.text('Please Wait. We are looking for the answer to your question')
    # Loading QAPipeline with CPU version of BERT Reader pretrained on SQuAD 1.1
    cdqa_pipeline = QAPipeline(
        reader=
        'D:/devjams/Machine-Learning-Web-Apps-master/NLPIffy_NLP_Based_SpaCy_Flask_App&_API/bert_qa_vGPU-sklearn.joblib'
    )

    # Fitting the retriever to the list of documents in the dataframe
    cdqa_pipeline.fit_retriever(df3)
    print(query)
    #st.text('Almost done.......')
    #query = 'Intellectual Property Rights'
    try:
        prediction = cdqa_pipeline.predict(query)
    except Exception as e:
        print(e)
        return None
    #st.text(prediction[2])
    return prediction[2]
Example #3
def find_answer(question):
    # Set your path to pdf directory
    df = pdf_converter(directory_path='pdf_folder/')
    cdqa_pipeline = QAPipeline(reader='models/bert_qa.joblib')
    cdqa_pipeline.fit_retriever(df)
    query = question + '?'
    prediction = cdqa_pipeline.predict(query)

    # print('query: {}\n'.format(query))
    # print('answer: {}\n'.format(prediction[0]))
    # print('title: {}\n'.format(prediction[1]))
    # print('paragraph: {}\n'.format(prediction[2]))
    return prediction[0]
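
Note that find_answer converts the PDFs and reloads the reader on every call; for repeated questions it is cheaper to fit once at module load and reuse the pipeline, as several of the examples below do. A minimal sketch of that pattern, using the same paths as above:

# sketch: fit once at import time, reuse across calls
df = pdf_converter(directory_path='pdf_folder/')
cdqa_pipeline = QAPipeline(reader='models/bert_qa.joblib')
cdqa_pipeline.fit_retriever(df)

def find_answer(question):
    # only the prediction happens per call now
    return cdqa_pipeline.predict(question + '?')[0]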
Example #4
def search_view(request):
    if request.POST:
        question = request.POST.get('question')
        for idx, url in enumerate(
                search(question, tld="com", num=10, stop=3, pause=2)):
            crawl_result(url, idx)
        # change path to pdfs folder
        df = pdf_converter(directory_path='/path/to/pdfs')
        cdqa_pipeline = QAPipeline(reader='models/bert_qa.joblib')
        cdqa_pipeline.fit_retriever(df)
        prediction = cdqa_pipeline.predict(question)
        data = {'answer': prediction[0]}
        return JsonResponse(data)
    return render(request, 'search.html')
Example #5
def execute_pipeline(query):
    download_bnpp_data('./data/bnpp_newsroom_v1.1/')
    download_model('bert-squad_1.1', dir='./models')
    df = pd.read_csv('./data/bnpp_newsroom_v1.1/bnpp_newsroom-v1.1.csv',
                     converters={'paragraphs': literal_eval})
    df = filter_paragraphs(df)

    cdqa_pipeline = QAPipeline(reader='models/bert_qa_vCPU-sklearn.joblib')
    cdqa_pipeline.fit_retriever(X=df)

    prediction = cdqa_pipeline.predict(X=query)

    result = (prediction[0], prediction[1])

    return result
Example #6
def execute_pipeline(query):
    df = pd.read_csv('data/bnpp_newsroom_v1.1/bnpp_newsroom-v1.1.csv',
                     converters={'paragraphs': literal_eval})
    df = filter_paragraphs(df)

    cdqa_pipeline = QAPipeline(
        reader='models/bert_qa_vCPU-sklearn.joblib')
    cdqa_pipeline.fit(X=df)
    cdqa_pipeline.reader.output_dir = None

    prediction = cdqa_pipeline.predict(X=query)

    result = (prediction[0], prediction[1])

    return result
Example #7
def test_evaluate_pipeline():

    download_bnpp_data("./data/bnpp_newsroom_v1.1/")
    download_model("bert-squad_1.1", dir="./models")
    df = pd.read_csv(
        "./data/bnpp_newsroom_v1.1/bnpp_newsroom-v1.1.csv",
        converters={"paragraphs": literal_eval},
    )
    df = filter_paragraphs(df)

    test_data = {
        "data": [
            {
                "title": "BNP Paribas’ commitment to universities and schools",
                "paragraphs": [
                    {
                        "context": "Since January 2016, BNP Paribas has offered an Excellence Program targeting new Master’s level graduates (BAC+5) who show high potential. The aid program lasts 18 months and comprises three assignments of six months each. It serves as a strong career accelerator that enables participants to access high-level management positions at a faster rate. The program allows participants to discover the BNP Paribas Group and its various entities in France and abroad, build an internal and external network by working on different assignments and receive personalized assistance from a mentor and coaching firm at every step along the way.",
                        "qas": [
                            {
                                "answers": [
                                    {"answer_start": 6, "text": "January 2016"},
                                    {"answer_start": 6, "text": "January 2016"},
                                    {"answer_start": 6, "text": "January 2016"},
                                ],
                                "question": "Since when does the Excellence Program of BNP Paribas exist?",
                                "id": "56be4db0acb8001400a502ec",
                            }
                        ],
                    }
                ],
            }
        ],
        "version": "1.1",
    }

    with open("./test_data.json", "w") as f:
        json.dump(test_data, f)

    cdqa_pipeline = QAPipeline(reader="./models/bert_qa_vCPU-sklearn.joblib", n_jobs=-1)
    cdqa_pipeline.fit_retriever(X=df)

    eval_dict = evaluate_pipeline(cdqa_pipeline, "./test_data.json", output_dir=None)

    assert eval_dict["exact_match"] > 0.8

    assert eval_dict["f1"] > 0.8
def get_distilbert_model():
    if not os.path.exists('./models'):
        os.makedirs('./models')
    if not os.path.exists('./models/distilbert_qa.joblib'):
        download_model(model="{}-squad_1.1".format('distilbert'),
                       dir='./models')
    return QAPipeline(reader='./models/distilbert_qa.joblib',
                      max_df=1.0,
                      min_df=1)
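
A minimal usage sketch for the helper above, assuming df is a DataFrame with 'title' and 'paragraphs' columns as in the other examples:

pipeline = get_distilbert_model()
pipeline.fit_retriever(df)  # df is assumed to hold 'title' and 'paragraphs' columns
prediction = pipeline.predict('Since when does the Excellence Program of BNP Paribas exist?')
print(prediction[0])  # the predicted answer span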
Example #9
def ask():
    name = request.form['btn-input']

    #print(name)
    with open('current.txt') as f:
        file1 = f.read().rstrip()
    cdqa_pipeline = QAPipeline(reader='models/bert_qa_vCPU-sklearn.joblib')
    row = file_open(file1)
    df = pd.DataFrame(row)
    df = df.T
    df.columns = ['title', 'paragraphs']
    #print(df.head())
    # Fitting the retriever to the list of documents in the dataframe
    cdqa_pipeline.fit_retriever(df)
    prediction = cdqa_pipeline.predict(name)
    ret = [name, prediction[0], prediction[1], prediction[2]]
    speech = ret[1] + "\n\n Related Paragraph" + ret[3]
    print('Answer:', speech)
    #return speech
    return render_template('index.html', value1=name, value2=speech)
Example #10
def execute_pipeline(query, n_predictions=None):
    download_bnpp_data("./data/bnpp_newsroom_v1.1/")
    download_model("bert-squad_1.1", dir="./models")
    df = pd.read_csv(
        "./data/bnpp_newsroom_v1.1/bnpp_newsroom-v1.1.csv",
        converters={"paragraphs": literal_eval},
    )
    df = filter_paragraphs(df)

    cdqa_pipeline = QAPipeline(reader="models/bert_qa_vCPU-sklearn.joblib")
    cdqa_pipeline.fit_retriever(X=df)
    if n_predictions is not None:
        predictions = cdqa_pipeline.predict(X=query,
                                            n_predictions=n_predictions)
        result = []

        for answer, title, paragraph in predictions:
            prediction = (answer, title)
            result.append(prediction)
        return result
    else:
        prediction = cdqa_pipeline.predict(X=query)
        result = (prediction[0], prediction[1])
        return result
Example #11
        for i in range(len(Extract_b['morp'])):
            # keep common nouns (NNG), proper nouns (NNP), and verbs (VV)
            if Extract_b['morp'][i]['type'] in ('NNG', 'NNP', 'VV'):
                Noun.append(Extract_b['morp'][i]['lemma'])
    return " ".join(Noun)

df = pd.read_csv('data/bnpp_newsroom_v1.1/jungchat_result_191031.csv',
                 converters={'paragraphs': literal_eval})

retriever = BM25Retriever(ngram_range=(1, 2), max_df=1.00, min_df=1, stop_words=None)
retriever_temp = BM25Retriever(ngram_range=(1, 2), max_df=1.00, min_df=1, stop_words=None)
#retriever_doc = BM25Retriever(ngram_range=(1, 2), max_df=1.00, min_df=1, stop_words=None)
retriever.fit(df)

df = filter_paragraphs(df, min_length=10)

cdqa_pipeline = QAPipeline(reader='models/bert_qa_korquad_vCPU.joblib')

best_idx_scores = ''

while True:
    query = input('Input: ')
    if query == 'quit':
        break
    POS_query = ETRI_POS_Tagging(query)
    #print(list(list(retriever.predict(ETRI_POS_Tagging(query)).values())[0])[0])
    if max(retriever.predict(POS_query).values()) >= 1.5 or not best_idx_scores:

        if best_idx_scores:  # scores exist already, i.e. this is a follow-up query
            if max(retriever_temp.predict(POS_query).values()) < 1.5:  # similarity with the current document is low
                best_idx_scores = retriever.predict(POS_query)
Example #12
File: qa_index.py Project: viqee/qa
cfgs = reader.read()
configs = json.loads(cfgs)

question = configs['question']

data_directory = '/data/'
models_directory = '/models/'

# download_squad(dir = './' + data_directory)
download_bnpp_data(dir = './' + data_directory)
# download_model('distilbert-squad_1.1', dir = './' + models_directory)
download_model('bert-squad_1.1', dir = './' + models_directory)

df = pandas.read_csv(data_directory + '/bnpp_paribas/-??-.csv', converters={'paragraphs': ast.literal_eval})
df = filter_paragraphs(df)
cdqa_pipeline = QAPipeline(reader = models_directory + '/bert_qa/bert_qa.joblib')
cdqa_pipeline.fit_retriever(df)
# cdqa_pipeline.fit_reader('path to squad like dataset . json')
prediction = cdqa_pipeline.predict(question)  # optionally pass n_predictions=? (? = number of predictions)
# cdqa_pipeline.dump_reader('path to save . joblib') # save reader model

query = 'query: {}\n'.format(question)
answer = 'answer: {}\n'.format(prediction[0])
title = 'title: {}\n'.format(prediction[1])
paragraph = 'paragraph: {}\n'.format(prediction[2])

result = query + answer + title + paragraph

notify2.init('question answer')
notif = notify2.Notification('qa', result)
# notif.set_urgency(notify2.URGENCY_CRITICAL)
notif.show()  # display the notification
Example #13
# coding: utf-8
import os
import pandas as pd
from ast import literal_eval
import cdqa
from cdqa.utils.filters import filter_paragraphs
from cdqa.pipeline.cdqa_sklearn import QAPipeline

df = pd.read_csv('/home/ubuntu/data/bnpp_newsroom_v1.1/bnpp_newsroom-v1.1.csv',
                 converters={'paragraphs': literal_eval})
df = filter_paragraphs(df)

df['title'] = df['category']

cdqa_pipeline = QAPipeline(
    reader='/home/ubuntu/data/bert_qa_vCPU-sklearn.joblib')
cdqa_pipeline.fit(X=df)

print('At result')


class QAModule():
    def __init__(self):
        self.query = 'Since when does the Excellence Program of BNP Paribas exist?'

    def getAnswer(self, query):
        prediction = cdqa_pipeline.predict(X=query)
        return prediction


class SentimentModule():
Example #14
from flask import Flask, request, jsonify
from ast import literal_eval
import pandas as pd
from cdqa.utils.filters import filter_paragraphs
from cdqa.utils.download import download_model, download_bnpp_data
from cdqa.pipeline.cdqa_sklearn import QAPipeline
from cdqa.retriever import BM25Retriever
from ETRI import *
import time
from khaiii_def import *
app = Flask(__name__)
df = pd.read_csv('jungchat_result_191102.csv', converters={'paragraphs': literal_eval})
cdqa_pipeline = QAPipeline(reader='bert_qa_korquad_vCPU.joblib')  # load the model
retriever = BM25Retriever(ngram_range=(1, 2), max_df=1.00, min_df=1, stop_words=None)  # retriever for scoring similarity against documents
retriever_temp = BM25Retriever(ngram_range=(1, 2), max_df=1.00, min_df=1, stop_words=None)  # retriever for scoring similarity against sentences
retriever.fit(df)  # index the contents of every document
df = filter_paragraphs(df)
best_idx_scores = ''

def text_tranform(text):
    return '\n'.join(text.split(', '))

def make_query(text):
    dataSend = {
        "version": "2.0",
        "template": {
            "outputs": [{
                "simpleText": {
                    "text": text
                }
            }]
        }
    }
    return dataSend
Example #15
import pandas as pd
from ast import literal_eval

from cdqa.utils.filters import filter_paragraphs
from cdqa.utils.download import download_model, download_bnpp_data
from cdqa.pipeline.cdqa_sklearn import QAPipeline

# Download data and models
#download_bnpp_data(dir='./data/bnpp_newsroom_v1.1/')
#download_model(model='bert-squad_1.1', dir='./models')

# Loading data and filtering / preprocessing the documents
df = pd.read_csv('data/bnpp_newsroom_v1.1/bnpp_newsroom-v1.1.csv',
                 converters={'paragraphs': literal_eval})
df = filter_paragraphs(df)

# Loading QAPipeline with CPU version of BERT Reader pretrained on SQuAD 1.1
cdqa_pipeline = QAPipeline(reader='models/bert_qa_vCPU-sklearn.joblib')

# Fitting the retriever to the list of documents in the dataframe
_ = cdqa_pipeline.fit_retriever(df)

# Sending a question to the pipeline and getting prediction
query = 'Since when does the Excellence Program of BNP Paribas exist?'
prediction = cdqa_pipeline.predict(query)

print('query: {}\n'.format(query))
print('answer: {}\n'.format(prediction[0]))
print('title: {}\n'.format(prediction[1]))
print('paragraph: {}\n'.format(prediction[2]))
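
For reference, the retriever in these examples is fitted on a DataFrame with a 'title' column and a 'paragraphs' column holding a list of strings per row; a minimal hand-built frame (hypothetical content) looks like:

df = pd.DataFrame({
    # hypothetical rows, echoing the BNP Paribas example above
    'title': ['Excellence Program'],
    'paragraphs': [[
        'Since January 2016, BNP Paribas has offered an Excellence Program.',
        'The program lasts 18 months and comprises three assignments.',
    ]],
})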
Example #16
import requests
from flask import Flask, request, Response
from flask_ngrok import run_with_ngrok
import pandas as pd
from ast import literal_eval
from cdqa.utils.filters import filter_paragraphs
from cdqa.utils.download import download_model, download_bnpp_data
from cdqa.pipeline.cdqa_sklearn import QAPipeline

API_KEY = '936714777:AAGFCBbeOAClrTsgmMMOsYG3HkaV7Ck5p-w'

app = Flask(__name__)
run_with_ngrok(app)
df = pd.read_csv('data/bnpp_newsroom_v1.1/jungchat_result.csv',
                 converters={'paragraphs': literal_eval})
cdqa_pipeline = QAPipeline(reader='models/bert_qa_korquad_vCPU.joblib')
cdqa_pipeline.fit_retriever(df)


def parse_message(message):
    chat_id = message['message']['chat']['id']
    msg = message['message']['text']

    return chat_id, msg


def send_message(chat_id, query):
    url = 'https://api.telegram.org/bot{token}/sendMessage'.format(
        token=API_KEY)
    # bundle the variables into a dictionary
Example #17
import pandas as pd
from ast import literal_eval
from cdqa.pipeline.cdqa_sklearn import QAPipeline
from rasa_sdk import Action

# read the csv file
df = pd.read_csv(
    '/Users/ashutoshvishnoi/Data_Science/intern_2/products/BankCurrupcy/qa_system/sample_data2/'
    'answs.csv',
    converters={'paragraphs': literal_eval})

# Load the bert qa model
cdqa_pipeline = QAPipeline(
    reader='/Users/ashutoshvishnoi/Data_Science/intern_2/products/BankCurrupcy/'
    'qa_system/models/bert_qa.joblib')

ques_dict = []

cdqa_pipeline.fit_retriever(df)
print('-----Model loaded successfully and fit successfully----')


class ActionGetNewst(Action):
    def name(self):
        return 'action_get_bertAns'

    def run(self, dispatcher, tracker, domain):
        query = tracker.latest_message['text']
        prediction = cdqa_pipeline.predict(query, n_predictions=3)

        # dispatcher.utter_message('query: {}\n'.format(query))
Example #18
import os
from ast import literal_eval
import pandas as pd

from flask import Flask, request, jsonify
from flask_cors import CORS
from cdqa.utils.filters import filter_paragraphs
from cdqa.pipeline.cdqa_sklearn import QAPipeline

app = Flask(__name__)
CORS(app)

dataset_path = os.environ["dataset_path"]
reader_path = os.environ["reader_path"]

df = pd.read_csv(dataset_path, converters={"paragraphs": literal_eval})
df = filter_paragraphs(df)

cdqa_pipeline = QAPipeline(reader=reader_path)
cdqa_pipeline.fit(X=df)


@app.route("/api", methods=["GET"])
def api():

    query = request.args.get("query")
    prediction = cdqa_pipeline.predict(X=query)

    return jsonify(query=query,
                   answer=prediction[0],
                   title=prediction[1],
                   paragraph=prediction[2])
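
A hypothetical client call against the /api route above (host and port are assumptions; adjust to your deployment):

import requests

# assumed host/port for a local run of the Flask app
resp = requests.get('http://localhost:5000/api',
                    params={'query': 'Since when does the Excellence Program of BNP Paribas exist?'})
print(resp.json()['answer'])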
Example #19
import pandas as pd
import streamlit as st
from cdqa.utils.download import download_squad, download_model

download_squad(dir=directory)
download_model('bert-squad_1.1', dir=directory)
download_model('distilbert-squad_1.1', dir=directory)

from ast import literal_eval
from cdqa.utils.filters import filter_paragraphs
from cdqa.pipeline.cdqa_sklearn import QAPipeline
from nltk import tokenize


def load_from_csv(file):
    df = pd.read_csv(file)
    df = df.rename(str.lower, axis='columns')
    df['paragraphs'] = df['paragraphs'].apply(
        lambda x: x.replace("'s", " s").replace("\n", " "))
    df['paragraphs'] = df['paragraphs'].apply(
        lambda x: tokenize.sent_tokenize(x))
    return df


df = load_from_csv('./data/test.csv')
# make sure bert_qa.joblib is in the same directory (cdQA); if not, move it here from data
cdqa_pipeline = QAPipeline(reader='bert_qa.joblib')
cdqa_pipeline.fit_retriever(df=df)

query = st.text_area('enter message', 'type')
if st.button('analyze'):
    message = cdqa_pipeline.predict(query=query, n_predictions=2)
    st.success(message)
Example #20
import pandas as pd
from ast import literal_eval

from cdqa.utils.filters import filter_paragraphs
from cdqa.utils.download import download_model, download_bnpp_data
from cdqa.pipeline.cdqa_sklearn import QAPipeline

# Download data and models
download_bnpp_data(dir='./data/bnpp_newsroom_v1.1/')
# download_model(model='bert-squad_1.1', dir='./models')

# Loading data and filtering / preprocessing the documents
# df = pd.read_csv('data/bnpp_newsroom_v1.1/bnpp_newsroom-v1.1.csv', converters={'paragraphs': literal_eval})
df = pd.read_csv('data/bnpp_newsroom_v1.1/custom_tax_jlb.csv',
                 converters={'paragraphs': literal_eval})
df = filter_paragraphs(df)

# Loading QAPipeline with CPU version of BERT Reader pretrained on SQuAD 1.1
# cdqa_pipeline = QAPipeline(reader='models/bert_qa_vCPU-sklearn.joblib')
cdqa_pipeline = QAPipeline(reader='models/bert_qa.joblib')

# Fitting the retriever to the list of documents in the dataframe
# cdqa_pipeline.fit_retriever(X=df)
cdqa_pipeline.fit_retriever(df=df)

# Sending a question to the pipeline and getting prediction
# query = 'Since when does the Excellence Program of BNP Paribas exist?'
# query = 'Who should investors consult with prior to investing?'
# query = 'Who do custom animal farmers need to consult with before buying fertilizer?'
queries = [
    'Who do custom animal farmers need to consult with before buying fertilizer?',
    'do I qualify for an automatic extension of time to file without filing Form 4868?',
    'Did the coronavirus pandemic extend the deadline to pay taxes?',
    'What is the new tax deadline?', 'What is the tax payer advocate service?',
    'What is the job of the taxpayer advocate service?',
Example #21
reader.fit(X=(train_examples, train_features))

# Output fine-tuned model
reader.model.to('cpu')
reader.device = torch.device('cpu')
joblib.dump(reader, os.path.join(reader.output_dir, 'bert_tim_qa_vCPU.joblib'))

#%% [markdown]
# ### Training

#%%
from cdqa.pipeline.cdqa_sklearn import QAPipeline

# Load standard model
cdqa_pipeline = QAPipeline(reader='./cdqa/bert_qa_vCPU-sklearn.joblib', max_answer_length=60)
cdqa_pipeline.fit_retriever(X=df_X)


#%%
# Evaluate QnA system
from cdqa.utils.evaluation import evaluate_pipeline
evaluate_pipeline(cdqa_pipeline, 'cdqa-v1.1-tim_qna.json')

# Standard pre trained model: {'exact_match': 0.0, 'f1': 5.025362668068075}
# Fine-tuned model: {'exact_match': 0.0, 'f1': 5.684362620078064}

#%% [markdown]
# ### Inference

#%%
Example #22
#df = pd.read_csv('data/my_data/homework.csv', converters={'paragraphs': literal_eval})
#df = filter_paragraphs(df)

df = pd.DataFrame(columns=['title', 'paragraphs'])
paragraphs = input("Text to Analyze:\n").split('\n')
df = df.append({'title': 'Input Data', 'paragraphs': paragraphs},
               ignore_index=True)

print(df)

cdqa_pipeline = QAPipeline(reader='models/bert_qa.joblib',
                           min_df=1,
                           max_df=1000)

cdqa_pipeline.fit_retriever(df=df)

while True:
    query = input('> ')
    prediction = cdqa_pipeline.predict(query=query)

    #if prediction[3] < -2:
    #    print("cdQA: Sorry, I don't know.")
    #else:
    #print('query: {}\n'.format(query))
    print('cdQA: {}'.format(prediction[0]))
    #print('title: {}\n'.format(prediction[1]))
    #print('paragraph: {}\n'.format(prediction[2]))