Exemplo n.º 1
0
    async def make(self):
        """Create the question-answering object, indexing first if needed.

        If ``self.index_directory`` does not exist on disk, a warning is
        logged and ``self.index(force=True)`` is awaited. Afterwards a
        ``text.SimpleQA`` configured from this instance's index directory and
        model names is stored on ``self.qa``.
        """
        index_present = os.path.exists(self.index_directory)
        if not index_present:
            logger.warning("index for BERTQuestionAnswering missing")
            await self.index(force=True)

        # Collected after the optional indexing step, just before construction.
        qa_options = {
            "index_dir": self.index_directory,
            "bert_squad_model": self.bert_squad_model,
            "bert_emb_model": self.bert_emb_model,
        }
        self.qa = text.SimpleQA(**qa_options)
Exemplo n.º 2
0
def query_qa(params):
    """Answer the question described by ``params`` against its domain index.

    Args:
        params: Mapping that provides ``'domain'`` (looked up in
            ``INDEX_DIR_MAP``) and ``'thresh'`` (converted with ``float``).
            The whole mapping is also handed to ``formulate_query``.

    Returns:
        Whatever ``process_answers`` returns for the retrieved answers, or an
        empty list when the domain maps to a falsy index directory.

    Raises:
        Any error raised by ``INDEX_DIR_MAP[...]`` for an unknown domain or by
        ``params['thresh']`` / ``float`` for a missing or malformed threshold.
    """
    index_dir = INDEX_DIR_MAP[params['domain']]
    print("Index dir: ", index_dir)
    if not index_dir:
        return []

    qa = text.SimpleQA(index_dir)
    query = formulate_query(params)
    print("Query: ", query)
    answers = qa.ask(query, batch_size=4)
    # Only peek at the first answer when there is one: an empty result must
    # reach process_answers instead of crashing this debug print.
    first_answer = answers[0] if answers else None
    print("Answers are: ", type(answers), first_answer)
    return process_answers(answers=answers,
                           threshold=float(params['thresh']))
Exemplo n.º 3
0
def init():
    """Open the question-answering index that belongs to ``DATASET_NAME``.

    Returns:
        A ``text.SimpleQA`` created for the ``<DATASET_NAME>/index`` path.
    """
    index_dir = DATASET_NAME + "/index"
    # Only the disabled rebuild steps below make use of the raw-text folder.
    txt_dir = DATASET_NAME + "/txt"

    # Rebuilding the index is switched off; the steps used to be:
    #   if path.exists(index_dir):
    #       shutil.rmtree(index_dir)
    #   text.SimpleQA.initialize_index(index_dir)
    #   file_count = sum((len(f) for _, _, f in os.walk(txt_dir)))
    #   text.SimpleQA.index_from_folder(txt_dir, index_dir, commit_every=file_count)

    return text.SimpleQA(index_dir)
Exemplo n.º 4
0
    def test_qa(self):
        """Index 20 Newsgroups in a fresh directory and check a known answer."""
        import shutil
        import tempfile

        from sklearn.datasets import fetch_20newsgroups

        remove = ('headers', 'footers', 'quotes')
        newsgroups_train = fetch_20newsgroups(subset='train', remove=remove)
        newsgroups_test = fetch_20newsgroups(subset='test', remove=remove)
        docs = newsgroups_train.data + newsgroups_test.data

        # A fixed '/tmp/qa_test' path is left behind by the first run and then
        # collides with index creation on the next one. Reserve a unique name
        # and remove the directory so index creation starts from a free path.
        tmp_folder = tempfile.mkdtemp()
        shutil.rmtree(tmp_folder)
        try:
            text.SimpleQA.initialize_index(tmp_folder)
            text.SimpleQA.index_from_list(docs, tmp_folder, commit_every=len(docs))
            qa = text.SimpleQA(tmp_folder)

            answers = qa.ask('When did Cassini launch?')
            top_answer = answers[0]['answer']
            self.assertEqual(top_answer, 'in october of 1997')
        finally:
            # Best-effort cleanup so repeated runs do not pile up indexes.
            shutil.rmtree(tmp_folder, ignore_errors=True)
Exemplo n.º 5
0
    def test_qa(self):
        """Build a multi-segment 20 Newsgroups index and check a known answer.

        The index lives in a unique temporary directory that is removed again
        when the test finishes, whether it passes or fails.
        """
        import shutil
        import tempfile

        from sklearn.datasets import fetch_20newsgroups

        remove = ("headers", "footers", "quotes")
        newsgroups_train = fetch_20newsgroups(subset="train", remove=remove)
        newsgroups_test = fetch_20newsgroups(subset="test", remove=remove)
        docs = newsgroups_train.data + newsgroups_test.data

        # mkdtemp only reserves a unique name here; the directory is removed
        # again so that initialize_index receives a path that does not exist.
        tmp_folder = tempfile.mkdtemp()
        shutil.rmtree(tmp_folder)
        try:
            text.SimpleQA.initialize_index(tmp_folder)
            text.SimpleQA.index_from_list(
                docs, tmp_folder, commit_every=len(docs), multisegment=True
            )
            qa = text.SimpleQA(tmp_folder, framework="tf")

            answers = qa.ask("When did Cassini launch?")
            top_answer = answers[0]["answer"]
            self.assertEqual(top_answer, "in october of 1997")
        finally:
            # Previously the index was left on disk after every run.
            shutil.rmtree(tmp_folder, ignore_errors=True)
Exemplo n.º 6
0
from ktrain import text
from ktrain_config import DATASET_NAME
import sys

# The question is taken from the first command-line argument; exit with a
# usage message instead of an IndexError traceback when it is missing.
if len(sys.argv) < 2:
    sys.exit(f"usage: {sys.argv[0]} QUESTION")
question = sys.argv[1]

qa = text.SimpleQA(DATASET_NAME + "/index")
answers = qa.ask(question=question)

# qa.display_answers(answers[:5])

df = qa.answers2df(answers)

# print(df)
print("=" * 80)
print(question)
print("-" * 80)

# One section per candidate: the answer itself, then its surrounding context.
for _, row in df.iterrows():
    print("=" * 80)
    print(row["Candidate Answer"])
    print("-" * 80)
    print(row["Context"])
Exemplo n.º 7
0
def QAModel():
    """Return a ``text.SimpleQA`` for the ``index`` folder under the current working directory."""
    return text.SimpleQA(os.path.join(os.getcwd(), 'index'))
Exemplo n.º 8
0
import os
import json
import ktrain
from ktrain import text
from operator import itemgetter
from flask import Flask, jsonify, request, Response
from settings import BIO_BERT_SQUAD_ML, BIO_MODEL, INDEXDIR

#BIO_BERT_SQUAD_ML = "ktrapeznikov/biobert_v1.1_pubmed_squad_v2"
#BIO_MODEL = 'mrm8488/GPT-2-finetuned-covid-bio-medrxiv'


#app
app = Flask(__name__)

# Module-level question-answering engine, configured from the values imported
# from settings.
qa = text.SimpleQA(
    index_dir=INDEXDIR,
    bert_squad_model=BIO_BERT_SQUAD_ML,
    bert_emb_model=BIO_MODEL,
    from_pytorch=True,
)

#Util Functions
def create_json(_answers):
    """Convert raw answer records into dicts of plain Python values.

    For each item in ``_answers`` a new dict is filled with ``confidence`` and
    ``similarity_score`` cast to ``float``, ``reference`` cast to ``int``,
    ``answer`` taken from the item's ``full_answer`` and ``context`` copied
    as-is.

    NOTE(review): in the visible code ``new_item`` is never appended to
    ``answers`` and nothing is returned; the rest of this function may be
    missing from this view -- confirm against the original file.
    """
    # Create a JSON array for the answers list.
    # Sorting by confidence score is currently disabled:
    #_response = answers.sort(key=itemgetter('confidence'))
    answers = []
    for item in _answers:
        new_item = {}
        # float()/int() casts -- presumably because the raw values are not
        # directly JSON-serializable; TODO confirm.
        new_item['confidence'] = float(item['confidence'])
        new_item['answer'] = item['full_answer']
        new_item['context'] = item['context']
        new_item['similarity_score'] = float(item['similarity_score'])
        new_item['reference'] = int(item['reference']) ### Need to fetch relevant doc from MongoDB
import os
import shutil
import ktrain
from ktrain import text

# NOTE(review): QA_MODEL is defined here but never passed to SimpleQA below.
QA_MODEL = 'twmkn9/bert-base-uncased-squad2'

docs = ['Hello world.']
INDEX_DIR_PATH = os.path.join("/tmp", "index_dir")

# Start from a clean slate: drop any index left over from an earlier run.
if os.path.exists(INDEX_DIR_PATH):
    shutil.rmtree(INDEX_DIR_PATH)

# Create the index and fill it with the placeholder document.
text.SimpleQA.initialize_index(INDEX_DIR_PATH)
text.SimpleQA.index_from_list(docs, INDEX_DIR_PATH, commit_every=len(docs))

# Constructing SimpleQA is what downloads the models.
qa = text.SimpleQA(INDEX_DIR_PATH)
Exemplo n.º 10
0
import os

from sklearn.datasets import fetch_20newsgroups

import ktrain
from ktrain import text

INDEXDIR = '/tmp/myindex'

# Demo questions, asked in this order against the 20 Newsgroups index.
QUESTIONS = (
    'When did the Cassini probe launch?',
    'What causes computer images to be too dark?',
    'Who was Jesus Christ?',
    'Who is sachin tendulkkar?',
    'What is solar panel battery',
)

# Index creation rejects a directory that already exists, so a second run of
# this script used to fail. Build the index only when it is absent and reuse
# it otherwise; delete INDEXDIR by hand to force a rebuild.
if not os.path.exists(INDEXDIR):
    remove = ('headers', 'footers', 'quotes')
    newsgroups_train = fetch_20newsgroups(subset='train', remove=remove)
    newsgroups_test = fetch_20newsgroups(subset='test', remove=remove)
    docs = newsgroups_train.data + newsgroups_test.data

    text.SimpleQA.initialize_index(INDEXDIR)
    text.SimpleQA.index_from_list(docs, INDEXDIR, commit_every=len(docs))

qa = text.SimpleQA(INDEXDIR)

# Show the five best candidate answers for each question.
for question in QUESTIONS:
    answers = qa.ask(question)
    qa.display_answers(answers[:5])
Exemplo n.º 11
0
# load 20newsgroups dataset into an array
from sklearn.datasets import fetch_20newsgroups
from ktrain import text
import os.path as path

INDEX_DIRECTORY = '/tmp/newsgroups_index'


def index(index_directory):
    """Build a 20 Newsgroups search index at ``index_directory``.

    Loads the train and test subsets (headers, footers and quotes removed),
    initializes a new index at the given path and adds every document to it,
    committing every 100 documents.

    Args:
        index_directory: Path at which the index is created.
    """
    remove = ("headers", "footers", "quotes")
    newsgroups_train = fetch_20newsgroups(subset='train', remove=remove)
    newsgroups_test = fetch_20newsgroups(subset='test', remove=remove)
    docs = newsgroups_train.data + newsgroups_test.data

    text.SimpleQA.initialize_index(index_directory)
    text.SimpleQA.index_from_list(docs, index_directory, commit_every=100)


def _ensure_index():
    """Build the index at ``INDEX_DIRECTORY`` when that directory is missing."""
    if not path.isdir(INDEX_DIRECTORY):
        index(INDEX_DIRECTORY)


# The QA object is created at import time, so the index has to be in place
# first; previously the lazy build in ask_index could never run because
# constructing ``qa`` came before it. Importing this module on a machine
# without the index now triggers the (slow) one-time build.
_ensure_index()
qa = text.SimpleQA(INDEX_DIRECTORY)


def ask_index(question: str, max_answers: int):
    """Ask ``question`` against the newsgroups index.

    Args:
        question: Natural-language question to answer.
        max_answers: Passed to ``qa.ask`` as ``n_answers``.

    Returns:
        The result of ``qa.ask`` for the question.
    """
    # Kept as a safeguard in case the directory disappeared after import.
    _ensure_index()

    return qa.ask(question, n_answers=max_answers)