Example #1
from bert import QA

model = QA('model')

doc = "Victoria has a written constitution enacted in 1975, but based on the 1855 colonial constitution, passed by the United Kingdom Parliament as the Victoria Constitution Act 1855, which establishes the Parliament as the state's law-making body for matters coming under state responsibility. The Victorian Constitution can be amended by the Parliament of Victoria, except for certain 'entrenched' provisions that require either an absolute majority in both houses, a three-fifths majority in both houses, or the approval of the Victorian people in a referendum, depending on the provision."
doc = "According to the Indian census of 2001, there were 30,803,747 speakers of Malayalam in Kerala, making up 93.2% of the total number of Malayalam speakers in India, and 96.7% of the total population of the state. There were a further 701,673 (2.1% of the total number) in Karnataka, 557,705 (1.7%) in Tamil Nadu and 406,358 (1.2%) in Maharashtra. The number of Malayalam speakers in Lakshadweep is 51,100, which is only 0.15% of the total number, but is as much as about 84% of the population of Lakshadweep. In all, Malayalis made up 3.22% of the total Indian population in 2001. Of the total 33,066,392 Malayalam speakers in India in 2001, 33,015,420 spoke the standard dialects, 19,643 spoke the Yerava dialect and 31,329 spoke non-standard regional variations like Eranadan. As per the 1991 census data, 28.85% of all Malayalam speakers in India spoke a second language and 19.64% of the total knew three or more languages.  Large numbers of Malayalis have settled in Bangalore, Mangalore, Delhi, Coimbatore, Hyderabad, Mumbai (Bombay), Ahmedabad, Pune, and Chennai (Madras). A large number of Malayalis have also emigrated to the Middle East, the United States, and Europe. Accessed November 22, 2014.</ref> including a large number of professionals. There were 7,093 Malayalam speakers in Australia in 2006. The 2001 Canadian census reported 7,070 people who listed Malayalam as their mother tongue, mostly in the Greater Toronto Area and Southern Ontario. In 2010, the Census of Population of Singapore reported that there were 26,348 Malayalees in Singapore. The 2006 New Zealand census reported 2,139 speakers. 134 Malayalam speaking households were reported in 1956 in Fiji. There is also a considerable Malayali population in the Persian Gulf regions, especially in Bahrain, Muscat, Doha, Dubai, Abu Dhabi, Kuwait and European region mainly in London.  World Malayalee Council, the organisation working with the Malayali diaspora across the Globe has embarked upon a project for making a data bank of the diaspora. CANNOTANSWER"
q = 'When did Victoria enact its constitution?'
q = "What other languages are spoken there?"
answer = model.predict(doc, q)

print(answer['answer'])
# 1975

# answer.keys() -> dict_keys(['answer', 'start', 'end', 'confidence', 'document'])
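
The extra keys can drive simple post-processing. A minimal sketch, assuming 'confidence' is a probability-like float as the key list above suggests (the 0.3 threshold is an arbitrary illustrative value):

# Sketch only: gate the printed answer on the reported confidence.
CONF_THRESHOLD = 0.3  # illustrative value, tune for your data
if answer.get('confidence', 0.0) >= CONF_THRESHOLD:
    print(answer['answer'])
else:
    print("Low-confidence answer, treat with caution:", answer['answer'])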
Example #2
from bert import QA

model = QA('model')
"""
Now, let us run reading comprehension on a passage (taken from Google) with our existing model in a zero-shot fashion.
"""

passage = 'There was a princess called Maggie, she was tall and as white as snow. She had red lips like a \
rose and her hair was brown. She had light blue eyes and was very nice and kind. \
She was in love with a bricklayer called Kevin. He was tall, had brown hair and tanned skin. \
He was strong, had dark eyes, was kind and he was never angry'

ques = 'What is the name of the princess?'
"""
The predict method returns a dictionary containing several fields; since we only need the answer text here, we index into the 'answer' key.
"""
answer = model.predict(passage, ques)['answer']

print("{} : {}".format(ques, answer))
"""
Let us try a few more questions to probe edge cases and see where the model might break.
"""

answer = model.predict(passage, 'Who was Maggie?')['answer']

print("{} : {}".format('Who was Maggie?', answer))

answer = model.predict(passage,
                       "What is the color of Maggie's hair?")['answer']

print("{} : {}".format("What is the color of Maggie's hair?", answer))
"""
        iterator = iterator + 1

        question_answer = question_row['Dist' + question_row['FullKey']]

        # If the answer to the question is not in the text, the model cannot find a cue
        if (question_answer not in text): continue

        question_id = question_row['RecordID']
        question = question_row['Stem'].lower()
        question = question.replace(':', '?')

        # Assemble a block of text to replicate the trained environment
        build_text = BuildText(text, text_answer, length, df[context])

        # Predict the answer from the text
        prediction = model.predict(build_text, question)

        print(f'\rprogress: {iterator} / {total}', end='', flush=True)

        # Write confidence of cue to csv
        with open(f'output_{time_str}.csv', 'a', newline='') as output:
            wr = csv.writer(output, quoting=csv.QUOTE_ALL)
            wr.writerow([iterator, text, question, \
                question_row['DistA'], question_row['DistB'], question_row['DistC'], question_row['DistD'], question_row['FullKey'], \
                prediction['prediction'], prediction['start'], prediction['end'], prediction['confidence']])

        # If the model guessed the correct answer, output to the console
        if prediction['prediction'] != 'N/A' and prediction['prediction'] in \
            [question_row['DistA'], question_row['DistB'], question_row['DistC'], question_row['DistD']]:
            print()
            print(f'text id: {text_id}')
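
BuildText is not defined in this fragment. A purely hypothetical sketch, based only on the comment above, of what such a helper might do (the body below is an assumption, not the original implementation):

def BuildText(text, text_answer, length, context):
    # Hypothetical sketch: ensure the answer string is present, then pad the
    # stem with surrounding context words until the block is roughly `length`
    # words long, so the input resembles the longer passages seen in training.
    block = text if text_answer in text else text + " " + text_answer
    padding_needed = max(0, length - len(block.split()))
    padding = " ".join(str(context).split()[:padding_needed])
    return (block + " " + padding).strip()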
Example #4
'''
    stry = stories[i]
    for j in range(len(stry)):
        pass
'''
question = [[] for i in range(len(stories))]
answers = [[] for i in range(len(stories))]

for i in questions:
    q = questions[i]
    question[q['story_index']].append(q['question'][0])
    answers[q['story_index']].append(q['answer'][0])

cnt = 0
for i in range(len(story_list)):
    for j in range(len(question[i])):
        a = model.predict(story_list[i], question[i][j])
        #ans.append(a['answer'])
        print(story_list[i], question[i][j])
        print(a['answer'], answers[i][j])
        # Count a hit when the gold answer appears inside the predicted span
        if answers[i][j].lower() in a['answer'].lower():
            print("True")
            cnt += 1

print("Acc ", float(cnt) / len(questions), cnt)






Example #5
    try:
        soup = BeautifulSoup(dfPagesUnique.loc[i, 'html'], 'html.parser')
    except Exception:
        soup = ""

    if len(soup) != 0:
        #TBody Content
        texts = soup.findAll(text=True)
        visible_texts = filter(tag_visible, texts)
        myBody = " ".join(t.strip() for t in visible_texts)
        myBody = myBody.strip()
        #myBody = strip_accents(myBody, encoding).lower()  #think  to do a global clean instead
        myBody = " ".join(myBody.split(" "))  #remove multiple spaces
        print(myBody)
        dfPagesUnique.loc[i, 'body'] = myBody
        answer = model.predict(dfPagesUnique.loc[i, 'body'], myKeyword)
        print("BERT_score" + str(answer['mean_total_prob']))
        dfPagesUnique.loc[i, 'BERT_score'] = answer['mean_total_prob']
        dfAnswer = pd.DataFrame(answer,
                                columns=[
                                    'answers', 'starts', 'ends', 'local_probs',
                                    'total_probs'
                                ])
        dfPageAnswer = pd.DataFrame(columns=[
            'keyword', 'page', 'position', 'BERT_score', 'source',
            'search_date', 'answers', 'starts', 'ends', 'local_probs',
            'total_probs'
        ])
        for k in range(0, len(dfAnswer)):
            dfPageAnswer.loc[k, 'keyword'] = dfPagesUnique.loc[i, 'keyword']
            dfPageAnswer.loc[k, 'page'] = dfPagesUnique.loc[i, 'page']
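
The tag_visible filter used above is not shown in this fragment. A sketch of a typical implementation (an assumption, not necessarily the original helper):

from bs4.element import Comment

def tag_visible(element):
    # Keep only text nodes that are not inside non-rendered tags
    # and are not HTML comments.
    if element.parent.name in ('style', 'script', 'head', 'title', 'meta', '[document]'):
        return False
    if isinstance(element, Comment):
        return False
    return True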
Example #6
from bert import QA

model = QA('model')

story = input('Enter story: ')
flag = 'y'
while flag == 'y' or flag == 'yes':
    ques = input('Enter question: ')
    answer1 = model.predict(story, ques)
    print('Answer is ', answer1['answer'])
    flag = input('Ask more: y/n ')

# A fixed example document and questions (these run after the interactive loop above exits):
doc = "Hey! I am Mehak. I love dancing. I live in Punjab. I have one brother and one sister. My father is a businessman. My mother is a teacher. I love my family."

q1 = 'What is my name?'
q2 = 'What are my hobbies?'
q3 = 'How many siblings do I have?'
q4 = 'What is the occupation of my mother?'
q5 = 'What is the occupation of my father?'

answer1 = model.predict(doc,q1)
answer2 = model.predict(doc,q2)
answer3 = model.predict(doc,q3)
answer4 = model.predict(doc,q4)
answer5 = model.predict(doc,q5)

print(answer1['answer'])
print(answer2['answer'])
print(answer3['answer'])
print(answer4['answer'])
print(answer5['answer'])
Example #7
import time
import nltk

from bert import QA
model = QA('model')

doc = open("data.txt").read()
q = "Which department filed an antitrust lawsuit against Microsoft in 1998 ?"
s = ""
start_time = time.time()
words = nltk.word_tokenize(doc)
print("Words in the document are "+ str(len(words))+" words")
if(len(words)>512):
    answer = model.predict(doc,q,150)
else:
    answer = model.predict(doc,q,len(words))


if len(answer) == 0:
    s += "No"
else:
    s += "Yes, " + ", ".join(t['answer'] for t in answer)
print(q)
print("Final answer: ",s)
print("Time taken in seconds: " , (time.time() - start_time))
Example #8
import sys
sys.path.append('./BERT-SQuAD')
import json
import torch
from bert import QA


def predict(doc, q):
    answer = model.predict(doc, q)
    content = json.dumps(answer, separators=(',', ':'))
    print(content, flush=True)


print("Loading model!", flush=True)
model = QA('model')
print("Model loading complete!", flush=True, end='')
model.predict("Whales are a kind of animal called a mammal.",
              "What is a whale?")