/
know_it_all.py
227 lines (190 loc) · 7.72 KB
/
know_it_all.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
__author__ = 'mrittha'
"""
The know-it-all is a method for studying subjects. It allows the student to study subjects based on
Wikipedia or text files. It reads the information to the learner, then asks questions about the information
just read. It keeps track of the answers to determine how well the student knows each subject.
It allows for reviews, and also text clustering to fit new subjects into its knowledge base.
In general, the idea is that the student keeps using the software, and it keeps getting smarter about what the
student knows.
track and break out questions.
"""
import math
import random
import codecs
import json
import wikipedia
import os
import know_it_all.text_processor.clozure as clozure
import know_it_all.cli.talker as talker
import know_it_all.text_processor.chunk_o_learning as col
import know_it_all.cli.simple_menu as simple_menu
import know_it_all.evaluation.multi_choice as mc
import know_it_all.readers.ebook as ebook
import know_it_all.readers.pdf as rpdf
import know_it_all.text_processor.rake as rake
from know_it_all.study import study_doc as sd,section as sec,paragraph as par,questions as q
from pprint import pprint
def question_count(questions, rate):
    """Return how many of ``questions`` should be asked for a given ``rate``.

    The result is ``len(questions) * rate`` truncated toward zero, but never
    less than 1 -- so an empty list still yields 1.
    """
    scaled = math.trunc(len(questions) * rate)
    return scaled if scaled > 1 else 1
def study_questions(questions, count):
    """Run a fill-in-the-blank quiz over a random selection of ``questions``.

    Each question is indexed with the keys ``'question'``, ``'answer'`` and
    ``'score'``. Answers are compared case-insensitively.

    Args:
        questions: sequence of question mappings to draw from.
        count: how many questions to ask. It is clamped to the number of
            questions available, because ``random.sample`` raises ValueError
            when asked for more items than the population holds (or for a
            negative amount).

    Returns:
        A ``(points, possible_points)`` tuple; ``(0, 0)`` when ``questions``
        is empty. Answering "I'm done" ends the quiz immediately; the question
        being asked at that moment still counts toward ``possible_points``.
    """
    if len(questions) < 1:
        return 0, 0
    points = 0
    possible_points = 0.0
    to_use = max(0, min(count, len(questions)))
    talker.print_and_talk(f"I will ask {to_use} questions.")
    for question in random.sample(questions, to_use):
        possible_points += question['score']
        talker.print_and_talk(question['question'])
        answer = talker.ask('Fill in the blank:')
        if answer == "I'm done":
            return points, possible_points
        if answer.lower() == question['answer'].lower():
            talker.print_and_talk("You are correct!")
            points += question['score']
        else:
            talker.print_and_talk("Sorry, I was looking for:", question['answer'])
    # NOTE(review): this block was recovered from text whose indentation had
    # been lost; the summary is assumed to be spoken once, after the last
    # question, rather than after every answer -- confirm.
    talker.print_and_talk("You got ", points, "points out of", possible_points)
    return points, possible_points
def fetch_subject_file(subject):
    """Load the locally stored JSON data for ``subject``, if any exists.

    The subject is lower-cased and its spaces are replaced by underscores to
    form the path ``subjects/<name>.json``, which is resolved relative to the
    current working directory.

    Args:
        subject: subject name as typed by the user.

    Returns:
        The decoded JSON content, or ``None`` when the file cannot be opened.

    Raises:
        json.JSONDecodeError: if the file exists but does not contain valid
            JSON; a corrupt file is deliberately not treated as "missing".
    """
    file_stem = subject.replace(' ', '_').lower()
    subject_file = 'subjects/' + file_stem + '.json'
    try:
        # JSON text is UTF-8; do not depend on the platform's default encoding.
        with open(subject_file, encoding='utf-8') as f:
            return json.load(f)
    except OSError:
        return None
def study_text(sentences):
    """Build cloze questions from ``sentences`` and quiz the student on them.

    Three quarters of the generated questions (at least one) are asked.

    Args:
        sentences: input handed unchanged to
            ``clozure.make_study_set_sentences``.

    Returns:
        The ``(points, possible_points)`` tuple produced by
        ``study_questions``.
    """
    questions = clozure.make_study_set_sentences(sentences)
    # study_questions needs an integer count (it is passed to random.sample);
    # handing it the raw 0.75 rate raised TypeError, so convert the rate first.
    return study_questions(questions, question_count(questions, 0.75))
def study_text_complex_clozures(text):
    """Quiz the student on questions generated from ``text``.

    The questions come from ``rake.complex_clozures`` and are asked through
    ``mc.ask_questions``; whatever that call returns is discarded.
    """
    mc.ask_questions(rake.complex_clozures(text))
def study_paragraph(paragraph: dict, q_type='fill_in_the_blank'):
    """Quiz the student on one paragraph and record the result on it.

    Roughly 30% of the paragraph's questions (at least one) are asked.

    Args:
        paragraph: paragraph mapping; its questions are obtained with
            ``par.get_questions``.
        q_type: ``'fill_in_the_blank'`` (asked via ``study_questions``) or
            ``'multiple_choice'`` (asked via ``mc.ask_questions``).

    Returns:
        The same ``paragraph``, with the quiz result stored under ``'score'``.

    Raises:
        ValueError: if ``q_type`` is not one of the two supported values.
    """
    # Reject unknown types before any work is done; previously this fell
    # through both branches and failed on the unassigned ``score`` variable.
    if q_type not in ('fill_in_the_blank', 'multiple_choice'):
        raise ValueError(f'unknown question type: {q_type!r}')
    questions = par.get_questions(paragraph)
    count = question_count(questions, 0.3)
    if q_type == 'fill_in_the_blank':
        score = study_questions(questions, count)
    else:
        score = mc.ask_questions(questions, count)
    paragraph['score'] = score
    return paragraph
def study_section(document, selection):
    """Read one section aloud, paragraph by paragraph, quizzing after each.

    The section is looked up with ``sd.get_section``. Its ``'points'`` entry
    is reset to ``(0, 0)`` and a ``'studied'`` entry of
    ``(paragraphs reached, paragraph count)`` is created if missing and
    advanced as paragraphs are completed. Every paragraph is quizzed with
    multiple-choice questions.

    Returns the document after the section has been written back with
    ``sd.update_section``.
    """
    section = sd.get_section(document, selection)
    p_count = len(sec.get_paragraph_titles(section))
    section['points'] = (0, 0)
    if 'studied' not in section:
        section['studied'] = (0, p_count)

    print("************************")
    for _ in range(2):
        print()
    talker.print_and_talk(section['title'])
    reached = section['studied'][0]
    out_of = section['studied'][1]
    talker.print_and_talk("You have studied", reached, "paragraphs out of", out_of)

    paragraphs = sec.get_paragraphs(section).values()
    for position, paragraph in enumerate(paragraphs, start=1):
        print()
        for line in par.get_sentences(paragraph):
            talker.print_and_talk(line)
        print()
        talker.ask('Hit enter when ready.')
        quizzed = study_paragraph(paragraph, 'multiple_choice')
        section = sec.update_paragraph(section, quizzed)
        # Only ever move the progress marker forward.
        if position > section['studied'][0]:
            section['studied'] = (position, p_count)

    # NOTE(review): recovered from text without indentation; the write-back
    # is assumed to happen once, after all paragraphs -- confirm.
    document = sd.update_section(document, section)
    return document
#def study_chunk(chunk):
# while True:
# talker.print_and_talk("What section would you like to study?")
# selection=simple_menu.ask_list(chunk['section_list'])
# if not selection:
# return
# study_section(chunk,selection)
def get_suggestion(subject):
    """Return the first result of a Wikipedia search for ``subject``.

    The first element of the search result is taken without checking that
    one exists, so an empty result list surfaces as IndexError.
    """
    return wikipedia.search(subject)[0]
def study_subject(subject):
    """Locate study material for ``subject``, locally or on Wikipedia.

    Lookup order:
      1. a local subject file for ``subject``;
      2. a local subject file for Wikipedia's top search suggestion;
      3. the Wikipedia article for that suggestion, converted with
         ``col.make_chunk``.

    Progress is reported through ``talker``. If nothing can be found, an
    apology is spoken and the function returns.

    Returns:
        None.
    """
    talker.print_and_talk("You want to learn about:", subject)
    chunk = fetch_subject_file(subject)
    if not chunk:
        talker.print_and_talk("I will ask wikipedia for the best suggestion")
        try:
            suggestion = get_suggestion(subject)
        except IndexError:
            # get_suggestion takes the first search hit, so an empty result
            # list shows up here as IndexError.
            talker.print_and_talk("Sorry, I couldn't find an article on", subject)
            return
        talker.print_and_talk('"'+suggestion+'"', "was suggested by wikipedia.")
        chunk = fetch_subject_file(suggestion)
        if not chunk:
            # wikipedia.page reports a missing or ambiguous title by raising,
            # not by returning a falsy value.
            try:
                article = wikipedia.page(suggestion)
            except (wikipedia.exceptions.PageError,
                    wikipedia.exceptions.DisambiguationError):
                article = None
            if not article:
                talker.print_and_talk("Sorry, I couldn't find an article on", suggestion)
                return
            chunk = col.make_chunk(article, suggestion)
            talker.print_and_talk("I have loaded the article on", suggestion)
        else:
            talker.print_and_talk("I found a local file for", suggestion)
    else:
        talker.print_and_talk("I found a local file for", subject)
    # TODO: the loaded chunk is not studied yet; the study_chunk call that
    # used to follow is disabled.
    #study_chunk(chunk)
def study_text_file(filename):
    """Read ``filename`` as UTF-8 text and pass its contents to ``study_text``.

    The result of ``study_text`` is not returned.
    """
    with codecs.open(filename, encoding='utf-8') as source:
        contents = source.read()
    study_text(contents)
#def study_section_file(filename):
# """Expects files in the section json format.
# uses that format to convert the file into a set of questions."""
# chunk=col.section_file_to_chunk(filename)
# talker.print_and_talk("I have loaded the file")
# study_chunk(chunk)
def study_a_study_doc(filepath: str):
    """Interactively study sections of the study document stored at ``filepath``.

    The document is read once with ``sd.read``. The student is then asked
    repeatedly which section to study; after each studied section the updated
    document is written back to ``filepath``. The loop ends as soon as the
    menu returns an empty selection.
    """
    document = sd.read(filepath)
    selection = True
    while selection:
        talker.print_and_talk("What section would you like to study?")
        selection = simple_menu.ask_list(sd.section_names(document))
        if selection:
            document = study_section(document, selection[0])
            # Save after every section so progress is kept between choices.
            sd.write(document, filepath)
def study_file(filename: str):
    """Study the book stored in ``filename``.

    Only names ending in ``epub`` are handled: the matching study document is
    created with ``ebook.create_full_doc`` if it does not exist yet and then
    studied interactively. PDF and plain-text handling is currently disabled.

    Raises:
        ValueError: if ``filename`` does not end in ``epub``.
    """
    if not filename.endswith("epub"):
        raise ValueError(f'filename {filename} is of an unknown type based on file ending.')

    study_document_path = ebook.create_study_doc_path(filename)
    already_converted = os.path.exists(study_document_path)
    if not already_converted:
        ebook.create_full_doc(filename, study_document_path)
    study_a_study_doc(study_document_path)
def learn_wikipedia_subjects():
    """Keep asking for Wikipedia subjects and study each one.

    The loop stops when the student answers "I'm done" or enters nothing but
    whitespace. Every other answer is lower-cased and handed to
    ``study_subject``.
    """
    while True:
        answer = talker.ask('What wikipedia subject do you want to learn about?:')
        if answer == "I'm done" or not answer.strip():
            break
        study_subject(answer.lower())
def learn_all_the_things():
    """Top-level interactive menu: study a file or Wikipedia subjects.

    Repeats until the student answers "I'm done" or enters nothing but
    whitespace. The answer "a file" (any letter case) prompts for a file name
    and studies it; every other answer starts the Wikipedia subject loop.
    """
    while True:
        print("I understand: 'wikipedia' or 'a file' or 'I'm done'")
        answer = talker.ask('What do you want to learn about?:')
        if answer == "I'm done" or answer.strip() == "":
            break
        if answer.lower() == "a file":
            file_name = talker.ask('Please enter the file you wish to learn')
            try:
                study_file(file_name)
            except ValueError as err:
                # study_file raises ValueError for file types it cannot
                # handle; report it and keep the menu alive instead of
                # ending the whole session with a traceback.
                talker.print_and_talk(str(err))
        else:
            learn_wikipedia_subjects()
if __name__ == "__main__":
    # Script entry point: start the interactive study menu.
    # The commented-out statements below appear to be leftover manual
    # experiments and switches; they are kept for reference only.
    # print wikipedia.search("harry potter")
    # study_file("text/saturn.txt")
    # talker.TALK = False
    # wikipedia.set_lang("simple")
    learn_all_the_things()