def set_default_encounter_based_prob():
    """Seed or bump encounter-based probabilities from users' bookmarks.

    For every user and language, the words of all bookmarked texts whose
    origin word is in that language are collected. Each collected word that
    the user did not bookmark and for which a ``RankedWord`` exists is
    handled as follows:

    * if an ``EncounterBasedProbability`` already exists for the user and
      ranked word, its ``not_looked_up_counter`` is incremented;
    * otherwise the object returned by ``EncounterBasedProbability.find``
      (called with the default probability) is added to the session.

    Prints ``job2`` when finished.
    """
    zeeguu.app.test_request_context().push()
    zeeguu.db.session.commit()

    default_probability = 0.5
    # (?u) is the inline re.UNICODE flag; compile once instead of per bookmark.
    word_pattern = re.compile(r'(?u)\w+')

    languages = Language.all()
    users = User.find_all()
    for user in users:
        for lang in languages:
            # A set gives O(1) "was this word bookmarked?" checks below.
            bookmarked_words = set()
            content_words = []
            for bookmark in Bookmark.find_by_specific_user(user):
                if bookmark.origin.language == lang:
                    content_words.extend(
                        word_pattern.findall(bookmark.text.content))
                    bookmarked_words.add(bookmark.origin.word)

            for word_known in content_words:
                if word_known in bookmarked_words:
                    continue
                if not RankedWord.exists(word_known, lang):
                    continue
                rank = RankedWord.find(word_known, lang)
                # Check existence before find(): only pre-existing entries
                # get their counter bumped, new ones are just registered.
                already_tracked = EncounterBasedProbability.exists(user, rank)
                prob = EncounterBasedProbability.find(
                    user, rank, default_probability)
                if already_tracked:
                    prob.not_looked_up_counter += 1
                else:
                    zeeguu.db.session.add(prob)
                zeeguu.db.session.commit()
    # Parenthesised single-argument form prints the same on Python 2 and 3.
    print('job2')
def set_default_encounter_based_prob():
    """Create or update encounter-based probabilities for all users.

    For each (user, language) pair, every word occurring in the content of
    the user's bookmarks for that language, and not itself one of the
    bookmarked origin words, counts as "encountered but not looked up".
    Such a word is only processed when ``RankedWord.exists`` reports a
    ranked word for it:

    * an already existing ``EncounterBasedProbability`` has its
      ``not_looked_up_counter`` increased by one;
    * otherwise ``EncounterBasedProbability.find`` is called with the
      default probability and its result is added to the session.

    Prints ``job2`` at the end.
    """
    zeeguu.app.test_request_context().push()
    zeeguu.db.session.commit()

    default_probability = 0.5
    # Compiled once; (?u) turns on re.UNICODE so \w matches non-ASCII letters.
    find_words = re.compile(r'(?u)\w+').findall

    languages = Language.all()
    users = User.find_all()
    for user in users:
        for lang in languages:
            marked_words = set()  # set: constant-time membership test below
            all_content_words = []
            for bookmark in Bookmark.find_by_specific_user(user):
                if bookmark.origin.language != lang:
                    continue
                all_content_words.extend(find_words(bookmark.text.content))
                marked_words.add(bookmark.origin.word)

            words_known_from_user = [
                word for word in all_content_words
                if word not in marked_words
            ]
            for word_known in words_known_from_user:
                if not RankedWord.exists(word_known, lang):
                    continue
                rank = RankedWord.find(word_known, lang)
                # exists() must be evaluated before find() so that a freshly
                # obtained entry is not also counted as an extra encounter.
                existed = EncounterBasedProbability.exists(user, rank)
                prob = EncounterBasedProbability.find(
                    user, rank, default_probability)
                if existed:
                    prob.not_looked_up_counter += 1
                else:
                    zeeguu.db.session.add(prob)
                zeeguu.db.session.commit()
    # Single-argument print(...) behaves identically on Python 2 and 3.
    print('job2')
def get_not_looked_up_words(self):
    """Return one ``{'word': <word>}`` dict per encounter-based probability.

    Only the probabilities of ``self.user`` whose ranked word is in
    ``self.language`` are included; the result is a plain list.
    """
    return [
        {'word': entry.ranked_word.word}
        for entry in EncounterBasedProbability.find_all_by_user(self.user)
        if entry.ranked_word.language == self.language
    ]
def get_upper_bound_percentage_of_extended_vocabulary(self):
    """Return the share of the 10000 top ranks covered, as a percentage.

    Counts the encounter-based probabilities of ``self.user`` whose ranked
    word has ``rank <= 10000`` and expresses that count as a percentage of
    10000, rounded to two decimals.
    """
    encounter_entries = EncounterBasedProbability.find_all_by_user(self.user)
    ranked_count = sum(
        1 for entry in encounter_entries if entry.ranked_word.rank <= 10000)
    return round(float(ranked_count) / 10000 * 100, 2)
def update_probabilities_for_word(word):
    """Recompute the probabilities of ``word`` for the current Flask user.

    The exercise-based probability is set to the average of the values
    produced by ``calculate_known_bookmark_probability`` over all of the
    user's bookmarks for this word. If a ``RankedWord`` exists for the word,
    the matching ``KnownWordProbability`` is updated as well: combined with
    the encounter-based probability when one exists, otherwise set to the
    exercise-based value. Changes are committed through ``db.session``.

    This is best-effort: any ``Exception`` is reported on stdout and not
    re-raised. The success message is printed only when no error occurred.

    :param word: object exposing ``word``, ``language`` and ``id``
    """
    try:
        user = flask.g.user
        bookmarks_for_this_word = Bookmark.find_all_by_user_and_word(
            user, word)
        ex_prob = ExerciseBasedProbability.find(user, word)

        total_prob = 0
        for b in bookmarks_for_this_word:
            ex_prob.calculate_known_bookmark_probability(b)
            total_prob += float(ex_prob.probability)
        # With no bookmarks this raises ZeroDivisionError, which is reported
        # as a failure by the handler below.
        ex_prob.probability = total_prob / len(bookmarks_for_this_word)

        if RankedWord.exists(word.word, word.language):
            ranked_word = RankedWord.find(word.word, word.language)
            if EncounterBasedProbability.exists(user, ranked_word):
                enc_prob = EncounterBasedProbability.find(user, ranked_word)
                known_word_prob = KnownWordProbability.find(
                    user, word, ranked_word)
                print("!known word prob before: " +
                      str(known_word_prob.probability))
                print("!ex_prob: " + str(ex_prob.probability))
                print("!enc_prob: " + str(enc_prob.probability))
                known_word_prob.probability = (
                    KnownWordProbability.calculateKnownWordProb(
                        ex_prob.probability, enc_prob.probability))
                print("!known word prob after: " +
                      str(known_word_prob.probability))
            else:
                known_word_prob = KnownWordProbability.find(
                    user, word, ranked_word)
                known_word_prob.probability = ex_prob.probability

        db.session.commit()
    except Exception:
        # Exception (not a bare except) lets KeyboardInterrupt/SystemExit
        # propagate while keeping the best-effort behaviour.
        print("failed to update probabilities for word with id: " +
              str(word.id))
    else:
        # Only claim success when the try block completed without error.
        print("!successfully updated probabilities for word with id: " +
              str(word.id))
def set_default_encounter_based_prob():
    """Raise each encounter-based probability according to its counter.

    For every ``EncounterBasedProbability`` the probability is increased by
    0.1 once per step of ``range(1, not_looked_up_counter)``, but only while
    the current value is still below 1.0. Each increase is committed.
    Prints ``job2`` when done.
    """
    zeeguu.app.test_request_context().push()
    zeeguu.db.session.commit()

    ceiling = decimal.Decimal('1.0')
    step = decimal.Decimal('0.1')

    for entry in EncounterBasedProbability.find_all():
        # range starts at 1, so a counter of N yields N - 1 rounds.
        for _ in range(1, entry.not_looked_up_counter):
            current = entry.probability
            if current < ceiling:
                entry.probability = current + step
                zeeguu.db.session.commit()
    print('job2')
def set_know_word_prob():
    """Derive ``KnownWordProbability`` entries from the two probability kinds.

    Pass 1 walks every ``EncounterBasedProbability``. When the user also has
    an ``ExerciseBasedProbability`` for the matching ``UserWord``, both
    values are combined with ``KnownWordProbability.calculateKnownWordProb``;
    otherwise the encounter-based value is used and no user word is passed.

    Pass 2 walks every ``ExerciseBasedProbability`` for which no
    encounter-based probability exists for the user and ranked word, and
    stores the exercise-based value directly.

    Prints ``job3`` when done.
    """
    zeeguu.app.test_request_context().push()
    zeeguu.db.session.commit()

    encounter_entries = EncounterBasedProbability.find_all()
    exercise_entries = ExerciseBasedProbability.find_all()

    # Pass 1: start from the encounter-based probabilities.
    for encounter in encounter_entries:
        user = encounter.user
        word = encounter.ranked_word.word
        language = encounter.ranked_word.language
        user_word = (UserWord.find(word, language)
                     if UserWord.exists(word, language) else None)

        if not ExerciseBasedProbability.exists(user, user_word):
            known = KnownWordProbability.find(
                user, None, encounter.ranked_word, encounter.probability)
        else:
            exercise = ExerciseBasedProbability.find(user, user_word)
            combined = KnownWordProbability.calculateKnownWordProb(
                exercise.probability, encounter.probability)
            known = KnownWordProbability.find(
                user, user_word, encounter.ranked_word, combined)
        zeeguu.db.session.add(known)
        zeeguu.db.session.commit()

    # Pass 2: exercise-based probabilities without an encounter-based one.
    for exercise in exercise_entries:
        user = exercise.user
        language = exercise.user_word.language
        word = exercise.user_word.word
        ranked_word = (RankedWord.find(word, language)
                       if RankedWord.exists(word, language) else None)

        if EncounterBasedProbability.exists(user, ranked_word):
            continue
        if not UserWord.exists(word, language):
            continue
        user_word = UserWord.find(word, language)
        known = KnownWordProbability(
            user, user_word, ranked_word, exercise.probability)
        zeeguu.db.session.add(known)
        zeeguu.db.session.commit()
    print('job3')
def calculate_probabilities_after_adding_a_bookmark(self, user,language): """ ML: This has to be refactored. It's a mess. The idea is: you've just added a bookmark. There are two things to do: 1. update the probabilities of the context words (they have been encountered, and not translated) 2. update the probabilities of the word itself - :param user: :param language: :return: """ # 1. computations for adding encounter based probability for the context words for word in self.context_words_with_rank(): enc_prob = EncounterBasedProbability.find_or_create(word, user, language) zeeguu.db.session.add(enc_prob) zeeguu.db.session.commit() user_word = None ranked_word = enc_prob.ranked_word if UserWord.exists(word,language): user_word = UserWord.find(word,language) if ExerciseBasedProbability.exists(user,user_word): #checks if exercise based probability exists for words in context ex_prob = ExerciseBasedProbability.find(user,user_word) known_word_prob = KnownWordProbability.find(user,user_word,ranked_word) known_word_prob.probability = known_word_prob.calculateKnownWordProb(ex_prob.probability, enc_prob.probability) #updates known word probability as exercise based probability already existed. else: if KnownWordProbability.exists(user, user_word,ranked_word): known_word_prob = KnownWordProbability.find(user,user_word,ranked_word) known_word_prob.probability = enc_prob.probability # updates known word probability as encounter based probability already existed else: known_word_prob = KnownWordProbability.find(user,user_word,ranked_word, enc_prob.probability) # new known word probability created as it did not exist zeeguu.db.session.add(known_word_prob) # 2. Update the probabilities of the word itself # 2.a) exercise based prob # ML: Should this thing change? 
# The ex based probability should probably not change after I add a bookmark # Commenting out the following lines: s # ex_prob = ExerciseBasedProbability.find(user, self.origin) # if ex_prob: # ex_prob.update_probability_after_adding_bookmark_with_same_word(self,user) # zeeguu.db.session.add(ex_prob) # 2.b) encounter based prob ranked_word = RankedWord.find(self.origin.word, language) if ranked_word: #checks if ranked_word exists for that looked up word if EncounterBasedProbability.exists(user, ranked_word): # checks if encounter based probability exists for that looked up word enc_prob = EncounterBasedProbability.find(user, ranked_word) enc_prob.word_has_just_beek_bookmarked() db.session.add(enc_prob) db.session.commit() # 2.c) update known word probability if it exists if KnownWordProbability.exists(user, self.origin,ranked_word): known_word_prob = KnownWordProbability.find(user,self.origin,ranked_word) known_word_prob.word_has_just_beek_bookmarked() db.session.add(known_word_prob) db.session.commit()
def calculate_probabilities_after_adding_a_bookmark(self, user, language): """ ML: This has to be refactored. It's a mess. The idea is: you've just added a bookmark. There are two things to do: 1. update the probabilities of the context words (they have been encountered, and not translated) 2. update the probabilities of the word itself - :param user: :param language: :return: """ # 1. computations for adding encounter based probability for the context words for word in self.context_words_with_rank(): enc_prob = EncounterBasedProbability.find_or_create( word, user, language) zeeguu.db.session.add(enc_prob) zeeguu.db.session.commit() user_word = None ranked_word = enc_prob.ranked_word if UserWord.exists(word, language): user_word = UserWord.find(word, language) if ExerciseBasedProbability.exists( user, user_word ): #checks if exercise based probability exists for words in context ex_prob = ExerciseBasedProbability.find(user, user_word) known_word_prob = KnownWordProbability.find( user, user_word, ranked_word) known_word_prob.probability = known_word_prob.calculateKnownWordProb( ex_prob.probability, enc_prob.probability ) #updates known word probability as exercise based probability already existed. else: if KnownWordProbability.exists(user, user_word, ranked_word): known_word_prob = KnownWordProbability.find( user, user_word, ranked_word) known_word_prob.probability = enc_prob.probability # updates known word probability as encounter based probability already existed else: known_word_prob = KnownWordProbability.find( user, user_word, ranked_word, enc_prob.probability ) # new known word probability created as it did not exist zeeguu.db.session.add(known_word_prob) # 2. Update the probabilities of the word itself # 2.a) exercise based prob # ML: Should this thing change? 
# The ex based probability should probably not change after I add a bookmark # Commenting out the following lines: s # ex_prob = ExerciseBasedProbability.find(user, self.origin) # if ex_prob: # ex_prob.update_probability_after_adding_bookmark_with_same_word(self,user) # zeeguu.db.session.add(ex_prob) # 2.b) encounter based prob ranked_word = RankedWord.find(self.origin.word, language) if ranked_word: #checks if ranked_word exists for that looked up word if EncounterBasedProbability.exists( user, ranked_word ): # checks if encounter based probability exists for that looked up word enc_prob = EncounterBasedProbability.find(user, ranked_word) enc_prob.word_has_just_beek_bookmarked() db.session.add(enc_prob) db.session.commit() # 2.c) update known word probability if it exists if KnownWordProbability.exists(user, self.origin, ranked_word): known_word_prob = KnownWordProbability.find( user, self.origin, ranked_word) known_word_prob.word_has_just_beek_bookmarked() db.session.add(known_word_prob) db.session.commit()