예제 #1
0
    def test_text_difficulty(self):
        """Post two German texts and check the difficulty reported for each."""
        payload = """
            {
            "texts":
                [
                    {"content": "Der die das warum, wer nicht fragt bleibt bew\u00f6lkt!", "id": 1},
                    {"content": "Das ist ein Test.", "id": 2}],
            "difficulty_computer": "default"
            }
        """

        # RankedWord.cache_ranked_words() is run inside an application
        # context before the endpoint is called.
        with zeeguu.app.app_context():
            RankedWord.cache_ranked_words()

        response = self.api_post(
            '/get_difficulty_for_text/de', payload, 'application/json')

        # The endpoint answers with one entry per posted text, in order.
        per_text = json.loads(response.data)['difficulties']
        first = per_text[0]
        second = per_text[1]

        assert round(first['score_average'], 2) == 0.67
        assert first['estimated_difficulty'] == 'HARD'

        assert second['estimated_difficulty'] == 'EASY'
예제 #2
0
 def context_words_with_rank(self):
     """Return the context words, except the bookmarked word, that are ranked.

     The words come from ``self.split_words_from_context()``. Every
     occurrence equal to ``self.origin.word`` is dropped, and a remaining
     word is kept only when ``RankedWord.exists`` is true for its
     lower-cased form in ``self.origin.language``. Kept words retain their
     original casing and order.
     """
     context_words = self.split_words_from_context()
     bookmarked_word = self.origin.word
     candidates = [w for w in context_words if w != bookmarked_word]
     return [
         w for w in candidates
         if RankedWord.exists(w.lower(), self.origin.language)
     ]
예제 #3
0
 def context_words_with_rank(self):
     """Collect the ranked words of the context, leaving out the origin word.

     A word from ``self.split_words_from_context()`` is returned (with its
     original casing) when it differs from ``self.origin.word`` and
     ``RankedWord.exists`` accepts its lower-cased form for
     ``self.origin.language``.
     """
     context_words = self.split_words_from_context()
     word_to_skip = self.origin.word
     ranked = []
     for candidate in context_words:
         # The bookmarked word itself is never part of the result.
         if candidate == word_to_skip:
             continue
         if RankedWord.exists(candidate.lower(), self.origin.language):
             ranked.append(candidate)
     return ranked
예제 #4
0
 def find_or_create(cls, word, user, language):
     """Return the encounter based probability of ``word`` for ``user``.

     The ranked word is looked up with the lower-cased ``word``. When
     ``EncounterBasedProbability.exists`` is false for it, ``find`` is called
     with ``DEFAULT_PROBABILITY`` and its result is returned as is.
     Otherwise the existing entry gets its ``not_looked_up_counter``
     incremented and ``boost_prob()`` called before it is returned.
     """
     ranked = RankedWord.find(word.lower(), language)

     if not EncounterBasedProbability.exists(user, ranked):
         return EncounterBasedProbability.find(
             user, ranked, EncounterBasedProbability.DEFAULT_PROBABILITY)

     existing = EncounterBasedProbability.find(user, ranked)
     existing.not_looked_up_counter += 1
     existing.boost_prob()
     return existing
def update_probabilities_for_word(word):
    """Recompute the stored probabilities of ``word`` for ``flask.g.user``.

    Steps:
      1. For every bookmark of the user for this word,
         ``calculate_known_bookmark_probability`` is called on the exercise
         based probability; the resulting values are averaged and stored
         back on it.
      2. If a ranked word exists for ``word.word`` / ``word.language``, the
         known word probability is updated: combined with the encounter
         based probability when one exists, otherwise set to the exercise
         based probability.
      3. The session is committed.

    This is best effort: any ``Exception`` (including the
    ``ZeroDivisionError`` raised when there are no bookmarks) is reported
    with a message and swallowed. The success message is only printed when
    no exception occurred.

    :param word: the user word whose probabilities are updated; its
        ``word``, ``language`` and ``id`` attributes are read
    :return: None
    """
    try:
        user = flask.g.user
        bookmarks_for_this_word = Bookmark.find_all_by_user_and_word(
            user, word)

        ex_prob = ExerciseBasedProbability.find(user, word)
        total_prob = 0
        for b in bookmarks_for_this_word:
            ex_prob.calculate_known_bookmark_probability(b)
            total_prob += float(ex_prob.probability)
        ex_prob.probability = total_prob / len(bookmarks_for_this_word)

        if RankedWord.exists(word.word, word.language):
            ranked_word = RankedWord.find(word.word, word.language)
            if EncounterBasedProbability.exists(user, ranked_word):
                enc_prob = EncounterBasedProbability.find(user, ranked_word)
                known_word_prob = KnownWordProbability.find(
                    user, word, ranked_word)
                print("!known word prob before: " + str(
                    known_word_prob.probability))
                print("!ex_prob: " + str(ex_prob.probability))
                print("!enc_prob: " + str(enc_prob.probability))
                known_word_prob.probability = KnownWordProbability.calculateKnownWordProb(
                    ex_prob.probability, enc_prob.probability)
                print("!known word prob after: " + str(
                    known_word_prob.probability))
            else:
                # No encounter data: the exercise based value is all we have.
                known_word_prob = KnownWordProbability.find(
                    user, word, ranked_word)
                known_word_prob.probability = ex_prob.probability

        db.session.commit()
    except Exception:
        # Deliberately best effort, but SystemExit / KeyboardInterrupt are
        # no longer swallowed.
        print("failed to update probabilities for word with id: " + str(
            word.id))
    else:
        # Only claim success when nothing above failed.
        print("!successfully updated probabilities for word with id: " + str(
            word.id))
 def get_not_encountered_words(self):
     """List the ranked words for which the user has no known word probability.

     Starts from ``RankedWord.find_all(self.language)`` and removes, in
     place, the ranked word of every entry returned by
     ``KnownWordProbability.find_all_by_user_with_rank(self.user)``.

     :return: a list of ``{'word': <word>}`` dictionaries, one per ranked
         word that is left
     """
     remaining_ranks = RankedWord.find_all(self.language)
     encountered = KnownWordProbability.find_all_by_user_with_rank(self.user)

     for known in encountered:
         ranked = known.ranked_word
         if ranked in remaining_ranks:
             remaining_ranks.remove(ranked)

     return [{'word': ranked.word} for ranked in remaining_ranks]
예제 #7
0
def text_difficulty(text,
                    language,
                    known_probabilities,
                    difficulty_computer='default',
                    rank_boundary=REFERENCE_VOCABULARY_SIZE):
    """
    Estimate the difficulty of a text from the difficulty of its words.

    :param text: text to analyse
    :param language: the learned language
    :param known_probabilities: the probabilities that the user knows individual words
    :param difficulty_computer: if known the name of the algo used to compute the difficulty.
        Currently only default is implemented; the value is not read by this function.
    :param rank_boundary: passed on to word_difficulty; defaults to
        REFERENCE_VOCABULARY_SIZE
    :return: a dictionary with three items:
      1. score_average - average difficulty of the words in the text
      2. score_median - median difficulty of the words in the text
      3. estimated_difficulty - one of three "EASY", "MEDIUM", "HARD"
      When the text yields no words both scores are 1 and the estimate
      is "HARD".
    """
    # Calculate difficulty for each word.
    # NOTE(review): the literal True is presumably the deprecated
    # "personalized" flag of word_difficulty -- confirm.
    word_difficulties = [
        word_difficulty(known_probabilities, True, rank_boundary,
                        RankedWord.find_cache(word, language), word)
        for word in split_words_from_text(text)
    ]

    # If we can't compute the text difficulty, we estimate hard. The label
    # is the string "HARD" so that estimated_difficulty always has the same
    # type as the value produced by discrete_text_difficulty.
    if not word_difficulties:
        return dict(score_median=1,
                    score_average=1,
                    estimated_difficulty="HARD")

    # Average difficulty for text
    difficulty_average = sum(word_difficulties) / float(len(word_difficulties))

    # Median difficulty: the middle element of the sorted list (the upper
    # of the two middle elements for an even count). Floor division keeps
    # the index independent of the interpreter's division/rounding rules.
    word_difficulties.sort()
    center = len(word_difficulties) // 2
    difficulty_median = word_difficulties[center]

    return dict(score_median=difficulty_median,
                score_average=difficulty_average,
                estimated_difficulty=discrete_text_difficulty(
                    difficulty_average, difficulty_median))
예제 #8
0
def text_difficulty(text, language, known_probabilities, difficulty_computer='default', rank_boundary=REFERENCE_VOCABULARY_SIZE):
    """
    Estimate the difficulty of a text from the difficulty of its words.

    :param text: text to analyse
    :param language: the learned language
    :param known_probabilities: the probabilities that the user knows individual words
    :param difficulty_computer: if known the name of the algo used to compute the difficulty.
        Currently only default is implemented; the value is not read by this function.
    :param rank_boundary: passed on to word_difficulty; defaults to
        REFERENCE_VOCABULARY_SIZE
    :return: a dictionary with three items:
      1. score_average - average difficulty of the words in the text
      2. score_median - median difficulty of the words in the text
      3. estimated_difficulty - one of three "EASY", "MEDIUM", "HARD"
      When the text yields no words both scores are 1 and the estimate
      is "HARD".
    """
    word_difficulties = []

    # Calculate difficulty for each word.
    # NOTE(review): the literal True is presumably the deprecated
    # "personalized" flag of word_difficulty -- confirm.
    for word in split_words_from_text(text):
        ranked_word = RankedWord.find_cache(word, language)
        word_difficulties.append(
            word_difficulty(known_probabilities, True, rank_boundary, ranked_word, word))

    # If we can't compute the text difficulty, we estimate hard. Use the
    # "HARD" label rather than the number 1, so estimated_difficulty has
    # the same type as the value produced by discrete_text_difficulty.
    if not word_difficulties:
        return dict(
            score_median=1,
            score_average=1,
            estimated_difficulty="HARD")

    # Average difficulty for text
    difficulty_average = sum(word_difficulties) / float(len(word_difficulties))

    # Median difficulty: middle element of the sorted list (upper middle for
    # an even count). Floor division gives the same index regardless of the
    # interpreter's division and rounding rules.
    word_difficulties.sort()
    difficulty_median = word_difficulties[len(word_difficulties) // 2]

    return dict(
        score_median=difficulty_median,
        score_average=difficulty_average,
        estimated_difficulty=discrete_text_difficulty(difficulty_average, difficulty_median))
 def known_words_list(self):
     """Return the user's known words that are also ranked words.

     A word counts as known when one of the user's bookmarks reports
     ``check_is_latest_outcome_too_easy()``. Only words for which
     ``RankedWord.exists`` is true in the language found for
     ``self.lang_code`` are kept, and duplicates are removed.

     :return: a list of ``{'word': <word>}`` dictionaries, in no
         particular order
     """
     language = Language.find(self.lang_code)

     too_easy_words = [
         bookmark.origin.word
         for bookmark in self.user.all_bookmarks()
         if bookmark.check_is_latest_outcome_too_easy()
     ]

     # Going through a set removes duplicates.
     ranked_known_words = set(
         [w for w in too_easy_words if RankedWord.exists(w, language)])

     return [{'word': w} for w in ranked_known_words]
예제 #10
0
    def calculate_probabilities_after_adding_a_bookmark(self, user,language):
        """
        Update the stored probabilities after this bookmark has been added.

        ML: This has to be refactored. It's a mess.

        The idea is: you've just added a bookmark.
        There are two things to do:

          1. update the probabilities of the context words (they have been
          encountered, and not translated)

          2. update the probabilities of the bookmarked word itself

        Changes are written through the database session; several commits
        happen along the way.

        :param user: the user the probabilities belong to
        :param language: the language used to look up user words and ranked words
        :return: None
        """

        # 1. computations for adding encounter based probability for the context words
        for word in self.context_words_with_rank():
            # The encounter based probability is stored and committed for
            # every context word, before anything else is looked up.
            enc_prob = EncounterBasedProbability.find_or_create(word, user, language)
            zeeguu.db.session.add(enc_prob)
            zeeguu.db.session.commit()
            user_word = None
            ranked_word = enc_prob.ranked_word
            if UserWord.exists(word,language):
                user_word = UserWord.find(word,language)
                if ExerciseBasedProbability.exists(user,user_word): #checks if exercise based probability exists for words in context
                    ex_prob = ExerciseBasedProbability.find(user,user_word)
                    known_word_prob = KnownWordProbability.find(user,user_word,ranked_word)
                    known_word_prob.probability = known_word_prob.calculateKnownWordProb(ex_prob.probability, enc_prob.probability) #updates known word probability as exercise based probability already existed.
            # NOTE(review): this else pairs with `if UserWord.exists(...)`,
            # so user_word is still None in this branch; and when a user word
            # exists without an exercise based probability nothing is
            # updated. Confirm that this pairing is intended.
            else:
                if KnownWordProbability.exists(user, user_word,ranked_word):
                    known_word_prob = KnownWordProbability.find(user,user_word,ranked_word)
                    known_word_prob.probability = enc_prob.probability # updates known word probability as encounter based probability already existed
                else:
                    known_word_prob = KnownWordProbability.find(user,user_word,ranked_word, enc_prob.probability) # new known word probability created as it did not exist
                    zeeguu.db.session.add(known_word_prob)
            # NOTE(review): there is no commit at the end of the loop body;
            # these changes are only committed by a later commit call
            # (next iteration or part 2 below), if one runs.

        # 2. Update the probabilities of the word itself

        # 2.a) exercise based prob
        # ML: Should this thing change?
        # The ex based probability should probably not change after I add a bookmark
        # Commenting out the following lines:
        # ex_prob = ExerciseBasedProbability.find(user, self.origin)
        # if ex_prob:
        #     ex_prob.update_probability_after_adding_bookmark_with_same_word(self,user)
        #     zeeguu.db.session.add(ex_prob)

        # 2.b) encounter based prob
        # NOTE(review): part 1 uses zeeguu.db.session while the code below
        # uses db.session; presumably the same session object -- confirm.
        ranked_word = RankedWord.find(self.origin.word, language)
        if ranked_word: #checks if ranked_word exists for that looked up word
            if EncounterBasedProbability.exists(user, ranked_word): # checks if encounter based probability exists for that looked up word
                enc_prob = EncounterBasedProbability.find(user, ranked_word)
                enc_prob.word_has_just_beek_bookmarked()
                db.session.add(enc_prob)
                db.session.commit()

            # 2.c) update known word probability if it exists
            if KnownWordProbability.exists(user, self.origin,ranked_word):
                known_word_prob = KnownWordProbability.find(user,self.origin,ranked_word)
                known_word_prob.word_has_just_beek_bookmarked()
                db.session.add(known_word_prob)
                db.session.commit()
예제 #11
0
        app.config["SQLALCHEMY_DATABASE_URI"] = db_connection_string
    else:
        #  Ooops: we are not testing, and we don't have a DB configured!
        if not "SQLALCHEMY_DATABASE_URI" in app.config:
            print("No db configured. You probably have no config file...")
            exit()

    print("->>  DB Connection String: " +
          app.config["SQLALCHEMY_DATABASE_URI"])

    # getting rid of a warning in new version of SQLAlchemy
    app.config["SQLALCHEMY_TRACK_MODIFICATIONS"] = False


# Module-level initialisation; everything below runs at import time.

# Resolve the database configuration before the extensions are set up.
setup_db_connection()
# Asset environment bound to the app: its cache, output directory and URL
# point at the instance folder (the "gen" sub-directory, served as "/gen").
env = flask.ext.assets.Environment(app)
env.cache = app.instance_path
env.directory = os.path.join(app.instance_path, "gen")
env.url = "/gen"
# Register the "static" directory located next to this file under "/static".
env.append_path(
    os.path.join(os.path.dirname(os.path.abspath(__file__)), "static"),
    "/static")

# Bind the database object to the app and create the tables.
db.init_app(app)
db.create_all(app=app)

# NOTE(review): imported here instead of at the top of the file, presumably
# because the model module needs the initialised db / app -- confirm.
from zeeguu.model.ranked_word import RankedWord

# Run RankedWord.cache_ranked_words() once, inside an application context.
with app.app_context():
    RankedWord.cache_ranked_words()
예제 #12
0
 def find_rank(cls, word, language):
     """Return whatever ``RankedWord.find`` yields for ``word`` in ``language``."""
     ranked_word = RankedWord.find(word, language)
     return ranked_word
예제 #13
0
    def calculate_probabilities_after_adding_a_bookmark(self, user, language):
        """
        Update the stored probabilities after this bookmark has been added.

        ML: This has to be refactored. It's a mess.

        The idea is: you've just added a bookmark.
        There are two things to do:

          1. update the probabilities of the context words (they have been
          encountered, and not translated)

          2. update the probabilities of the bookmarked word itself

        Changes are written through the database session; several commits
        happen along the way.

        :param user: the user the probabilities belong to
        :param language: the language used to look up user words and ranked words
        :return: None
        """

        # 1. computations for adding encounter based probability for the context words
        for word in self.context_words_with_rank():
            # The encounter based probability is stored and committed for
            # every context word, before anything else is looked up.
            enc_prob = EncounterBasedProbability.find_or_create(
                word, user, language)
            zeeguu.db.session.add(enc_prob)
            zeeguu.db.session.commit()
            user_word = None
            ranked_word = enc_prob.ranked_word
            if UserWord.exists(word, language):
                user_word = UserWord.find(word, language)
                if ExerciseBasedProbability.exists(
                        user, user_word
                ):  #checks if exercise based probability exists for words in context
                    ex_prob = ExerciseBasedProbability.find(user, user_word)
                    known_word_prob = KnownWordProbability.find(
                        user, user_word, ranked_word)
                    known_word_prob.probability = known_word_prob.calculateKnownWordProb(
                        ex_prob.probability, enc_prob.probability
                    )  #updates known word probability as exercise based probability already existed.
            # NOTE(review): this else pairs with `if UserWord.exists(...)`,
            # so user_word is still None in this branch; and when a user word
            # exists without an exercise based probability nothing is
            # updated. Confirm that this pairing is intended.
            else:
                if KnownWordProbability.exists(user, user_word, ranked_word):
                    known_word_prob = KnownWordProbability.find(
                        user, user_word, ranked_word)
                    known_word_prob.probability = enc_prob.probability  # updates known word probability as encounter based probability already existed
                else:
                    known_word_prob = KnownWordProbability.find(
                        user, user_word, ranked_word, enc_prob.probability
                    )  # new known word probability created as it did not exist
                    zeeguu.db.session.add(known_word_prob)
            # NOTE(review): there is no commit at the end of the loop body;
            # these changes are only committed by a later commit call
            # (next iteration or part 2 below), if one runs.

        # 2. Update the probabilities of the word itself

        # 2.a) exercise based prob
        # ML: Should this thing change?
        # The ex based probability should probably not change after I add a bookmark
        # Commenting out the following lines:
        # ex_prob = ExerciseBasedProbability.find(user, self.origin)
        # if ex_prob:
        #     ex_prob.update_probability_after_adding_bookmark_with_same_word(self,user)
        #     zeeguu.db.session.add(ex_prob)

        # 2.b) encounter based prob
        # NOTE(review): part 1 uses zeeguu.db.session while the code below
        # uses db.session; presumably the same session object -- confirm.
        ranked_word = RankedWord.find(self.origin.word, language)
        if ranked_word:  #checks if ranked_word exists for that looked up word
            if EncounterBasedProbability.exists(
                    user, ranked_word
            ):  # checks if encounter based probability exists for that looked up word
                enc_prob = EncounterBasedProbability.find(user, ranked_word)
                enc_prob.word_has_just_beek_bookmarked()
                db.session.add(enc_prob)
                db.session.commit()

            # 2.c) update known word probability if it exists
            if KnownWordProbability.exists(user, self.origin, ranked_word):
                known_word_prob = KnownWordProbability.find(
                    user, self.origin, ranked_word)
                known_word_prob.word_has_just_beek_bookmarked()
                db.session.add(known_word_prob)
                db.session.commit()
예제 #14
0
 def find_rank(cls, word, language):
     """Look up ``word`` in ``language`` through ``RankedWord.find``.

     :return: the result of ``RankedWord.find(word, language)``, unchanged
     """
     found = RankedWord.find(word, language)
     return found