def set_know_word_prob():
    """Derive KnownWordProbability rows from the stored probabilities.

    NOTE(review): the original text of this function had lost its line
    breaks; the nesting below is reconstructed from the statement order
    and should be confirmed against version control.

    Two passes are made:

    1. For every EncounterBasedProbability, look up the UserWord with the
       same word/language (if one exists). When an
       ExerciseBasedProbability exists for that user and user word, both
       probabilities are combined via
       ``KnownWordProbability.calculateKnownWordProb``; otherwise the
       encounter probability is used on its own.
    2. For every ExerciseBasedProbability whose user has no
       EncounterBasedProbability for the corresponding ranked word, a
       KnownWordProbability is constructed from the exercise probability
       alone, provided a matching UserWord exists.

    Each resulting object is added to the session and committed
    immediately. Returns nothing; prints ``job3`` when finished.
    """
    zeeguu.app.test_request_context().push()
    zeeguu.db.session.commit()

    encounter_probs = EncounterBasedProbability.find_all()
    exercise_probs = ExerciseBasedProbability.find_all()

    # Pass 1: one known-word probability per encounter-based probability.
    for encounter_prob in encounter_probs:
        learner = encounter_prob.user
        ranked = encounter_prob.ranked_word
        spelling = ranked.word
        lang = ranked.language

        matching_user_word = (
            UserWord.find(spelling, lang)
            if UserWord.exists(spelling, lang)
            else None
        )

        if not ExerciseBasedProbability.exists(learner, matching_user_word):
            # No exercise data: the encounter probability stands alone.
            known = KnownWordProbability.find(
                learner, None, ranked, encounter_prob.probability)
        else:
            exercise_prob = ExerciseBasedProbability.find(
                learner, matching_user_word)
            combined = KnownWordProbability.calculateKnownWordProb(
                exercise_prob.probability, encounter_prob.probability)
            known = KnownWordProbability.find(
                learner, matching_user_word, ranked, combined)

        zeeguu.db.session.add(known)
        zeeguu.db.session.commit()

    # Pass 2: exercise-based probabilities that pass 1 did not cover.
    for exercise_prob in exercise_probs:
        learner = exercise_prob.user
        lang = exercise_prob.user_word.language
        spelling = exercise_prob.user_word.word

        ranked = (
            RankedWord.find(spelling, lang)
            if RankedWord.exists(spelling, lang)
            else None
        )

        if EncounterBasedProbability.exists(learner, ranked):
            continue
        if not UserWord.exists(spelling, lang):
            continue

        found_user_word = UserWord.find(spelling, lang)
        known = KnownWordProbability(
            learner, found_user_word, ranked, exercise_prob.probability)
        zeeguu.db.session.add(known)
        zeeguu.db.session.commit()

    # Parenthesised single argument: prints the same text on Python 2 and 3.
    print("job3")
def set_know_word_prob():
    """Derive KnownWordProbability rows from the stored probabilities.

    NOTE(review): the original text of this function had lost its line
    breaks; the nesting below is reconstructed from the statement order
    and should be confirmed against version control.

    Two passes are made:

    1. For every EncounterBasedProbability, look up the UserWord with the
       same word/language (if one exists). When an
       ExerciseBasedProbability exists for that user and user word, both
       probabilities are combined via
       ``KnownWordProbability.calculateKnownWordProb``; otherwise the
       encounter probability is used on its own.
    2. For every ExerciseBasedProbability whose user has no
       EncounterBasedProbability for the corresponding ranked word, a
       KnownWordProbability is constructed from the exercise probability
       alone, provided a matching UserWord exists.

    Each resulting object is added to the session and committed
    immediately. Returns nothing; prints ``job3`` when finished.
    """
    zeeguu.app.test_request_context().push()
    zeeguu.db.session.commit()

    encounter_probs = EncounterBasedProbability.find_all()
    exercise_probs = ExerciseBasedProbability.find_all()

    # Pass 1: one known-word probability per encounter-based probability.
    for encounter_prob in encounter_probs:
        learner = encounter_prob.user
        ranked = encounter_prob.ranked_word
        spelling = ranked.word
        lang = ranked.language

        matching_user_word = (
            UserWord.find(spelling, lang)
            if UserWord.exists(spelling, lang)
            else None
        )

        if not ExerciseBasedProbability.exists(learner, matching_user_word):
            # No exercise data: the encounter probability stands alone.
            known = KnownWordProbability.find(
                learner, None, ranked, encounter_prob.probability)
        else:
            exercise_prob = ExerciseBasedProbability.find(
                learner, matching_user_word)
            combined = KnownWordProbability.calculateKnownWordProb(
                exercise_prob.probability, encounter_prob.probability)
            known = KnownWordProbability.find(
                learner, matching_user_word, ranked, combined)

        zeeguu.db.session.add(known)
        zeeguu.db.session.commit()

    # Pass 2: exercise-based probabilities that pass 1 did not cover.
    for exercise_prob in exercise_probs:
        learner = exercise_prob.user
        lang = exercise_prob.user_word.language
        spelling = exercise_prob.user_word.word

        ranked = (
            RankedWord.find(spelling, lang)
            if RankedWord.exists(spelling, lang)
            else None
        )

        if EncounterBasedProbability.exists(learner, ranked):
            continue
        if not UserWord.exists(spelling, lang):
            continue

        found_user_word = UserWord.find(spelling, lang)
        known = KnownWordProbability(
            learner, found_user_word, ranked, exercise_prob.probability)
        zeeguu.db.session.add(known)
        zeeguu.db.session.commit()

    # Parenthesised single argument: prints the same text on Python 2 and 3.
    print("job3")
def get_difficulty_for_text(lang_code):
    """
    Compute a median and an average difficulty score for each posted text.

    URL parameters:
    :param lang_code: the language of the text

    Json data:
    :param texts: json array that contains the texts to calculate the
        difficulty for. Each text consists of an array with the text itself
        as 'content' and an additional 'id' which gets roundtripped unchanged
    :param personalized (optional): calculate difficulty score for a specific
        user? (Enabled by default). May be a JSON boolean/number or a
        string; the strings 'false' and '0' (any case) disable it.
    :param rank_boundary (optional): upper boundary for word frequency rank
        (between 1 and 10'000); larger values are capped at 10'000

    :return difficulties: json array, contains the difficulties as arrays
        with the key 'score_median' for the median and 'score_average' for
        the average difficulty the value (between 0 (easy) and 1 (hard)) and
        the 'id' parameter to identify the corresponding text. A text that
        yields no words gets 0.0 for both scores. The plain string 'FAIL'
        is returned when the language is unknown or the request body
        carries no 'texts' entry.
    """
    language = Language.find(lang_code)
    if language is None:
        return 'FAIL'

    data = flask.request.get_json()
    # The parsed body may be None or a non-object JSON value; handle both
    # the same way as a missing 'texts' key instead of raising TypeError.
    if not isinstance(data, dict) or 'texts' not in data:
        return 'FAIL'
    texts = list(data['texts'])

    personalized = True
    if 'personalized' in data:
        flag = data['personalized']
        if hasattr(flag, 'lower'):
            # String form: only '', 'false' and '0' are falsy, as before.
            personalized = flag.lower() not in ('', 'false', '0')
        else:
            # JSON true/false/number/null has no .lower(); use truthiness.
            personalized = bool(flag)

    rank_boundary = 10000.0
    if 'rank_boundary' in data:
        rank_boundary = min(float(data['rank_boundary']), 10000.0)

    user = flask.g.user
    known_probabilities = KnownWordProbability.find_all_by_user_cached(user)

    difficulties = []
    for text in texts:
        # Calculate difficulty for each word
        words_difficulty = []
        for word in util.split_words_from_text(text['content']):
            ranked_word = RankedWord.find_cache(word, language)

            # Value between 0 (easy) and 1 (hard); unranked words stay at 1.
            word_difficulty = 1.0
            if ranked_word is not None:
                # Check if the user knows the word
                try:
                    # Value between 0 (unknown) and 1 (known)
                    known_probability = known_probabilities[word]
                except KeyError:
                    known_probability = None

                if personalized and known_probability is not None:
                    word_difficulty -= float(known_probability)
                elif ranked_word.rank <= rank_boundary:
                    # Value between 0 (rare) and 1 (frequent)
                    word_frequency = (
                        rank_boundary - (ranked_word.rank - 1)
                    ) / rank_boundary
                    word_difficulty -= word_frequency

            words_difficulty.append(word_difficulty)

        if words_difficulty:
            # Median difficulty for text. Floor division keeps the index in
            # range whether or not true division is in effect; the former
            # round(len / 2) could select the last element or overshoot.
            words_difficulty.sort()
            difficulty_median = words_difficulty[len(words_difficulty) // 2]

            # Average difficulty for text
            difficulty_average = (
                sum(words_difficulty) / float(len(words_difficulty)))
        else:
            # No words at all: avoid IndexError / ZeroDivisionError.
            difficulty_median = 0.0
            difficulty_average = 0.0

        difficulties.append(
            dict(score_median=difficulty_median,
                 score_average=difficulty_average,
                 id=text['id']))

    response = json.dumps(dict(difficulties=difficulties))
    return flask.Response(response, status=200, mimetype='application/json')
def get_difficulty_for_text(lang_code):
    """
    Compute a median and an average difficulty score for each posted text.

    URL parameters:
    :param lang_code: the language of the text

    Json data:
    :param texts: json array that contains the texts to calculate the
        difficulty for. Each text consists of an array with the text itself
        as 'content' and an additional 'id' which gets roundtripped unchanged
    :param personalized (optional): calculate difficulty score for a specific
        user? (Enabled by default). May be a JSON boolean/number or a
        string; the strings 'false' and '0' (any case) disable it.
    :param rank_boundary (optional): upper boundary for word frequency rank
        (between 1 and 10'000); larger values are capped at 10'000

    :return difficulties: json array, contains the difficulties as arrays
        with the key 'score_median' for the median and 'score_average' for
        the average difficulty the value (between 0 (easy) and 1 (hard)) and
        the 'id' parameter to identify the corresponding text. A text that
        yields no words gets 0.0 for both scores. The plain string 'FAIL'
        is returned when the language is unknown or the request body
        carries no 'texts' entry.
    """
    language = Language.find(lang_code)
    if language is None:
        return 'FAIL'

    data = flask.request.get_json()
    # The parsed body may be None or a non-object JSON value; handle both
    # the same way as a missing 'texts' key instead of raising TypeError.
    if not isinstance(data, dict) or 'texts' not in data:
        return 'FAIL'
    texts = list(data['texts'])

    personalized = True
    if 'personalized' in data:
        flag = data['personalized']
        if hasattr(flag, 'lower'):
            # String form: only '', 'false' and '0' are falsy, as before.
            personalized = flag.lower() not in ('', 'false', '0')
        else:
            # JSON true/false/number/null has no .lower(); use truthiness.
            personalized = bool(flag)

    rank_boundary = 10000.0
    if 'rank_boundary' in data:
        rank_boundary = min(float(data['rank_boundary']), 10000.0)

    user = flask.g.user
    known_probabilities = KnownWordProbability.find_all_by_user_cached(user)

    difficulties = []
    for text in texts:
        # Calculate difficulty for each word
        words_difficulty = []
        for word in util.split_words_from_text(text['content']):
            ranked_word = RankedWord.find_cache(word, language)

            # Value between 0 (easy) and 1 (hard); unranked words stay at 1.
            word_difficulty = 1.0
            if ranked_word is not None:
                # Check if the user knows the word
                try:
                    # Value between 0 (unknown) and 1 (known)
                    known_probability = known_probabilities[word]
                except KeyError:
                    known_probability = None

                if personalized and known_probability is not None:
                    word_difficulty -= float(known_probability)
                elif ranked_word.rank <= rank_boundary:
                    # Value between 0 (rare) and 1 (frequent)
                    word_frequency = (
                        rank_boundary - (ranked_word.rank - 1)
                    ) / rank_boundary
                    word_difficulty -= word_frequency

            words_difficulty.append(word_difficulty)

        if words_difficulty:
            # Median difficulty for text. Floor division keeps the index in
            # range whether or not true division is in effect; the former
            # round(len / 2) could select the last element or overshoot.
            words_difficulty.sort()
            difficulty_median = words_difficulty[len(words_difficulty) // 2]

            # Average difficulty for text
            difficulty_average = (
                sum(words_difficulty) / float(len(words_difficulty)))
        else:
            # No words at all: avoid IndexError / ZeroDivisionError.
            difficulty_median = 0.0
            difficulty_average = 0.0

        difficulties.append(
            dict(score_median=difficulty_median,
                 score_average=difficulty_average,
                 id=text['id']))

    response = json.dumps(dict(difficulties=difficulties))
    return flask.Response(response, status=200, mimetype='application/json')