def system_languages():
    result = dict()

    result["learnable_languages"] = list(
        map((lambda x: dict(name=x.name, code=x.code)), Language.available_languages())
    )
    result["native_languages"] = list(
        map((lambda x: dict(name=x.name, code=x.code)), Language.native_languages())
    )

    return result
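# Hedged usage sketch (not part of the original module): assumes an
# initialized Zeeguu app/db context so the Language queries can run.
# The result maps "learnable_languages" / "native_languages" to lists
# of {"name": ..., "code": ...} dicts, ready for JSON serialization.
def _example_system_languages():
    result = system_languages()
    for entry in result["learnable_languages"]:
        print(entry["code"], entry["name"])  # e.g. "de German"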
def create_own_cohort():
    """
    Creates a cohort in the database.
    Requires form input: inv_code, name, language_code (language_id is
    deprecated), max_students. The teacher is taken from the session.
    """

    def _link_teacher_cohort(user_id, cohort_id):
        """
        Takes user_id and cohort_id and links them together in the
        teacher_cohort_map table.
        """
        from zeeguu.core.model import TeacherCohortMap

        user = User.find_by_id(user_id)
        cohort = Cohort.find(cohort_id)
        db.session.add(TeacherCohortMap(user, cohort))
        db.session.commit()
        return "added teacher_cohort relationship"

    params = request.form
    inv_code = params.get("inv_code")
    name = params.get("name")
    # language_id is deprecated and kept here for backwards compatibility;
    # use language_code instead
    language_code = params.get("language_code") or params.get("language_id")
    if name is None or inv_code is None or language_code is None:
        flask.abort(400)

    # Language.available_languages() returns Language objects;
    # the code is matched against their string form
    available_languages = Language.available_languages()
    code_allowed = False
    for code in available_languages:
        if language_code in str(code):
            code_allowed = True

    if not code_allowed:
        flask.abort(400)

    language = Language.find_or_create(language_code)
    teacher_id = flask.g.user.id
    max_students = params.get("max_students")
    if int(max_students) < 1:
        flask.abort(400)

    try:
        c = Cohort(inv_code, name, language, max_students)
        db.session.add(c)
        db.session.commit()
        _link_teacher_cohort(teacher_id, c.id)
        return "OK"
    except ValueError:
        flask.abort(400)
        return "ValueError"
    except sqlalchemy.exc.IntegrityError:
        flask.abort(400)
        return "IntegrityError"
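# Hedged usage sketch: the route path below is an assumption (the route
# decorator is not shown in this snippet). With a logged-in teacher
# session, creating a cohort is a plain form POST.
def _example_create_own_cohort(client):
    response = client.post(
        "/create_own_cohort",  # hypothetical route path
        data=dict(inv_code="abc123", name="German 101",
                  language_code="de", max_students="20"),
    )
    assert response.data == b"OK"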
def update_cohort(cohort_id):
    """
    Changes details of a specified cohort.
    Requires form input: inv_code, name, language_code (language_id is
    deprecated), declared_level_min, declared_level_max.
    """
    check_permission_for_cohort(cohort_id)

    try:
        params = request.form
        cohort_to_change = Cohort.query.filter_by(id=cohort_id).one()
        cohort_to_change.inv_code = params.get("inv_code")
        cohort_to_change.name = params.get("name")

        # language_id is deprecated; use language_code instead
        language_code = params.get("language_code") or params.get("language_id")
        cohort_to_change.language_id = Language.find(language_code).id

        cohort_to_change.declared_level_min = params.get("declared_level_min")
        cohort_to_change.declared_level_max = params.get("declared_level_max")

        db.session.commit()
        return "OK"
    except ValueError:
        flask.abort(400)
        return "ValueError"
    except sqlalchemy.exc.IntegrityError:
        flask.abort(400)
        return "IntegrityError"
def _reading_preferences_hash(user):
    """
    Computes the hash of the user's reading preferences, since this is
    needed in several places.

    :param user:
    :return: articles_hash: ArticlesHash
    """
    user_filter_subscriptions = TopicFilter.all_for_user(user)
    filters = [subscription.topic for subscription in user_filter_subscriptions]

    user_topic_subscriptions = TopicSubscription.all_for_user(user)
    topics = [subscription.topic for subscription in user_topic_subscriptions]

    user_languages = Language.all_reading_for_user(user)

    user_search_filters = SearchFilter.all_for_user(user)
    search_filters = [each.search for each in user_search_filters]

    user_searches = SearchSubscription.all_for_user(user)
    searches = [each.search for each in user_searches]

    articles_hash = ArticlesCache.calculate_hash(
        user, topics, filters, searches, search_filters, user_languages
    )

    return articles_hash
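# Hedged sketch: the hash serves as a cache key, so recommendations only
# need recomputing when some reading preference actually changed.
# Assumes an initialized Zeeguu db context.
def _example_preferences_changed(user, previous_hash):
    return _reading_preferences_hash(user) != previous_hash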
def get_interesting_topics():
    """
    Get a list of interesting topics for the given language.
    Interesting topics are, for now, defined as topics that:

    - are not followed yet
    - are not in the filters list
    - have articles in one of the user's reading languages

    :return:
    """
    topic_data = []
    already_filtered = [
        each.topic for each in TopicFilter.all_for_user(flask.g.user)
    ]
    already_subscribed = [
        each.topic for each in TopicSubscription.all_for_user(flask.g.user)
    ]

    reading_languages = Language.all_reading_for_user(flask.g.user)
    loc_topics = []
    for each in reading_languages:
        loc_topics.extend(LocalizedTopic.all_for_language(each))

    topics = [each.topic for each in loc_topics]

    for topic in topics:
        if (topic not in already_filtered) and (topic not in already_subscribed):
            topic_data.append(topic.as_dictionary())

    return json_result(topic_data)
def recompute_for_topics_and_languages():
    from zeeguu.core.model import Topic, Language

    for each in Topic.get_all_topics():
        each.all_articles()

    for each in Language.available_languages():
        each.get_articles()
def available_native_languages():
    """
    :return: JSON with language codes for the supported native languages;
        currently only English,
        e.g. ["en", "fr", "de", "it", "no", "ro"]
    """
    available_language_codes = list(
        map((lambda x: x.code), Language.native_languages())
    )
    return json.dumps(available_language_codes)
def __init__(
    self,
    email,
    name,
    password,
    learned_language=None,
    native_language=None,
    invitation_code=None,
    cohort=None,
):
    self.email = email
    self.name = name
    self.update_password(password)
    self.learned_language = learned_language or Language.default_learned()
    self.native_language = native_language or Language.default_native_language()
    self.invitation_code = invitation_code
    self.cohort = cohort
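# Hedged usage sketch: constructing a User and relying on the language
# defaults; assumes an initialized db context so Language.default_learned()
# and Language.default_native_language() can resolve.
def _example_new_user():
    user = User("[email protected]", "Ada", "s3cret",
                learned_language=Language.find_or_create("de"))
    # native_language falls back to Language.default_native_language()
    return user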
def available_languages():
    """
    :return: JSON with language codes for the supported languages,
        e.g. ["en", "fr", "de", "it", "no", "ro"]
    """
    available_language_codes = list(
        map((lambda x: x.code), Language.available_languages())
    )
    return json.dumps(available_language_codes)
def cleanup_all_articles_in_language(language_code):
    language_id = Language.find(language_code).id
    all_articles = Article.query.filter_by(language_id=language_id).all()

    for each in all_articles:
        cleaned_content = cleanup_non_content_bits(each.content)

        if cleaned_content != each.content:
            each.content = cleaned_content
            zeeguu.core.db.session.add(each)
            print(each.title + "\n\n")

    zeeguu.core.db.session.commit()
def create_anonymous(cls, uuid, password, learned_language_code=None, native_language_code=None):
    """
    :param uuid:
    :param password:
    :param learned_language_code:
    :param native_language_code:
    :return:
    """
    # since the DB must have an email we generate a fake one
    fake_email = uuid + cls.ANONYMOUS_EMAIL_DOMAIN

    if learned_language_code is not None:
        try:
            learned_language = Language.find_or_create(learned_language_code)
        except NoResultFound:
            learned_language = None
    else:
        learned_language = None

    if native_language_code is not None:
        try:
            native_language = Language.find_or_create(native_language_code)
        except NoResultFound:
            native_language = None
    else:
        native_language = None

    new_user = cls(
        fake_email,
        uuid,
        password,
        learned_language=learned_language,
        native_language=native_language,
    )

    return new_user
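# Hedged usage sketch: anonymous accounts are keyed by a client-side
# uuid; assumes create_anonymous is exposed as a classmethod on User
# (it takes cls) and that a db session is available to persist it.
def _example_create_anonymous(session):
    user = User.create_anonymous("0f1e2d3c", "s3cret",
                                 learned_language_code="de")
    session.add(user)
    session.commit()
    return user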
def set_learned_language_level(self, language_code: str, level: str, session=None):
    learned_language = Language.find_or_create(language_code)
    from zeeguu.core.model import UserLanguage

    language = UserLanguage.find_or_create(session, self, learned_language)
    language.cefr_level = int(level)
    if session:
        session.add(language)
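# Hedged usage sketch: levels are stored as ints parsed from the string
# argument; the mapping of CEFR letters to numbers (e.g. "3" for B1) is
# an assumption, not shown in this snippet.
def _example_set_level(user, session):
    user.set_learned_language_level("de", "3", session)
    session.commit()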
def upload_own_text():
    db_session.rollback()

    language = Language.find_or_create(request.form.get("language", ""))
    content = request.form.get("content", "")
    title = request.form.get("title", "")

    new_article_id = Article.create_from_upload(
        db_session, title, content, flask.g.user, language
    )

    return str(new_article_id)
def __init__(self):
    super().__init__()

    self.rss_feed = self._create_model_object()
    self.feed = self.rss_feed
    self.save(self.rss_feed)

    lang1 = Language.find_or_create('de')
    url = Url.find_or_create(self.db.session, url_spiegel_rss)

    self.feed1 = RSSFeed.find_or_create(
        self.db.session, url, "", "", icon_name_spiegel, language=lang1
    )
    self.save(self.feed1)
def analyze_user(user_id, language, only_liked=ANALYZE_ONLY_LIKED):
    user = User.find_by_id(user_id)
    language_id = Language.find(language).id

    macro_sessions = extract_macro_sessions_from_db(user, language_id)
    macro_sessions = filter_sessions(macro_sessions, only_liked)

    if PRINT_DETAIL:
        for macro_session in macro_sessions:
            macro_session.print_details()
            input("<Enter to continue>")

    summarize_yearly_reading_speed(macro_sessions)
def _get_past_translation(
    word: str, from_lang_code: str, to_lang_code: str, context: str, user: User = None
):
    try:
        from_language = Language.find(from_lang_code)
        to_language = Language.find(to_lang_code)

        try:
            origin_word = UserWord.find(word, from_language)
            text = Text.query.filter_by(content=context).one()
        except NoResultFound:
            return None

        query = Bookmark.query.join(
            UserWord, UserWord.id == Bookmark.translation_id
        ).filter(
            UserWord.language_id == to_language.id,
            Bookmark.origin_id == origin_word.id,
            Bookmark.text_id == text.id,
        )

        if user:
            query = query.filter(Bookmark.user_id == user.id)

        # prioritize older users; note that order_by returns a new query,
        # so its result must be reassigned
        query = query.order_by(Bookmark.user_id.asc())

        first_match = query.first()
        if first_match:
            return first_match.translation.word

        return None

    except Exception as e:
        capture_exception(e)
        return None
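# Hedged usage sketch: reuse an earlier translation of the same word in
# the same context before calling an external translation service.
def _example_lookup_cached_translation(user):
    past = _get_past_translation("Hund", "de", "en", "Der Hund schläft.", user)
    if past:
        return past  # e.g. "dog", if somebody translated it before
    return None  # caller falls through to a live translation service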
def update_own_text(article_id):
    language = Language.find_or_create(request.form.get("language", ""))
    content = request.form.get("content", "")
    title = request.form.get("title", "")

    a = Article.query.filter(Article.id == article_id).one()
    a.update(language, content, title)

    db_session.add(a)
    db_session.commit()

    return "OK"
def cache_articles(code):
    if code != zeeguu.core.app.config.get("PRIVATE_API_CODE"):
        return "Nope"

    from zeeguu.core.model import Topic, Language

    for each in Topic.get_all_topics():
        each.all_articles()

    for each in Language.available_languages():
        each.get_articles()

    return "OK"
def article_recommendations_for_user(user, count):
    """
    Retrieve :param count: articles, equally distributed over all the
    feeds to which the :param user: is registered.

    Fails if no language is selected.

    :return:
    """

    # Temporary fix for the experiment of Gabriel
    AIKI_USERS_COHORT_ID = 109
    if user.cohort_id == AIKI_USERS_COHORT_ID:
        return CohortArticleMap.get_articles_info_for_cohort(user.cohort)

    import zeeguu.core

    user_languages = Language.all_reading_for_user(user)
    if not user_languages:
        return [user.learned_language]

    reading_pref_hash = _reading_preferences_hash(user)
    _recompute_recommender_cache_if_needed(user, zeeguu.core.db.session)

    # two fast calls to /articles/recommended might result in a race
    # condition in _recompute_recommender_cache, which in turn might
    # result in duplicates in the db; since this is being sunset in
    # favor of elastic search it's not worth fixing the race condition;
    # instead we simply ensure that duplicates are removed at this point
    all_articles = set(ArticlesCache.get_articles_for_hash(reading_pref_hash, count))
    all_articles = [
        each for each in all_articles if (not each.broken and each.published_time)
    ]
    all_articles = SortedList(all_articles, lambda x: x.published_time)

    return [
        UserArticle.user_article_info(user, article)
        for article in reversed(all_articles)
    ]
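# Hedged usage sketch: an endpoint built on this helper would simply
# serialize the article-info dicts (the route itself is not shown in
# this snippet).
def _example_recommendations(user):
    infos = article_recommendations_for_user(user, 20)
    return json.dumps(infos)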
def upload_articles(cohort_id):
    """
    Uploads articles for a cohort with input from a POST request.
    """
    check_permission_for_cohort(cohort_id)

    try:
        for article_data in json.loads(request.data):
            url = Url("userarticle/{}".format(uuid.uuid4().hex))
            title = article_data["title"]
            authors = article_data["authors"]
            content = article_data["content"]
            summary = article_data["summary"]
            published_time = datetime.now()
            language_code = article_data["language_code"]
            language = Language.find(language_code)

            new_article = Article(
                url,
                title,
                authors,
                content,
                summary,
                published_time,
                None,  # rss feed
                language,
            )

            db.session.add(new_article)
            db.session.flush()
            db.session.refresh(new_article)

            cohort = Cohort.find(cohort_id)
            now = datetime.now()
            new_cohort_article_map = CohortArticleMap(cohort, new_article, now)
            db.session.add(new_cohort_article_map)

        db.session.commit()
        return "OK"
    except ValueError:
        flask.abort(400)
        return "ValueError"
def _find_articles_for_user(user):
    """
    This method gets all the topic and search subscriptions for a user.
    It then returns all the articles that are associated with these.

    :param user:
    :return:
    """
    user_languages = Language.all_reading_for_user(user)
    topic_subscriptions = TopicSubscription.all_for_user(user)
    search_subscriptions = SearchSubscription.all_for_user(user)

    subscribed_articles = _filter_subscribed_articles(
        search_subscriptions, topic_subscriptions, user_languages, user
    )

    return subscribed_articles
def tts():
    import zeeguu.core
    from zeeguu.core.model import UserWord, Language

    db_session = zeeguu.core.db.session

    text_to_pronounce = request.form.get("text", "")
    language_id = request.form.get("language_id", "")

    if not text_to_pronounce:
        return ""

    user_word = UserWord.find_or_create(
        db_session, text_to_pronounce, Language.find_or_create(language_id)
    )

    audio_file_path = _file_name_for_user_word(user_word, language_id)

    if not os.path.isfile(DATA_FOLDER + audio_file_path):
        _save_speech_to_file(user_word, language_id, audio_file_path)

    return audio_file_path
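# Hedged usage sketch: the audio file is synthesized once and then
# served from disk on repeated requests; the "/tts" route path is an
# assumption (the decorator is not shown in this snippet).
def _example_tts(client):
    first = client.post("/tts", data=dict(text="Hund", language_id="de"))
    # a second call for the same word reuses the file cached on disk
    second = client.post("/tts", data=dict(text="Hund", language_id="de"))
    assert first.data == second.data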
def set_learned_language(self, language_code, session=None):
    self.learned_language = Language.find(language_code)

    from zeeguu.core.model import UserLanguage

    # disable the exercises and reading for all the other languages;
    # the filter must scope UserLanguage rows to this user
    all_other_languages = (
        UserLanguage.query.filter(UserLanguage.user_id == self.id)
        .filter(UserLanguage.doing_exercises == True)
        .all()
    )
    for each in all_other_languages:
        each.doing_exercises = False
        each.reading_news = False
        if session:
            session.add(each)

    language = UserLanguage.find_or_create(session, self, self.learned_language)
    language.reading_news = True
    language.doing_exercises = True
    if session:
        session.add(language)
def _get_user_articles_sources_languages(user, limit=1000):
    """
    This method is used to get all the user articles for the sources, if
    there are any selected sources for the user; otherwise it gets all
    the articles for the user's current learning languages.

    :param user: the user for which the articles should be fetched
    :param limit: the amount of articles for each source or language
    :return: a list of articles based on the parameters
    """
    user_languages = Language.all_reading_for_user(user)

    all_articles = []

    for language in user_languages:
        info(f"Getting articles for {language}")
        new_articles = language.get_articles(most_recent_first=True)
        all_articles.extend(new_articles)
        info(f"Added {len(new_articles)} articles for {language}")

    return all_articles
def articles_correlations():
    articles_df = pd.DataFrame(columns=[
        "id", "lang", "difficulty", "word_count", "title_length",
        "opened", "translated", "spoken", "liked", "closed"
    ])

    all_users = User.find_all()
    print(len(all_users))

    for reading_language in languages_to_analyze:
        print("\nLANGUAGE:", reading_language)
        language_id = Language.find(reading_language).id

        for user in tqdm(all_users):
            if user.learned_language_id == language_id:
                events = UserActivityData.find(user)
                for event in events:
                    article_id = event.article_id
                    if article_id:
                        article_data = Article.find_by_id(article_id)
                        if article_data.language_id == language_id:
                            if not (articles_df['id'] == article_id).any():
                                title_len = len(article_data.title.split())
                                df = {
                                    "id": article_id,
                                    "lang": article_data.language_id,
                                    "difficulty": article_data.fk_difficulty,
                                    "word_count": article_data.word_count,
                                    "title_length": title_len,
                                    "opened": 0,
                                    "translated": 0,
                                    "spoken": 0,
                                    "liked": 0,
                                    "closed": 0,
                                }
                                # DataFrame.append is deprecated in newer
                                # pandas; pd.concat is the modern equivalent
                                articles_df = articles_df.append(df, ignore_index=True)

                            if event.event == "UMR - OPEN ARTICLE":
                                articles_df.loc[articles_df.id == article_id, 'opened'] += 1
                            if event.event == "UMR - TRANSLATE TEXT":
                                articles_df.loc[articles_df.id == article_id, 'translated'] += 1
                            if event.event == "UMR - SPEAK TEXT":
                                articles_df.loc[articles_df.id == article_id, 'spoken'] += 1
                            if event.event == "UMR - LIKE ARTICLE":
                                articles_df.loc[articles_df.id == article_id, 'liked'] += 1
                            if event.event == "UMR - ARTICLE CLOSED":
                                articles_df.loc[articles_df.id == article_id, 'closed'] += 1

        print("Articles:", len(articles_df))

        correlation_variables = [
            "word_count", "difficulty", "liked", "translated",
            "spoken", "opened", "closed", "title_length"
        ]

        # Spearman correlations: word_count, liked, translated, spoken,
        # opened, closed, and title_length each against fk_difficulty,
        # plus opened against title_length
        pairs = [(0, 1), (2, 1), (3, 1), (4, 1), (5, 1), (6, 1), (7, 1), (5, 7)]
        for a, b in pairs:
            spearman_corr = stats.spearmanr(articles_df[correlation_variables[a]],
                                            articles_df[correlation_variables[b]])
            print(correlation_variables[a], correlation_variables[b],
                  spearman_corr[0], spearman_corr[1])
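# Hedged note: stats.spearmanr returns a (correlation, p-value) pair;
# a small self-contained example of the call used above:
def _example_spearman():
    from scipy import stats
    rho, p = stats.spearmanr([1, 2, 3, 4], [2, 4, 5, 9])
    # rho == 1.0 for a perfectly monotonic pair; p is the two-sided p-value
    return rho, p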
feed_name = input(f"Feed name (Enter for: {test_feed.title}): ") or test_feed.title
print(f'= {feed_name}')

icon_name = input("Icon name to be found in resources folder (e.g. 20min.png): ")
print(f'= {icon_name}')

description = input(f'Description (Enter for: {test_feed.description}): ') or test_feed.description
print(f'= {description}')

_language = input("Language code (e.g. en): ")
print(f'= {_language}')

feed_url = Url.find_or_create(zeeguu.core.db.session, _feed_url)
language = Language.find_or_create(_language)

rss_feed = RSSFeed.find_or_create(
    zeeguu.core.db.session,
    feed_url,
    feed_name,
    description,
    icon_name=icon_name,
    language=language,
)

print("Done: ")
print(rss_feed.title)
print(rss_feed.description)
print(rss_feed.language_id)
print(rss_feed.url.as_string())
def article_search_for_user(user, count, search_terms):
    """
    Handles searching. Finds the relevant values in the database and
    uses them to search Elasticsearch for related articles.

    :param user:
    :param count: max amount of articles to return
    :param search_terms: the search string input by the user
    :return: articles
    """

    user_languages = Language.all_reading_for_user(user)

    per_language_article_count = count / len(user_languages)

    final_article_mix = []
    for language in user_languages:
        print(f"language: {language}")

        # 0. Ensure appropriate difficulty
        declared_level_min, declared_level_max = user.levels_for(language)
        lower_bounds = declared_level_min * 10
        upper_bounds = declared_level_max * 10

        # 1. Unwanted user topics
        # =======================
        user_search_filters = SearchFilter.all_for_user(user)
        unwanted_user_topics = []
        for user_search_filter in user_search_filters:
            unwanted_user_topics.append(user_search_filter.search.keywords)
        print(f"keywords to exclude: {unwanted_user_topics}")

        # 2. Topics to exclude / filter out
        # =================================
        excluded_topics = TopicFilter.all_for_user(user)
        topics_to_exclude = [each.topic.title for each in excluded_topics]
        print(f"topics to exclude: {topics_to_exclude}")

        # 3. Topics subscribed, and thus to include
        # =========================================
        topic_subscriptions = TopicSubscription.all_for_user(user)
        topics_to_include = [
            subscription.topic.title for subscription in topic_subscriptions
        ]
        print(f"topics to include: {topics_to_include}")

        # 4. Wanted user topics
        # =====================
        user_subscriptions = SearchSubscription.all_for_user(user)
        wanted_user_topics = []
        for sub in user_subscriptions:
            wanted_user_topics.append(sub.search.keywords)
        print(f"keywords to include: {wanted_user_topics}")

        # build the query using elastic_query_builder
        query_body = build_elastic_query(
            per_language_article_count,
            search_terms,
            _list_to_string(topics_to_include),
            _list_to_string(topics_to_exclude),
            _list_to_string(wanted_user_topics),
            _list_to_string(unwanted_user_topics),
            language,
            upper_bounds,
            lower_bounds,
        )

        es = Elasticsearch(ES_CONN_STRING)
        res = es.search(index=ES_ZINDEX, body=query_body)

        hit_list = res["hits"].get("hits")
        final_article_mix.extend(_to_articles_from_ES_hits(hit_list))

    # convert to article_info and return
    return [
        UserArticle.user_article_info(user, article)
        for article in final_article_mix
        if article is not None
    ]
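# Hedged sketch of the _list_to_string helper used above; its definition
# is not part of this snippet, and the query builder is assumed to expect
# a single space-separated keyword string.
def _list_to_string(input_list):
    return " ".join(input_list) or ""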
import zeeguu.core
from zeeguu.core.model import Article, Language, LocalizedTopic

session = zeeguu.core.db.session

counter = 0

languages = Language.available_languages()
# overridden: this run only processes Danish
languages = [Language.find('da')]

for language in languages:
    articles = Article.query.filter(Article.language == language).order_by(
        Article.id.desc()).all()
    loc_topics = LocalizedTopic.all_for_language(language)

    total_articles = len(articles)

    for article in articles:
        counter += 1
        print(f"{article.title}")
        print(f"{article.url.as_string()}")

        for loc_topic in loc_topics:
            if loc_topic.matches_article(article):
                article.add_topic(loc_topic.topic)
                print(f"  #{loc_topic.topic_translated}")

        print("")
        session.add(article)

        if counter % 1000 == 0:
            percentage = (100 * counter / total_articles) / 100
            # assumed progress message; the original print was truncated
            print(f"{percentage} done")
def flatten_the_unicode_characters(language):
    # assumed lookup: the definition of all_danish_articles was not part
    # of this snippet; the articles in the given language are intended
    all_danish_articles = Article.query.filter(Article.language == language).all()

    counter = 0
    fixed = 0

    for each in all_danish_articles:
        flattened = flatten_composed_unicode_characters(each.content)
        if flattened != each.content:
            fixed += 1
            print(f"Fixing article with id: {each.id}")
            each.content = flattened
            db.session.add(each)

        counter += 1
        if counter % 10000 == 0:
            print(f"... {counter}")

    db.session.commit()
    db.session.close()
    print(f"Fixed a total of: {fixed} articles!")


if __name__ == '__main__':
    if len(argv) < 2:
        print("ERROR: Provide language code as argument")
        exit(-1)

    language = Language.find_or_create(argv[1])
    print(f"Looking up articles in: {language.name}")
    flatten_the_unicode_characters(language)
def create_account(
    db_session,
    username,
    password,
    invite_code,
    email,
    learned_language_code,
    native_language_code,
    learned_cefr_level,
):
    cohort_name = ""
    if password is None or len(password) < 4:
        raise Exception("Password should be at least 4 characters long")

    if not valid_invite_code(invite_code):
        raise Exception("Invitation code is not recognized. Please contact us.")

    cohort = Cohort.query.filter_by(inv_code=invite_code).first()

    if cohort:
        if cohort.cohort_still_has_capacity():
            cohort_name = cohort.name
        else:
            raise Exception(
                "No more places in this class. Please contact us ([email protected])."
            )

    try:
        learned_language = Language.find_or_create(learned_language_code)
        native_language = Language.find_or_create(native_language_code)

        new_user = User(
            email,
            username,
            password,
            invitation_code=invite_code,
            cohort=cohort,
            learned_language=learned_language,
            native_language=native_language,
        )
        db_session.add(new_user)

        learned_language = UserLanguage.find_or_create(
            db_session, new_user, learned_language
        )
        learned_language.cefr_level = int(learned_cefr_level)

        # TODO: although these are required... they should simply be
        # functions of the CEFR level, so at some point they should be
        # removed
        learned_language.declared_level_min = 0
        learned_language.declared_level_max = 11
        db_session.add(learned_language)

        if cohort:
            if cohort.is_cohort_of_teachers:
                teacher = Teacher(new_user)
                db_session.add(teacher)

        db_session.commit()

        send_new_user_account_email(username, invite_code, cohort_name)

        return new_user

    except sqlalchemy.exc.IntegrityError:
        raise Exception("There is already an account for this email.")
    except Exception as e:
        print(e)
        raise Exception("Could not create the account")
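# Hedged usage sketch: account creation is all-or-nothing; any failure
# surfaces as an Exception carrying a user-facing message. The invite
# code and email below are illustrative values.
def _example_create_account(db_session):
    try:
        return create_account(
            db_session, "ada", "s3cret", "classInviteCode42",
            "[email protected]", "de", "en", "3",
        )
    except Exception as e:
        # e.g. "There is already an account for this email."
        print(e)
        return None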