def get_cohorts_for_article(article_id):
    """Return, as a JSON string, all cohorts associated with the given article."""
    found_article = Article.find_by_id(article_id)
    cohorts = CohortArticleMap.get_cohorts_for_article(found_article)
    return json.dumps(cohorts)
def add_article_to_cohort():
    """
    Add an article to a cohort.

    Reads ``cohort_id`` and ``article_id`` from the POST form data.
    Requires the current user to have permission for the cohort
    (``check_permission_for_cohort`` is expected to abort otherwise).
    Creates the cohort/article mapping only if it does not already
    exist, timestamped with the current time.

    :return: the string "OK" on success.
    """
    cohort = Cohort.find(request.form.get("cohort_id"))
    check_permission_for_cohort(cohort.id)
    article = Article.find_by_id(request.form.get("article_id"))
    # Idempotent: adding an article that is already mapped is a no-op.
    if not CohortArticleMap.find(cohort.id, article.id):
        now = datetime.now()
        new_mapping = CohortArticleMap(cohort, article, now)
        db.session.add(new_mapping)
        db.session.commit()
    return "OK"
def delete_article_from_cohort():
    """
    Remove an article from a cohort.

    Reads ``cohort_id`` and ``article_id`` from the POST form data.
    Requires the current user to have permission for the cohort
    (``check_permission_for_cohort`` is expected to abort otherwise).

    :return: the string "OK" if the mapping existed and was deleted;
        otherwise an error response (401) saying the article does not
        belong to the cohort.
    """
    cohort = Cohort.find(request.form.get("cohort_id"))
    check_permission_for_cohort(cohort.id)
    article = Article.find_by_id(request.form.get("article_id"))
    mapping = CohortArticleMap.find(cohort.id, article.id)
    if mapping:
        db.session.delete(mapping)
        db.session.commit()
        return "OK"
    else:
        return make_error(401, "That article does not belong to the cohort!")
def more_like_this_article(user, count, article_id):
    """
    Find up to ``count`` articles similar to the given one, using
    Elasticsearch's "more_like_this" query, and return them as
    user-article info dicts for ``user``.
    """
    source_article = Article.find_by_id(article_id)
    query_body = build_more_like_this_query(
        count, source_article.content, source_article.language
    )

    es = Elasticsearch(ES_CONN_STRING)
    # execute search
    res = es.search(index=ES_ZINDEX, body=query_body)
    hit_list = res["hits"].get("hits")

    # TODO need to make sure either that the searched on article is always a part of the list \
    # or that it is never there.
    # it could be used to show on website; you searched on X, here is what we found related to X
    similar_articles = _to_articles_from_ES_hits(hit_list)

    return [
        UserArticle.user_article_info(user, each) for each in similar_articles
    ]
def _print_spearman(df, col_a, col_b):
    """Print the Spearman correlation and p-value between two columns of df."""
    corr, p_value = stats.spearmanr(df[col_a], df[col_b])
    print(col_a, col_b, corr, p_value)


def articles_correlations():
    """
    Build a per-article interaction table from user activity events and
    print Spearman correlations between article properties (difficulty,
    word count, title length) and interaction counts (opened, translated,
    spoken, liked, closed).

    Only events for users whose learned language matches each language in
    ``languages_to_analyze`` are counted, and only for articles in that
    same language.
    """
    # Maps an activity-event name to the counter column it increments.
    event_to_column = {
        "UMR - OPEN ARTICLE": "opened",
        "UMR - TRANSLATE TEXT": "translated",
        "UMR - SPEAK TEXT": "spoken",
        "UMR - LIKE ARTICLE": "liked",
        "UMR - ARTICLE CLOSED": "closed",
    }

    articles_df = pd.DataFrame(columns=[
        "id", "lang", "difficulty", "word_count", "title_length",
        "opened", "translated", "spoken", "liked", "closed"
    ])

    all_users = User.find_all()
    print(len(all_users))

    for reading_language in languages_to_analyze:
        print("\nLANGUAGE:", reading_language)
        language_id = Language.find(reading_language).id

        for user in tqdm(all_users):
            if user.learned_language_id != language_id:
                continue
            for event in UserActivityData.find(user):
                article_id = event.article_id
                if not article_id:
                    continue
                article_data = Article.find_by_id(article_id)
                if article_data.language_id != language_id:
                    continue

                # First time we see this article: add a row with zeroed counters.
                if not (articles_df['id'] == article_id).any():
                    row = {
                        "id": article_id,
                        "lang": article_data.language_id,
                        "difficulty": article_data.fk_difficulty,
                        "word_count": article_data.word_count,
                        "title_length": len(article_data.title.split()),
                        "opened": 0,
                        "translated": 0,
                        "spoken": 0,
                        "liked": 0,
                        "closed": 0,
                    }
                    # DataFrame.append was removed in pandas 2.0; concat is
                    # the supported way to add a row.
                    articles_df = pd.concat(
                        [articles_df, pd.DataFrame([row])], ignore_index=True)

                column = event_to_column.get(event.event)
                if column:
                    articles_df.loc[articles_df.id == article_id, column] += 1

        print("Articles:", len(articles_df))

        # Each pair (a, b): print spearmanr(articles_df[a], articles_df[b]).
        correlation_pairs = [
            ("word_count", "difficulty"),
            ("liked", "difficulty"),
            ("translated", "difficulty"),
            ("spoken", "difficulty"),
            ("opened", "difficulty"),
            ("closed", "difficulty"),
            ("title_length", "difficulty"),
            ("opened", "title_length"),
        ]
        for col_a, col_b in correlation_pairs:
            _print_spearman(articles_df, col_a, col_b)
def _to_articles_from_ES_hits(hits):
    """Resolve each Elasticsearch hit's ``_id`` into its Article object."""
    return [Article.find_by_id(hit.get("_id")) for hit in hits]