def calcualte_useless_articles(cls):
    """Collect the candidate articles that should be discarded.

    Candidates are the documents whose ``seq_id`` is contained in
    ``default_key_user.get_full_dataset()``.  A candidate is reported as
    useless when any of the following holds:

    * its ``seq_id`` is lower than the ``seq_id`` of the object returned by
      ``cls.get_earliest_valid_obj()`` (it is "outdated");
    * it is not outdated, but was published on a day earlier than the day of
      ``utcnow() - VALID_DURATION``;
    * it is not outdated, was published on an earlier day than today (UTC)
      that is still inside the valid window, and falls outside the
      best-scored three quarters of that day's articles.

    Articles published today (UTC) or later are never pruned by date/score.

    Returns:
        list: the useless article documents; outdated ones come first,
        followed by the pruned ones in ascending publication-day order.
    """
    # Function-scope import, as in the original
    # (presumably to avoid a circular import -- TODO confirm).
    from people.mixins import default_key_user

    candidate_seq_ids = default_key_user.get_full_dataset()
    articles = cls.objects(seq_id__in=candidate_seq_ids).only(
        'id', 'seq_id', 'category', 'published_at', 'quality', 'hot')
    line_article = cls.get_earliest_valid_obj()

    # Split the candidates once into "outdated" and "still usable".
    outdated_articles = []
    usable_articles = []
    for article in articles:
        if article.seq_id < line_article.seq_id:
            outdated_articles.append(article)
        else:
            usable_articles.append(article)

    now = datetime.utcnow()
    today = now.date()
    deadline_date = (now - VALID_DURATION).date()

    # NOTE(review): the result of this call was never used by the original
    # code; the call is kept only in case it has side effects -- confirm and
    # remove if it is a pure getter.
    get_global_cal_time()

    def published_date(article):
        return article.published_at.date()

    def score(article):
        # Only the first element of calculate_scores' result is used for
        # ranking; the zero arguments are passed exactly as before.
        return calculate_scores(
            0, 0, article.published_at, article.quality, article.hot, 0)[0]

    useless_articles = list(outdated_articles)

    # Group the *usable* articles by publication day.  Grouping the outdated
    # ones (as the code previously did) could only re-append entries that are
    # already in the result, so no date/score pruning ever took place.
    # groupby() requires its input to be sorted by the same key.
    by_day = groupby(sorted(usable_articles, key=published_date),
                     published_date)
    for pubdate, day_articles in by_day:
        if pubdate >= today:
            continue
        if pubdate < deadline_date:
            # The whole day is past the validity window.
            useless_articles.extend(day_articles)
            continue
        # Rank best-first and drop everything after the top three quarters.
        # Floor division keeps the slice index an int on Python 2 and 3.
        # NOTE(review): floor(3n/4) articles are kept, so a day with a
        # single article keeps none -- confirm this is intended.
        ranked = sorted(day_articles, key=score, reverse=True)
        useless_articles.extend(ranked[len(ranked) * 3 // 4:])
    return useless_articles
def get_all_candidate_article_seq_ids(cls):
    """Return whatever ``default_key_user.get_full_dataset()`` yields.

    The value is passed through unchanged; ``cls`` itself is not consulted.
    Judging by the function name, the result is a collection of article
    ``seq_id`` values -- verify against ``people.mixins``.
    """
    # Imported at call time rather than at module load
    # (presumably to avoid a circular import -- TODO confirm).
    from people.mixins import default_key_user as key_user

    candidate_seq_ids = key_user.get_full_dataset()
    return candidate_seq_ids