def test_get_most_recent_move_from_api(self, patched_site):
    """get_move_data returns the expected move when the API yields le_multi.

    The patched site's ``logevents`` is stubbed to hand back
    ``self.le_multi``; the result must match the expected namespace, title
    (as UTF-8 bytes) and timestamp stored on the test case.
    """

    def fake_logevents(*args, **kwargs):
        # Ignore whatever query arguments are passed; always serve le_multi.
        return self.le_multi

    patched_site.logevents.side_effect = fake_logevents

    result = logic_page.get_move_data(
        self.wp10db,
        self.wikidb,
        0,
        b'Some Moved Article',
        datetime(1970, 1, 1),
    )

    self.assertIsNotNone(result)
    self.assertEqual(self.expected_ns, result['dest_ns'])
    expected_title_bytes = self.expected_title.encode('utf-8')
    self.assertEqual(expected_title_bytes, result['dest_title'])
    self.assertEqual(self.expected_dt, result['timestamp_dt'])
def test_get_single_move_too_old_from_api(self, patched_site):
    """get_move_data returns None when called with a 2014-01-01 timestamp.

    The patched site's ``logevents`` is stubbed to hand back
    ``self.le_return``.
    """

    def fake_logevents(*args, **kwargs):
        # Ignore whatever query arguments are passed; always serve le_return.
        return self.le_return

    patched_site.logevents.side_effect = fake_logevents

    cutoff = datetime(2014, 1, 1)
    result = logic_page.get_move_data(
        self.wp10db, self.wikidb, 0, b'Some Moved Article', cutoff)

    self.assertIsNone(result)
def test_no_redirect_no_move(self, unused_patched_site):
    """get_move_data returns None when the patched site is left unconfigured."""
    epoch = datetime(1970, 1, 1)
    result = logic_page.get_move_data(
        self.wp10db, self.wikidb, 0, b'Some Moved Article', epoch)
    self.assertIsNone(result)
def process_unseen_articles(wikidb, wp10db, project, old_ratings, seen):
    """Reset the ratings of previously rated articles that were not seen.

    Every entry of ``old_ratings`` whose ref is missing from ``seen`` is
    looked up with ``logic_page.get_move_data``. When move data is found the
    move is recorded through ``logic_page.update_page_moved``. In either
    case a ``Rating`` carrying NOT_A_CLASS for the relevant assessment
    kind(s) is written with ``logic_rating.insert_or_update`` and a log entry
    is added per kind. Work is committed in batches of
    ``MAX_ARTICLES_BEFORE_COMMIT`` articles and once more at the end.

    Args:
        wikidb: Handle passed through to ``logic_page.get_move_data``.
        wp10db: Database handle passed to the logic helpers; it is pinged
            and committed by this function.
        project: Project being updated; ``p_project`` and ``timestamp_dt``
            are read from it.
        old_ratings: Mapping of article ref to its previous rating. Each ref
            is bytes of the form ``b'<namespace>:<title>'``.
        seen: Container of refs that were already handled and must be
            left untouched here.
    """
    total = len(old_ratings)
    ratio = len(seen) / total if total != 0 else 'NaN'
    logger.debug('Looking for unseen articles, ratio was: %s', ratio)

    in_seen = 0
    skipped = 0
    processed = 0
    n = 0  # Articles written since the last commit.
    for ref, old_rating in old_ratings.items():
        if ref in seen:
            in_seen += 1
            continue

        # By default, we evaluate both assessment kinds.
        kind = AssessmentKind.BOTH
        # NOTE(review): NOT_A_CLASS is encoded to bytes before being stored
        # below, so it appears to be a str; if r_quality / r_importance hold
        # bytes these equality checks can never match -- confirm.
        if old_rating.r_quality == NOT_A_CLASS or old_rating.r_quality is None:
            # The quality rating is not set, so just evaluate importance.
            kind = AssessmentKind.IMPORTANCE
            if (old_rating.r_importance == NOT_A_CLASS or
                    old_rating.r_importance is None):
                # The importance rating is also not set, so don't do
                # anything.
                skipped += 1
                continue

        # Decode once; the text form is needed for logging and for parsing.
        ref_text = ref.decode('utf-8')
        logger.debug('Processing unseen article %s', ref_text)
        processed += 1

        ns_text, title_text = ref_text.split(':', 1)
        ns = int(ns_text)
        title = title_text.encode('utf-8')

        move_data = logic_page.get_move_data(wp10db, wikidb, ns, title,
                                             project.timestamp_dt)
        if move_data is not None:
            logic_page.update_page_moved(wp10db, project, ns, title,
                                         move_data['dest_ns'],
                                         move_data['dest_title'],
                                         move_data['timestamp_dt'])

        # Mark this article as having NOT_A_CLASS for its quality or
        # importance. This probably means the article was deleted, but could
        # in fact mean that we just failed to find its move data. Either way,
        # the new article would have already been picked up by the assessment
        # updater, assuming it was tagged correctly.
        rating = Rating(r_project=project.p_project,
                        r_namespace=ns,
                        r_article=title,
                        r_score=0)
        touches_quality = kind in (AssessmentKind.QUALITY,
                                   AssessmentKind.BOTH)
        touches_importance = kind in (AssessmentKind.IMPORTANCE,
                                      AssessmentKind.BOTH)

        if touches_quality:
            # NOTE(review): this assigns `quality`, while the timestamp
            # field below uses the `r_` prefix -- confirm that `quality`
            # (not `r_quality`) is the intended attribute on Rating.
            rating.quality = NOT_A_CLASS.encode('utf-8')
            if move_data:
                rating.set_quality_timestamp_dt(move_data['timestamp_dt'])
            else:
                rating.r_quality_timestamp = GLOBAL_TIMESTAMP_WIKI

        if touches_importance:
            # NOTE(review): same attribute-name question as for `quality`.
            rating.importance = NOT_A_CLASS.encode('utf-8')
            if move_data:
                rating.set_importance_timestamp_dt(move_data['timestamp_dt'])
            else:
                rating.r_importance_timestamp = GLOBAL_TIMESTAMP_WIKI

        logic_rating.insert_or_update(wp10db, rating, kind)

        if touches_quality:
            logic_rating.add_log_for_rating(wp10db, rating,
                                            AssessmentKind.QUALITY,
                                            old_rating.r_quality)
        if touches_importance:
            logic_rating.add_log_for_rating(wp10db, rating,
                                            AssessmentKind.IMPORTANCE,
                                            old_rating.r_importance)

        n += 1
        if n >= MAX_ARTICLES_BEFORE_COMMIT:
            wp10db.ping()
            wp10db.commit()
            # Start a new batch; without this reset every later article
            # would trigger its own ping + commit.
            n = 0

    logger.info('End, committing db')
    wp10db.ping()
    wp10db.commit()
    logger.debug('SEEN REPORT:\nin seen: %s\nskipped: %s\nprocessed: %s',
                 in_seen, skipped, processed)