Пример #1
0
def get_project_rating_by_type(wp10db,
                               project_name,
                               quality=None,
                               importance=None,
                               project_b_name=None,
                               quality_b=None,
                               importance_b=None,
                               pattern=None,
                               page=None,
                               limit=100):
    """Fetch ratings for a project, optionally paired with a second project.

    The SQL text is produced by ``_project_rating_query``; this function
    sanitizes ``limit``, assembles the bound parameters and converts the
    returned rows into ``Rating`` objects.

    Args:
        wp10db: Database connection whose ``cursor()`` is a context manager
            yielding dict-like rows.
        project_name: Value bound to ``r_project``.
        quality: Value bound to ``r_quality`` (bound even when ``None``).
        importance: Value bound to ``r_importance`` (bound even when
            ``None``).
        project_b_name: If given, bound to ``r_project_b`` and the result
            becomes a list of rating pairs.
        quality_b: If given, bound to ``r_quality_b``.
        importance_b: If given, bound to ``r_importance_b``.
        pattern: If given, wrapped in ``%`` wildcards and bound to
            ``article_pattern_compiled``.
        page: Forwarded unchanged to ``_project_rating_query``.
        limit: Row limit. Values that cannot be converted to ``int`` and
            negative values fall back to 100; values above 500 are capped
            at 500.

    Returns:
        A list of ``Rating`` objects when ``project_b_name`` is ``None``,
        otherwise a list of ``(rating_a, rating_b)`` tuples.
    """
    try:
        limit = int(limit)
    except (TypeError, ValueError):
        # TypeError covers None and other non-numeric objects; ValueError
        # covers strings such as 'abc'. Both fall back to the default.
        limit = 100
    if limit < 0:
        limit = 100
    if limit > 500:
        limit = 500

    query = _project_rating_query(project_name,
                                  quality=quality,
                                  importance=importance,
                                  project_b_name=project_b_name,
                                  quality_b=quality_b,
                                  importance_b=importance_b,
                                  pattern=pattern,
                                  page=page,
                                  limit=limit)
    params = {
        'r_project': project_name,
        'r_quality': quality,
        'r_importance': importance,
    }

    # Optional parameters are only bound when the caller supplied them.
    if pattern is not None:
        params['article_pattern_compiled'] = '%' + pattern + '%'
    if project_b_name is not None:
        params['r_project_b'] = project_b_name
    if quality_b is not None:
        params['r_quality_b'] = quality_b
    if importance_b is not None:
        params['r_importance_b'] = importance_b

    with wp10db.cursor() as cursor:
        cursor.execute(query, params)
        if project_b_name is None:
            return [Rating(**db_rating) for db_rating in cursor.fetchall()]

        results = []
        for res in cursor.fetchall():
            # Columns of the second project arrive under 'rating_b.'-prefixed
            # keys; pop them so the remaining keys describe the first rating.
            rating_b = Rating(r_project=res.pop('rating_b.r_project'),
                              r_article=res.pop('rating_b.r_article'),
                              r_namespace=res.pop('rating_b.r_namespace'),
                              r_quality=res.pop('rating_b.r_quality'),
                              r_importance=res.pop('rating_b.r_importance'))
            rating_a = Rating(**res)
            results.append((rating_a, rating_b))
        return results
Пример #2
0
 def setUp(self):
   """Build the Rating fixture stored on ``self.rating``."""
   super().setUp()
   # Let assertion failures print complete diffs.
   self.maxDiff = None
   fixture_fields = {
       'r_project': b'Test Project',
       'r_namespace': 4,
       'r_article': b'Test article pages',
       'r_quality': b'NotAClass',
       'r_quality_timestamp': b'2020-01-13T08:04:20Z',
       'r_importance': b'NotAClass',
       'r_importance_timestamp': b'2020-04-04T15:55:55Z',
   }
   self.rating = Rating(**fixture_fields)
Пример #3
0
    def test_add_log_for_importance_rating(self):
        """An importance log entry records the project, article, old and new values."""
        article = b'Testing Stuff'
        rating = Rating(r_project=b'Test Project',
                        r_namespace=0,
                        r_article=article,
                        r_importance=b'Mid-Class',
                        r_importance_timestamp=b'2018-04-01T12:30:00Z')
        logic_rating.add_log_for_rating(self.wp10db, rating,
                                        AssessmentKind.IMPORTANCE,
                                        b'NotA-Class')

        with self.wp10db.cursor() as cursor:
            query = '''
        SELECT * FROM ''' + Log.table_name + '''
        WHERE l_article = %s
      '''
            cursor.execute(query, (article, ))
            row = cursor.fetchone()
            self.assertIsNotNone(row)
            log = Log(**row)

        # Checked in order; each attribute is read only when its turn comes.
        expected_fields = (
            ('l_project', b'Test Project'),
            ('l_namespace', 0),
            ('l_article', b'Testing Stuff'),
            ('l_new', b'Mid-Class'),
            ('l_old', b'NotA-Class'),
            ('l_action', b'importance'),
        )
        for field_name, expected in expected_fields:
            self.assertEqual(expected, getattr(log, field_name))
Пример #4
0
def get_project_ratings(wp10db, project_name):
    """Return all rows of the ratings table for ``project_name`` as Rating objects."""
    bound = {'r_project': project_name}
    with wp10db.cursor() as cursor:
        select_sql = 'SELECT * FROM ' + Rating.table_name + '''
      WHERE r_project = %(r_project)s
    '''
        cursor.execute(select_sql, bound)
        rows = cursor.fetchall()
        ratings = []
        for row in rows:
            ratings.append(Rating(**row))
        return ratings
Пример #5
0
  def _insert_ratings(self):
    """Insert one row per entry of ``self.ratings``, committing after each insert.

    Each entry is indexed as (article, quality, importance).
    """
    insert_sql = '''
            INSERT INTO ratings
              (r_project, r_namespace, r_article, r_score, r_quality,
               r_quality_timestamp, r_importance, r_importance_timestamp)
            VALUES
              (%(r_project)s, %(r_namespace)s, %(r_article)s, %(r_score)s,
               %(r_quality)s, %(r_quality_timestamp)s, %(r_importance)s,
               %(r_importance_timestamp)s)
        '''
    for entry in self.ratings:
      new_rating = Rating(r_project=b'Test Project',
                          r_namespace=0,
                          r_article=entry[0])
      new_rating.r_quality = entry[1]
      new_rating.r_quality_timestamp = GLOBAL_TIMESTAMP_WIKI
      new_rating.r_importance = entry[2]
      new_rating.r_importance_timestamp = GLOBAL_TIMESTAMP_WIKI

      with self.wp10db.cursor() as cursor:
        row_values = attr.asdict(new_rating)
        cursor.execute(insert_sql, row_values)
      self.wp10db.commit()
Пример #6
0
def process_unseen_articles(wikidb, wp10db, project, old_ratings, seen):
    """Reset the ratings of previously rated articles that were not seen.

    For every entry of ``old_ratings`` whose reference is not in ``seen``,
    the article's move data is looked up (and recorded when found), its
    quality and/or importance is set to NOT_A_CLASS, the rating is stored
    and a log entry is added for each assessment kind that changed.

    Args:
        wikidb: Wiki database connection, passed to ``logic_page``.
        wp10db: WP 1.0 database connection; pinged and committed
            periodically and once more at the end.
        project: Project object; ``p_project`` and ``timestamp_dt`` are read.
        old_ratings: Mapping of article reference (bytes of the form
            ``b'<namespace>:<title>'``) to the previously stored rating.
        seen: Collection of article references already processed.
    """
    denom = len(old_ratings)
    ratio = len(seen) / denom if denom != 0 else 'NaN'

    logger.debug('Looking for unseen articles, ratio was: %s', ratio)
    in_seen = 0
    skipped = 0
    processed = 0
    n = 0
    not_a_class = NOT_A_CLASS.encode('utf-8')
    for ref, old_rating in old_ratings.items():
        if ref in seen:
            in_seen += 1
            continue

        # By default, we evaluate both assessment kinds.
        kind = AssessmentKind.BOTH
        # NOTE(review): NOT_A_CLASS is a str (it is .encode()d below) while
        # stored ratings appear to be bytes, so these equality checks may
        # never match -- confirm the stored type before relying on them.
        if old_rating.r_quality == NOT_A_CLASS or old_rating.r_quality is None:
            # The quality rating is not set, so just evaluate importance
            kind = AssessmentKind.IMPORTANCE
            if (old_rating.r_importance == NOT_A_CLASS
                    or old_rating.r_importance is None):
                # The importance rating is also not set, so don't do anything.
                skipped += 1
                continue

        logger.debug('Processing unseen article %s', ref.decode('utf-8'))
        processed += 1
        # Split the raw bytes directly; ':' is ASCII so this matches splitting
        # the decoded text and re-encoding the pieces.
        ns_bytes, title = ref.split(b':', 1)
        ns = int(ns_bytes)

        move_data = logic_page.get_move_data(wp10db, wikidb, ns, title,
                                             project.timestamp_dt)
        if move_data is not None:
            logic_page.update_page_moved(wp10db, project, ns, title,
                                         move_data['dest_ns'],
                                         move_data['dest_title'],
                                         move_data['timestamp_dt'])

        # Mark this article as having NOT_A_CLASS for its quality or importance.
        # This probably means the article was deleted, but could in fact mean that
        # we just failed to find its move data. Either way, the new article would
        # have already been picked up by the assessment updater, assuming it was
        # tagged correctly.
        rating = Rating(r_project=project.p_project,
                        r_namespace=ns,
                        r_article=title,
                        r_score=0)
        if kind in (AssessmentKind.QUALITY, AssessmentKind.BOTH):
            # The model field is r_quality; assigning to ``quality`` would
            # leave the stored value unset.
            rating.r_quality = not_a_class
            if move_data:
                rating.set_quality_timestamp_dt(move_data['timestamp_dt'])
            else:
                rating.r_quality_timestamp = GLOBAL_TIMESTAMP_WIKI
        if kind in (AssessmentKind.IMPORTANCE, AssessmentKind.BOTH):
            # Likewise the model field is r_importance.
            rating.r_importance = not_a_class
            if move_data:
                rating.set_importance_timestamp_dt(move_data['timestamp_dt'])
            else:
                rating.r_importance_timestamp = GLOBAL_TIMESTAMP_WIKI

        logic_rating.insert_or_update(wp10db, rating, kind)

        if kind in (AssessmentKind.QUALITY, AssessmentKind.BOTH):
            logic_rating.add_log_for_rating(wp10db, rating,
                                            AssessmentKind.QUALITY,
                                            old_rating.r_quality)
        if kind in (AssessmentKind.IMPORTANCE, AssessmentKind.BOTH):
            logic_rating.add_log_for_rating(wp10db, rating,
                                            AssessmentKind.IMPORTANCE,
                                            old_rating.r_importance)

        n += 1
        if n >= MAX_ARTICLES_BEFORE_COMMIT:
            wp10db.ping()
            wp10db.commit()
            # Start a new batch; without the reset every later article would
            # trigger its own ping and commit.
            n = 0
    logger.info('End, committing db')
    wp10db.ping()
    wp10db.commit()

    logger.debug('SEEN REPORT:\nin seen: %s\nskipped: %s\nprocessed: %s',
                 in_seen, skipped, processed)
Пример #7
0
def update_project_assessments_by_kind(wikidb, wp10db, project,
                                       extra_assessments, kind, old_ratings,
                                       seen):
    """Collect the new ratings of one assessment kind for a project.

    The project's categories for ``kind`` are refreshed through
    ``update_project_categories_by_kind``; then every page in each category
    is turned into a ``Rating`` carrying the category's rating value and the
    page's category timestamp.

    Args:
        wikidb: Wiki database connection used to list category pages.
        wp10db: WP 1.0 database connection; pinged and committed
            periodically and once more at the end.
        project: Project object; ``p_project`` is read.
        extra_assessments: Forwarded to ``update_project_categories_by_kind``.
        kind: ``AssessmentKind.QUALITY`` or ``AssessmentKind.IMPORTANCE``.
        old_ratings: Mapping of article reference
            (``b'<namespace>:<title>'``) to the previously stored rating.
        seen: Set of article references; every accepted page is added to it.

    Returns:
        A tuple ``(new_ratings, rating_to_category)`` where ``new_ratings``
        maps article reference to a list of
        ``(rating, kind, old_rating_value)`` tuples.

    Raises:
        ValueError: If ``kind`` is neither QUALITY nor IMPORTANCE.
    """
    if kind not in (AssessmentKind.QUALITY, AssessmentKind.IMPORTANCE):
        raise ValueError(
            'Parameter "kind" was not one of QUALITY or IMPORTANCE')

    logger.info('Updating project %s assessments for %s', kind,
                project.p_project)
    rating_to_category = update_project_categories_by_kind(
        wikidb, wp10db, project, extra_assessments, kind)

    n = 0
    new_ratings = defaultdict(list)
    for current_rating, (category, ranking) in rating_to_category.items():
        # Pass the argument separately so formatting is deferred to logging.
        logger.info('Fetching article list for %r', category.decode('utf-8'))
        current_rating = current_rating.encode('utf-8')

        for page in logic_page.get_pages_by_category(wikidb, category):
            # Talk pages are tagged, we want the NS of the article itself.
            namespace = page.page_namespace - 1
            if not logic_util.is_namespace_acceptable(namespace):
                logger.debug('Skipping %s with namespace=%s', page.page_title,
                             namespace)
                continue

            article_ref = str(namespace).encode(
                'utf-8') + b':' + page.page_title
            seen.add(article_ref)

            old_rating = old_ratings.get(article_ref)
            old_rating_value = None

            if old_rating:
                # Work on a copy so the entry in old_ratings is not modified.
                rating = Rating(**attr.asdict(old_rating))
                if kind == AssessmentKind.QUALITY:
                    old_rating_value = rating.r_quality
                elif kind == AssessmentKind.IMPORTANCE:
                    old_rating_value = rating.r_importance
            else:
                rating = Rating(r_project=project.p_project,
                                r_namespace=namespace,
                                r_article=page.page_title,
                                r_score=0)
                old_rating_value = NOT_A_CLASS.encode('utf-8')

            if kind == AssessmentKind.QUALITY:
                rating.r_quality = current_rating
                rating.set_quality_timestamp_dt(page.cl_timestamp)
            elif kind == AssessmentKind.IMPORTANCE:
                rating.r_importance = current_rating
                rating.set_importance_timestamp_dt(page.cl_timestamp)

            new_ratings[article_ref].append((rating, kind, old_rating_value))
            n += 1
            if n >= MAX_ARTICLES_BEFORE_COMMIT:
                wp10db.ping()
                wp10db.commit()
                # Start a new batch; without the reset every later page would
                # trigger its own ping and commit.
                n = 0
    logger.info('End, committing db')
    wp10db.ping()
    wp10db.commit()

    return (new_ratings, rating_to_category)
Пример #8
0
class RatingModelTest(BaseWpOneDbTest):
  """Checks the dictionary produced by ``Rating.to_web_dict``."""

  def setUp(self):
    """Create a rating in namespace 4 with NotAClass quality and importance."""
    super().setUp()
    # Let assertion failures print complete dictionary diffs.
    self.maxDiff = None
    self.rating = Rating(r_project=b'Test Project',
                         r_article=b'Test article pages',
                         r_namespace=4,
                         r_quality=b'NotAClass',
                         r_quality_timestamp=b'2020-01-13T08:04:20Z',
                         r_importance=b'NotAClass',
                         r_importance_timestamp=b'2020-04-04T15:55:55Z')

  def test_to_web_dict_namespace(self):
    """With namespace 4 the titles and links are expected to carry a 'Wikipedia' prefix."""
    want = {
        'article': 'Wikipedia:Test article pages',
        'article_link': 'https://en.wikipedia.org/w/index.php?title=Wikipedia:Test%20article%20pages',
        'article_talk': 'Wikipedia talk:Test article pages',
        'article_talk_link': 'https://en.wikipedia.org/w/index.php?title=Wikipedia talk:Test%20article%20pages',
        'article_history_link': 'https://en.wikipedia.org/w/index.php?title=Wikipedia:Test%20article%20pages&action=history',
        'importance': 'NotAClass',
        'importance_updated': '2020-04-04T15:55:55Z',
        'quality': 'NotAClass',
        'quality_updated': '2020-01-13T08:04:20Z'
    }

    self.assertEqual(want, self.rating.to_web_dict(self.wp10db))

  def test_to_web_dict_no_namespace(self):
    """With namespace 0 the titles are expected bare and the talk page under 'Talk:'."""
    self.rating.r_namespace = 0
    want = {
        'article': 'Test article pages',
        'article_link': 'https://en.wikipedia.org/w/index.php?title=Test%20article%20pages',
        'article_talk': 'Talk:Test article pages',
        'article_talk_link': 'https://en.wikipedia.org/w/index.php?title=Talk:Test%20article%20pages',
        'article_history_link': 'https://en.wikipedia.org/w/index.php?title=Test%20article%20pages&action=history',
        'importance': 'NotAClass',
        'importance_updated': '2020-04-04T15:55:55Z',
        'quality': 'NotAClass',
        'quality_updated': '2020-01-13T08:04:20Z'
    }

    self.assertEqual(want, self.rating.to_web_dict(self.wp10db))