Exemplo n.º 1
0
    def test_register_duplicate_bare(self):
        """Check the state left by ``register_duplicate()`` on two articles.

        ``article2`` is registered as a duplicate of ``article1``; the
        assertions then pin the read and feed counts of both articles,
        the ``duplicate_of`` link, and how many articles carry one.
        """

        # Before registration, exactly 3 articles have no `duplicate_of`.
        self.assertEqual(
            Article.objects(duplicate_of__exists=False).count(), 3)

        self.article1.register_duplicate(self.article2)

        # Needed because feeds are modified in another instance of the
        # same database record, via the celery task.
        self.article1.safe_reload()

        self.assertEqual(self.article1.reads.count(), 10)
        self.assertEqual(self.article2.reads.count(), 0)

        self.assertEqual(len(self.article1.feeds), 10)
        self.assertEqual(len(self.article2.feeds), 5)

        self.assertEqual(self.article2.duplicate_of, self.article1)

        # One article is now flagged as a duplicate, two are not.
        self.assertEqual(
            Article.objects(duplicate_of__exists=True).count(), 1)
        self.assertEqual(
            Article.objects(duplicate_of__exists=False).count(), 2)
Exemplo n.º 2
0
    def test_register_duplicate_bare(self):
        """Check the state left by ``register_duplicate()`` on two articles.

        ``article2`` is registered as a duplicate of ``article1``; the
        assertions then pin the read and feed counts of both articles,
        the ``duplicate_of`` link, and how many articles carry one.
        """

        # Before registration, exactly 3 articles have no `duplicate_of`.
        self.assertEqual(
            Article.objects(duplicate_of__exists=False).count(), 3)

        self.article1.register_duplicate(self.article2)

        # Needed because feeds are modified in another instance of the
        # same database record, via the celery task.
        self.article1.safe_reload()

        self.assertEqual(self.article1.reads.count(), 10)
        self.assertEqual(self.article2.reads.count(), 0)

        self.assertEqual(len(self.article1.feeds), 10)
        self.assertEqual(len(self.article2.feeds), 5)

        self.assertEqual(self.article2.duplicate_of, self.article1)

        # One article is now flagged as a duplicate, two are not.
        self.assertEqual(
            Article.objects(duplicate_of__exists=True).count(), 1)
        self.assertEqual(
            Article.objects(duplicate_of__exists=False).count(), 2)
Exemplo n.º 3
0
    def test_url_error_classifier(self):
        """Classify the ``url_error`` of the test articles and check buckets.

        Verifies the keys of the result mapping, the number of objects
        seen, the number of error types / stored buckets, and that the
        HTTP 404 bucket holds ``a3`` and ``a4`` while no HTTP 401 bucket
        exists.
        """

        # NOTE: these error strings are directly taken from the production
        #       database. Only URLs have been changed for tests.

        results = UrlErrorClassifier(Article.objects(url_error__ne=''),
                                     'url_error').classify()

        self.assertEqual(sorted(results.keys()), [
            u'duration', u'error_types', u'seen_objects', u'stored_instances'
        ])
        self.assertEqual(results.get('seen_objects'), 5)

        stored = results.get('stored_instances')
        errors = results.get('error_types')

        self.assertEqual(len(errors), 4)
        self.assertEqual(len(stored), 4)

        err404 = stored.get(UrlErrorClassifier.ERR_NETWORK_HTTP404)

        # Fail with a readable message (instead of a TypeError on len())
        # if the 404 bucket is missing altogether.
        self.assertIsNotNone(err404)
        self.assertEqual(len(err404), 2)
        self.assertIn(self.a3, err404)
        self.assertIn(self.a4, err404)

        err401 = stored.get(UrlErrorClassifier.ERR_NETWORK_HTTP401)
        self.assertIsNone(err401)
Exemplo n.º 4
0
    def test_url_error_classifier(self):
        """Classify the ``url_error`` of the test articles and check buckets.

        Verifies the keys of the result mapping, the number of objects
        seen, the number of error types / stored buckets, and that the
        HTTP 404 bucket holds ``a3`` and ``a4`` while no HTTP 401 bucket
        exists.
        """

        # NOTE: these error strings are directly taken from the production
        #       database. Only URLs have been changed for tests.

        results = UrlErrorClassifier(Article.objects(url_error__ne=''),
                                     'url_error').classify()

        self.assertEqual(sorted(results.keys()), [
            u'duration', u'error_types', u'seen_objects', u'stored_instances'
        ])
        self.assertEqual(results.get('seen_objects'), 5)

        stored = results.get('stored_instances')
        errors = results.get('error_types')

        self.assertEqual(len(errors), 4)
        self.assertEqual(len(stored), 4)

        err404 = stored.get(UrlErrorClassifier.ERR_NETWORK_HTTP404)

        # Fail with a readable message (instead of a TypeError on len())
        # if the 404 bucket is missing altogether.
        self.assertIsNotNone(err404)
        self.assertEqual(len(err404), 2)
        self.assertIn(self.a3, err404)
        self.assertIn(self.a4, err404)

        err401 = stored.get(UrlErrorClassifier.ERR_NETWORK_HTTP401)
        self.assertIsNone(err401)
Exemplo n.º 5
0
def synchronize_statsd_articles_gauges(full=False):
    """Send the current ``Article`` counters to statsd as gauges.

    The total, markdown, html, empty, content-error and url-error counts
    are always sent. The orphaned, absolute-URL and duplicate counts are
    sent in addition only when ``full`` is true.
    """

    with benchmark('synchronize statsd gauges for Article.*'):

        # NOTE(review): the literal 0 presumably equals CONTENT_TYPE_NONE;
        # confirm before replacing it with the constant.
        no_content = Article.objects(content_type=0).no_cache()
        # Finer-grained breakdowns of the empty articles (pending, content
        # error, url error) are not reported for now.

        with_content = Article.objects(content_type__ne=CONTENT_TYPE_NONE)
        as_html = with_content.filter(content_type=CONTENT_TYPE_HTML)
        as_markdown = with_content.filter(content_type=CONTENT_TYPE_MARKDOWN)

        with_absolute_url = Article.objects(url_absolute=True).no_cache()
        duplicated = Article.objects(duplicate_of__ne=None).no_cache()
        orphans = Article.objects(orphaned=True).no_cache()
        content_failed = Article.objects(content_error__ne='').no_cache()
        url_failed = Article.objects(url_error__ne='').no_cache()

        # The grand total is read from the raw collection, not a queryset.
        statsd.gauge('articles.counts.total',
                     Article._get_collection().count())

        gauges = [
            ('articles.counts.markdown', as_markdown),
            ('articles.counts.html', as_html),
            ('articles.counts.empty', no_content),
            ('articles.counts.content_errors', content_failed),
            ('articles.counts.url_errors', url_failed),
        ]

        if full:
            gauges += [
                ('articles.counts.orphaned', orphans),
                ('articles.counts.absolutes', with_absolute_url),
                ('articles.counts.duplicates', duplicated),
            ]

        # Each count is computed right before its gauge is sent.
        for gauge_name, queryset in gauges:
            statsd.gauge(gauge_name, queryset.count())
Exemplo n.º 6
0
def article_url_error_types():
    """Classify the ``url_error`` of every article that has a non-empty one.

    Returns the result of ``UrlErrorClassifier.classify()`` unchanged.
    """

    # Still to investigate:
    #    list index out of range: 758
    #    'NoneType' object has no attribute 'findAll': 137

    failed_articles = Article.objects(url_error__ne='').no_cache()
    classifier = UrlErrorClassifier(failed_articles, 'url_error')

    return classifier.classify()
Exemplo n.º 7
0
    def test_generic_errors_classifiers(self):
        """Run ``GenericErrorClassifier`` on articles having a ``url_error``.

        Expects 5 objects seen, 5 error types and 5 stored buckets.
        """

        results = GenericErrorClassifier(Article.objects(url_error__ne=''),
                                         'url_error').classify()

        stored = results.get('stored_instances')
        errors = results.get('error_types')

        self.assertEqual(results.get('seen_objects'), 5)

        self.assertEqual(len(errors), 5)
        self.assertEqual(len(stored), 5)
Exemplo n.º 8
0
    def test_python_errors_classifiers(self):
        """Run ``PythonErrorClassifier`` on articles having a ``url_error``.

        Expects 5 objects seen, 5 error types and 5 stored buckets.
        """

        results = PythonErrorClassifier(Article.objects(url_error__ne=''),
                                        'url_error').classify()

        stored = results.get('stored_instances')
        errors = results.get('error_types')

        self.assertEqual(results.get('seen_objects'), 5)

        self.assertEqual(len(errors), 5)
        self.assertEqual(len(stored), 5)
Exemplo n.º 9
0
    def test_content_error_classifier(self):
        """Smoke-test ``ContentErrorClassifier`` on the test articles.

        Only the number of seen objects is asserted so far; the queryset
        still selects on ``url_error`` (see the TODO below).
        """

        # NOTE: these error strings are directly taken from the production
        #       database. Only URLs have been changed for tests.
        #
        # ValidationError (Article:51fa68957711037f4003a37b) (1.GenericReferences can only contain documents: ['tags']): 1
        # ValidationError (Article:51fa68e47711037f3d03a3fe) (5.GenericReferences can only contain documents: ['tags']): 1
        # ValidationError (Article:51fa6b6aa24639329b2ce203) (1.GenericReferences can only contain documents: ['tags']): 1
        # ValidationError (Article:51fa69c3a24639329a2ce21a) (3.GenericReferences can only contain documents: ['tags']): 1
        # ValidationError (Article:51fa67a97711037f3d03a33d) (GenericReferences can only contain documents: ['tags']): 1
        # ValidationError (Article:51fa68e57711037f3d03a413) (4.GenericReferences can only contain documents: ['tags']): 1
        # ValidationError (Article:51fa64377711037f3f03a30c) (2.GenericReferences can only contain documents: ['tags']): 1
        # ValidationError (Article:51fa69c3a24639329a2ce207) (3.GenericReferences can only contain documents: ['tags']): 1
        # ValidationError (Article:51fa6b3f7711037f6a25ae46) (6.GenericReferences can only contain documents: ['tags']): 1
        # ValidationError (Article:51fa6b68a2463932a02ce2af) (11.GenericReferences can only contain documents: ['tags']): 1

        # TODO: url_error__ne -> content_error__ne
        # when we fully implement this test method.
        results = ContentErrorClassifier(Article.objects(url_error__ne=''),
                                         'content_error').classify()

        self.assertEqual(results.get('seen_objects'), 5)
Exemplo n.º 10
0
    def test_content_error_classifier(self):
        """Smoke-test ``ContentErrorClassifier`` on the test articles.

        Only the number of seen objects is asserted so far; the queryset
        still selects on ``url_error`` (see the TODO below).
        """

        # NOTE: these error strings are directly taken from the production
        #       database. Only URLs have been changed for tests.
        #
        # ValidationError (Article:51fa68957711037f4003a37b) (1.GenericReferences can only contain documents: ['tags']): 1
        # ValidationError (Article:51fa68e47711037f3d03a3fe) (5.GenericReferences can only contain documents: ['tags']): 1
        # ValidationError (Article:51fa6b6aa24639329b2ce203) (1.GenericReferences can only contain documents: ['tags']): 1
        # ValidationError (Article:51fa69c3a24639329a2ce21a) (3.GenericReferences can only contain documents: ['tags']): 1
        # ValidationError (Article:51fa67a97711037f3d03a33d) (GenericReferences can only contain documents: ['tags']): 1
        # ValidationError (Article:51fa68e57711037f3d03a413) (4.GenericReferences can only contain documents: ['tags']): 1
        # ValidationError (Article:51fa64377711037f3f03a30c) (2.GenericReferences can only contain documents: ['tags']): 1
        # ValidationError (Article:51fa69c3a24639329a2ce207) (3.GenericReferences can only contain documents: ['tags']): 1
        # ValidationError (Article:51fa6b3f7711037f6a25ae46) (6.GenericReferences can only contain documents: ['tags']): 1
        # ValidationError (Article:51fa6b68a2463932a02ce2af) (11.GenericReferences can only contain documents: ['tags']): 1

        # TODO: url_error__ne -> content_error__ne
        # when we fully implement this test method.
        results = ContentErrorClassifier(Article.objects(url_error__ne=''),
                                         'content_error').classify()

        self.assertEqual(results.get('seen_objects'), 5)
Exemplo n.º 11
0
def article_content_error_types():
    """Classify the ``content_error`` of every article that has a non-empty one.

    Returns the result of ``ContentErrorClassifier.classify()`` unchanged.
    """

    failed_articles = Article.objects(content_error__ne='').no_cache()
    classifier = ContentErrorClassifier(failed_articles, 'content_error')

    return classifier.classify()