def test_register_duplicate_bare(self):
    """Registering article2 as a duplicate of article1 migrates its data.

    After registration, article1 holds the combined reads and feeds,
    article2 points back to article1 via ``duplicate_of``, and the
    collection-wide duplicate counts shift accordingly.
    """

    self.assertEqual(Article.objects(
                     duplicate_of__exists=False).count(), 3)

    self.article1.register_duplicate(self.article2)

    # Needed because feeds are modified in another instance of the
    # same database record, via the celery task.
    self.article1.safe_reload()

    self.assertEqual(self.article1.reads.count(), 10)
    self.assertEqual(self.article2.reads.count(), 0)

    self.assertEqual(len(self.article1.feeds), 10)
    self.assertEqual(len(self.article2.feeds), 5)

    self.assertEqual(self.article2.duplicate_of, self.article1)

    self.assertEqual(Article.objects(
                     duplicate_of__exists=True).count(), 1)
    self.assertEqual(Article.objects(
                     duplicate_of__exists=False).count(), 2)
def test_register_duplicate_bare(self):
    """Registering article2 as a duplicate of article1 migrates its data.

    After registration, article1 holds the combined reads and feeds,
    article2 points back to article1 via ``duplicate_of``, and the
    collection-wide duplicate counts shift accordingly.
    """

    self.assertEqual(
        Article.objects(duplicate_of__exists=False).count(), 3)

    self.article1.register_duplicate(self.article2)

    # Needed because feeds are modified in another instance of the
    # same database record, via the celery task.
    self.article1.safe_reload()

    self.assertEqual(self.article1.reads.count(), 10)
    self.assertEqual(self.article2.reads.count(), 0)

    self.assertEqual(len(self.article1.feeds), 10)
    self.assertEqual(len(self.article2.feeds), 5)

    self.assertEqual(self.article2.duplicate_of, self.article1)

    self.assertEqual(
        Article.objects(duplicate_of__exists=True).count(), 1)
    self.assertEqual(
        Article.objects(duplicate_of__exists=False).count(), 2)
def test_url_error_classifier(self):
    """Classify the url_error fixtures and check the 404/401 buckets.

    NOTE: these error strings are directly taken from the production
    database. Only URLs have been changed for tests.
    """

    results = UrlErrorClassifier(Article.objects(url_error__ne=''),
                                 'url_error').classify()

    self.assertEqual(sorted(results.keys()),
                     [u'duration', u'error_types',
                      u'seen_objects', u'stored_instances'])

    self.assertEqual(results.get('seen_objects'), 5)

    stored = results.get('stored_instances')
    errors = results.get('error_types')

    self.assertEqual(len(errors), 4)
    self.assertEqual(len(stored), 4)

    err404 = stored.get(UrlErrorClassifier.ERR_NETWORK_HTTP404)

    self.assertEqual(len(err404), 2)
    self.assertIn(self.a3, err404)
    self.assertIn(self.a4, err404)

    # No 401 error exists in the fixtures, so the bucket is absent.
    err401 = stored.get(UrlErrorClassifier.ERR_NETWORK_HTTP401)

    self.assertIsNone(err401)
def test_url_error_classifier(self):
    """Classify the url_error fixtures and check the 404/401 buckets.

    NOTE: these error strings are directly taken from the production
    database. Only URLs have been changed for tests.
    """

    results = UrlErrorClassifier(Article.objects(url_error__ne=''),
                                 'url_error').classify()

    self.assertEqual(sorted(results.keys()), [
        u'duration', u'error_types', u'seen_objects', u'stored_instances'
    ])

    self.assertEqual(results.get('seen_objects'), 5)

    stored = results.get('stored_instances')
    errors = results.get('error_types')

    self.assertEqual(len(errors), 4)
    self.assertEqual(len(stored), 4)

    err404 = stored.get(UrlErrorClassifier.ERR_NETWORK_HTTP404)

    self.assertEqual(len(err404), 2)
    self.assertIn(self.a3, err404)
    self.assertIn(self.a4, err404)

    # No 401 error exists in the fixtures, so the bucket is absent.
    err401 = stored.get(UrlErrorClassifier.ERR_NETWORK_HTTP401)

    self.assertIsNone(err401)
def synchronize_statsd_articles_gauges(full=False):
    """Push Article collection counts to statsd gauges.

    :param full: when ``True``, also compute the more expensive
        orphaned / absolute-URL / duplicate counts.
    """

    with benchmark('synchronize statsd gauges for Article.*'):

        # Use the named constant rather than the literal 0 for
        # consistency with the complementary ``parsed`` query below.
        empty = Article.objects(content_type=CONTENT_TYPE_NONE).no_cache()

        parsed = Article.objects(content_type__ne=CONTENT_TYPE_NONE)
        html = parsed.filter(content_type=CONTENT_TYPE_HTML)
        markdown = parsed.filter(content_type=CONTENT_TYPE_MARKDOWN)

        absolutes = Article.objects(url_absolute=True).no_cache()
        duplicates = Article.objects(duplicate_of__ne=None).no_cache()
        orphaned = Article.objects(orphaned=True).no_cache()
        content_errors = Article.objects(content_error__ne='').no_cache()
        url_errors = Article.objects(url_error__ne='').no_cache()

        statsd.gauge('articles.counts.total',
                     Article._get_collection().count())
        statsd.gauge('articles.counts.markdown', markdown.count())
        statsd.gauge('articles.counts.html', html.count())
        statsd.gauge('articles.counts.empty', empty.count())
        statsd.gauge('articles.counts.content_errors',
                     content_errors.count())
        statsd.gauge('articles.counts.url_errors', url_errors.count())

        if full:
            # These queries are heavier; only refresh them on demand.
            statsd.gauge('articles.counts.orphaned', orphaned.count())
            statsd.gauge('articles.counts.absolutes', absolutes.count())
            statsd.gauge('articles.counts.duplicates', duplicates.count())
def article_url_error_types():
    """Classify every non-empty ``url_error`` value across articles.

    Next to investigate:
      - list index out of range: 758
      - 'NoneType' object has no attribute 'findAll': 137
    """

    queryset = Article.objects(url_error__ne='').no_cache()
    classifier = UrlErrorClassifier(queryset, 'url_error')

    return classifier.classify()
def test_generic_errors_classifiers(self):
    """Run the generic classifier over the url_error fixtures."""

    results = GenericErrorClassifier(Article.objects(url_error__ne=''),
                                     'url_error').classify()

    stored = results.get('stored_instances')
    errors = results.get('error_types')

    self.assertEqual(results.get('seen_objects'), 5)
    self.assertEqual(len(errors), 5)
    self.assertEqual(len(stored), 5)
def test_python_errors_classifiers(self):
    """Run the Python-error classifier over the url_error fixtures."""

    results = PythonErrorClassifier(Article.objects(url_error__ne=''),
                                    'url_error').classify()

    stored = results.get('stored_instances')
    errors = results.get('error_types')

    self.assertEqual(results.get('seen_objects'), 5)
    self.assertEqual(len(errors), 5)
    self.assertEqual(len(stored), 5)
def test_content_error_classifier(self):
    """Run the content-error classifier over the fixtures.

    NOTE: these error strings are directly taken from the production
    database. Only URLs have been changed for tests.
    """

    # Production samples this test should eventually cover:
    #
    # ValidationError (Article:51fa68957711037f4003a37b) (1.GenericReferences can only contain documents: ['tags']): 1
    # ValidationError (Article:51fa68e47711037f3d03a3fe) (5.GenericReferences can only contain documents: ['tags']): 1
    # ValidationError (Article:51fa6b6aa24639329b2ce203) (1.GenericReferences can only contain documents: ['tags']): 1
    # ValidationError (Article:51fa69c3a24639329a2ce21a) (3.GenericReferences can only contain documents: ['tags']): 1
    # ValidationError (Article:51fa67a97711037f3d03a33d) (GenericReferences can only contain documents: ['tags']): 1
    # ValidationError (Article:51fa68e57711037f3d03a413) (4.GenericReferences can only contain documents: ['tags']): 1
    # ValidationError (Article:51fa64377711037f3f03a30c) (2.GenericReferences can only contain documents: ['tags']): 1
    # ValidationError (Article:51fa69c3a24639329a2ce207) (3.GenericReferences can only contain documents: ['tags']): 1
    # ValidationError (Article:51fa6b3f7711037f6a25ae46) (6.GenericReferences can only contain documents: ['tags']): 1
    # ValidationError (Article:51fa6b68a2463932a02ce2af) (11.GenericReferences can only contain documents: ['tags']): 1

    # TODO: url_error__ne -> content_error__ne
    # when we fully implement this test method.
    results = ContentErrorClassifier(Article.objects(url_error__ne=''),
                                     'content_error').classify()

    self.assertEqual(results.get('seen_objects'), 5)
def article_content_error_types():
    """Classify every non-empty ``content_error`` value across articles."""

    queryset = Article.objects(content_error__ne='').no_cache()
    classifier = ContentErrorClassifier(queryset, 'content_error')

    return classifier.classify()