def setUp(self):
    """Create the five fixture articles used by the absolutization tests."""
    # Collections are intentionally not dropped here; tearDown() cleans up.
    self.article1 = Article(
        title=u'test1',
        url=u'http://rss.feedsportal.com/c/707/f/9951/s/2b27496a/l/0L0Sreseaux0Etelecoms0Bnet0Cactualites0Clire0Elancement0Emondial0Edu0Esamsung0Egalaxy0Es40E25980A0Bhtml/story01.htm',  # NOQA
    ).save()
    self.article2 = Article(
        title=u'test2',
        url=u'http://feedproxy.google.com/~r/francaistechcrunch/~3/hEIhLwVyEEI/',  # NOQA
    ).save()
    self.article3 = Article(
        title=u'test3',
        url=u'http://obi.1flow.io/absolutize_test_401',
    ).save()
    self.article4 = Article(
        title=u'test4',
        url=u'http://host.non.exixstentz.com/absolutize_test',
    ).save()
    self.article5 = Article(
        title=u'test5',
        url=u'http://1flow.io/absolutize_test_404',
    ).save()
def setUp(self):
    """Create a real article + feed pair, one subscribed reader and a few
    extra users without reads/subscriptions."""
    # NOTE: we need real web pages, else the absolutization won't work or
    # will find duplicates and tests will fail for a real-life reason.
    self.article1 = Article(
        title='test1',
        url='http://blog.1flow.io/post/59410536612/1flow-blog-has-moved',
    ).save()
    self.feed = Feed(name='1flow test feed',
                     url='http://blog.1flow.io/rss').save()
    self.article1.update(add_to_set__feeds=self.feed)
    self.article1.reload()

    # One user who reads the article and subscribes to the feed.
    for idx in xrange(1, 2):
        username = '******' % idx
        django_user = DjangoUser.objects.create(
            username=username, email='*****@*****.**' % username)

        # PG post_save() signal already created the MongoDB user.
        mongo_user = django_user.mongo

        Read(user=mongo_user, article=self.article1).save()
        Subscription(user=mongo_user, feed=self.feed).save()

    # A few more users, deliberately without reads or subscriptions.
    for idx in xrange(2, 5):
        username = '******' % idx
        django_user = DjangoUser.objects.create(
            username=username, email='*****@*****.**' % username)
def setUp(self):
    """Start each test from clean collections with two known web sites."""
    WebSite.drop_collection()
    Article.drop_collection()

    self.ws1 = WebSite(url='http://test1.com').save()
    self.ws2 = WebSite(url='http://test2.com').save()
def test_register_duplicate_bare(self):
    """Registering a duplicate transfers its reads and feeds to the master
    and marks the duplicate as such."""
    self.assertEquals(
        Article.objects(duplicate_of__exists=False).count(), 3)

    self.article1.register_duplicate(self.article2)

    # Needed because feeds are modified in another instance of the
    # same database record, via the celery task.
    self.article1.safe_reload()

    # All 10 reads end up on the master; the duplicate keeps none.
    self.assertEquals(self.article1.reads.count(), 10)
    self.assertEquals(self.article2.reads.count(), 0)

    # Feeds are merged onto the master, but stay on the duplicate.
    self.assertEquals(len(self.article1.feeds), 10)
    self.assertEquals(len(self.article2.feeds), 5)

    self.assertEquals(self.article2.duplicate_of, self.article1)

    self.assertEquals(
        Article.objects(duplicate_of__exists=True).count(), 1)
    self.assertEquals(
        Article.objects(duplicate_of__exists=False).count(), 2)
def test_register_duplicate_bare(self):
    """Duplicate registration must move reads/feeds and tag the duplicate."""
    not_dupe_count = Article.objects(duplicate_of__exists=False).count
    self.assertEquals(not_dupe_count(), 3)

    self.article1.register_duplicate(self.article2)

    # Needed because feeds are modified in another instance of the
    # same database record, via the celery task.
    self.article1.safe_reload()

    self.assertEquals(self.article1.reads.count(), 10)
    self.assertEquals(self.article2.reads.count(), 0)
    self.assertEquals(len(self.article1.feeds), 10)
    self.assertEquals(len(self.article2.feeds), 5)
    self.assertEquals(self.article2.duplicate_of, self.article1)

    self.assertEquals(
        Article.objects(duplicate_of__exists=True).count(), 1)
    self.assertEquals(not_dupe_count(), 2)
class AbsolutizeTest(TestCase):
    """Exercise Article.absolutize_url() against live redirection services.

    NOTE(review): these tests hit the real network; they presumably require
    internet access to pass — confirm in CI.
    """

    def setUp(self):
        # Collections are dropped in tearDown(), not here.
        #Article.drop_collection()
        #Feed.drop_collection()
        self.article1 = Article(title=u'test1', url=u'http://rss.feedsportal.com/c/707/f/9951/s/2b27496a/l/0L0Sreseaux0Etelecoms0Bnet0Cactualites0Clire0Elancement0Emondial0Edu0Esamsung0Egalaxy0Es40E25980A0Bhtml/story01.htm').save()  # NOQA
        self.article2 = Article(title=u'test2', url=u'http://feedproxy.google.com/~r/francaistechcrunch/~3/hEIhLwVyEEI/').save()  # NOQA
        self.article3 = Article(title=u'test3', url=u'http://obi.1flow.io/absolutize_test_401').save()  # NOQA
        self.article4 = Article(title=u'test4', url=u'http://host.non.exixstentz.com/absolutize_test').save()  # NOQA
        self.article5 = Article(title=u'test5', url=u'http://1flow.io/absolutize_test_404').save()  # NOQA

    def tearDown(self):
        Article.drop_collection()
        Feed.drop_collection()

    def test_absolutize(self):
        """Proxied URLs must resolve to their final destination URL."""
        self.article1.absolutize_url()
        self.assertEquals(self.article1.url, u'http://www.reseaux-telecoms.net/actualites/lire-lancement-mondial-du-samsung-galaxy-s4-25980.html')  # NOQA
        self.assertEquals(self.article1.url_absolute, True)
        self.assertEquals(self.article1.url_error, '')

        self.article2.absolutize_url()
        self.assertEquals(self.article2.url, u'http://techcrunch.com/2013/05/18/hell-no-tumblr-users-wont-go-to-yahoo/')  # NOQA
        self.assertEquals(self.article2.url_absolute, True)
        self.assertEquals(self.article2.url_error, '')

    def test_absolutize_errors(self):
        """HTTP 401/404 and connection failures must leave the URL intact,
        keep url_absolute False and record the error message."""
        #
        # NOTE: if a PROXY is set, the reasons word cases can vary.
        # eg. 'Not Found' (via Squid) instead of 'NOT FOUND' (direct answer).
        #
        self.article3.absolutize_url()
        self.assertEquals(self.article3.url, u'http://obi.1flow.io/absolutize_test_401')  # NOQA
        self.assertEquals(self.article3.url_absolute, False)
        self.assertEquals(self.article3.url_error, u'HTTP Error 401 (Unauthorized) while resolving http://obi.1flow.io/absolutize_test_401.')  # NOQA

        self.article5.absolutize_url()
        self.assertEquals(self.article5.url, u'http://1flow.io/absolutize_test_404')  # NOQA
        self.assertEquals(self.article5.url_absolute, False)
        self.assertEquals(self.article5.url_error, u'HTTP Error 404 (NOT FOUND) while resolving http://1flow.io/absolutize_test_404.')  # NOQA

        self.article4.absolutize_url()
        self.assertEquals(self.article4.url, u'http://host.non.exixstentz.com/absolutize_test')  # NOQA
        self.assertEquals(self.article4.url_absolute, False)
        # Only the prefix is compared: the tail of the message (retry count,
        # errno) varies between runs and requests versions.
        self.assertEquals(self.article4.url_error[:108], u"HTTPConnectionPool(host='host.non.exixstentz.com', port=80): Max retries exceeded with url: /absolutize_test")  # NOQA
def synchronize_statsd_articles_gauges(full=False):
    """ Synchronize the statsd gauges of the Article collection.

    :param full: when True, also push the costlier ``orphaned``,
        ``absolutes`` and ``duplicates`` gauges; they are skipped by
        default to keep the synchronization cheap.
    """

    with benchmark('synchronize statsd gauges for Article.*'):

        # Consistency fix: use the named constant instead of the magic
        # literal ``0`` so this queryset matches ``parsed`` below, which
        # already filters on CONTENT_TYPE_NONE.
        empty = Article.objects(content_type=CONTENT_TYPE_NONE).no_cache()

        parsed = Article.objects(content_type__ne=CONTENT_TYPE_NONE)
        html = parsed.filter(content_type=CONTENT_TYPE_HTML)
        markdown = parsed.filter(content_type=CONTENT_TYPE_MARKDOWN)

        absolutes = Article.objects(url_absolute=True).no_cache()
        duplicates = Article.objects(duplicate_of__ne=None).no_cache()
        orphaned = Article.objects(orphaned=True).no_cache()
        content_errors = Article.objects(content_error__ne='').no_cache()
        url_errors = Article.objects(url_error__ne='').no_cache()

        statsd.gauge('articles.counts.total',
                     Article._get_collection().count())
        statsd.gauge('articles.counts.markdown', markdown.count())
        statsd.gauge('articles.counts.html', html.count())
        statsd.gauge('articles.counts.empty', empty.count())
        statsd.gauge('articles.counts.content_errors', content_errors.count())
        statsd.gauge('articles.counts.url_errors', url_errors.count())

        if full:
            statsd.gauge('articles.counts.orphaned', orphaned.count())
            statsd.gauge('articles.counts.absolutes', absolutes.count())
            statsd.gauge('articles.counts.duplicates', duplicates.count())
def setUp(self):
    """Build the five articles whose URLs the absolutization tests resolve."""
    # No drop_collection() here: cleanup belongs to tearDown().
    save = lambda title, url: Article(title=title, url=url).save()  # NOQA

    self.article1 = save(u'test1', u'http://rss.feedsportal.com/c/707/f/9951/s/2b27496a/l/0L0Sreseaux0Etelecoms0Bnet0Cactualites0Clire0Elancement0Emondial0Edu0Esamsung0Egalaxy0Es40E25980A0Bhtml/story01.htm')  # NOQA
    self.article2 = save(u'test2', u'http://feedproxy.google.com/~r/francaistechcrunch/~3/hEIhLwVyEEI/')  # NOQA
    self.article3 = save(u'test3', u'http://obi.1flow.io/absolutize_test_401')
    self.article4 = save(u'test4', u'http://host.non.exixstentz.com/absolutize_test')  # NOQA
    self.article5 = save(u'test5', u'http://1flow.io/absolutize_test_404')
def test_url_error_classifier(self):
    """Production-style url_error strings must be bucketed correctly."""
    # NOTE: these error strings are directly taken from the production
    # database. Only URLs have been changed for tests.
    classifier = UrlErrorClassifier(Article.objects(url_error__ne=''),
                                    'url_error')
    results = classifier.classify()

    self.assertEquals(sorted(results.keys()),
                      [u'duration', u'error_types',
                       u'seen_objects', u'stored_instances'])
    self.assertEquals(results.get('seen_objects'), 5)

    stored = results.get('stored_instances')
    errors = results.get('error_types')
    self.assertEquals(len(errors), 4)
    self.assertEquals(len(stored), 4)

    # Two articles share the 404 bucket.
    err404 = stored.get(UrlErrorClassifier.ERR_NETWORK_HTTP404)
    self.assertEquals(len(err404), 2)
    self.assertTrue(self.a3 in err404)
    self.assertTrue(self.a4 in err404)

    # No fixture produces a 401; the bucket must not exist.
    err401 = stored.get(UrlErrorClassifier.ERR_NETWORK_HTTP401)
    self.assertEquals(err401, None)
def test_url_error_classifier(self):
    """Classifying url_error fields yields the expected buckets."""
    # NOTE: these error strings are directly taken from the production
    # database. Only URLs have been changed for tests.
    results = UrlErrorClassifier(
        Article.objects(url_error__ne=''), 'url_error').classify()

    expected_keys = [u'duration', u'error_types',
                     u'seen_objects', u'stored_instances']
    self.assertEquals(sorted(results.keys()), expected_keys)
    self.assertEquals(results.get('seen_objects'), 5)

    stored = results.get('stored_instances')
    errors = results.get('error_types')
    self.assertEquals(len(errors), 4)
    self.assertEquals(len(stored), 4)

    # a3 / a4 both carry an HTTP 404 error string.
    err404 = stored.get(UrlErrorClassifier.ERR_NETWORK_HTTP404)
    self.assertEquals(len(err404), 2)
    self.assertIn(self.a3, err404)
    self.assertIn(self.a4, err404)

    # No 401 fixture, hence no 401 bucket at all.
    self.assertEquals(
        stored.get(UrlErrorClassifier.ERR_NETWORK_HTTP401), None)
def setUp(self):
    """Three real articles; ten readers and ten feeds split evenly between
    article1 and article2 (article3 gets none)."""
    # NOTE: we need real web pages, else the absolutization won't work or
    # will find duplicates and tests will fail for a real-life reason.
    self.article1 = Article(
        title='test1',
        url='http://blog.1flow.io/post/59410536612/1flow-blog-has-moved',
    ).save()
    self.article2 = Article(title='test2',
                            url='http://obi.1flow.io/fr/').save()
    self.article3 = Article(title='test3',
                            url='http://obi.1flow.io/en/').save()

    # Users & reads: 1-5 read article1, 6-10 read article2.
    for start, article in ((1, self.article1), (6, self.article2)):
        for idx in xrange(start, start + 5):
            username = '******' % idx
            django_user = DjangoUser.objects.create(
                username=username, email='*****@*****.**' % username)

            # NOTE: the mongoDB user is created automatically. If you
            # try to create one it will fail with duplicate index error.
            Read(user=django_user.mongo, article=article).save()

    # Feeds: #1-5 attached to article1, #6-10 to article2.
    for start, article in ((1, self.article1), (6, self.article2)):
        for idx in xrange(start, start + 5):
            feed = Feed(name='test feed #%s' % idx,
                        url='http://test-feed%s.com' % idx).save()
            article.update(add_to_set__feeds=feed)
            article.reload()
def article_url_error_types():
    """Classify the url_error of every article that has one and return
    the classifier's result dict."""
    # Next to investigate:
    # list index out of range: 758
    # 'NoneType' object has no attribute 'findAll': 137
    queryset = Article.objects(url_error__ne='').no_cache()

    return UrlErrorClassifier(queryset, 'url_error').classify()
def test_generic_errors_classifiers(self):
    """The generic classifier sees all 5 fixtures and buckets each one."""
    results = GenericErrorClassifier(
        Article.objects(url_error__ne=''), 'url_error').classify()

    stored = results.get('stored_instances')
    errors = results.get('error_types')

    self.assertEquals(results.get('seen_objects'), 5)
    self.assertEquals(len(errors), 5)
    self.assertEquals(len(stored), 5)
def test_python_errors_classifiers(self):
    """The Python classifier sees all 5 fixtures and buckets each one."""
    classifier = PythonErrorClassifier(
        Article.objects(url_error__ne=''), 'url_error')
    results = classifier.classify()

    self.assertEquals(results.get('seen_objects'), 5)
    self.assertEquals(len(results.get('error_types')), 5)
    self.assertEquals(len(results.get('stored_instances')), 5)
def test_content_error_classifier(self):
    """Smoke-test ContentErrorClassifier over the url_error fixtures."""
    # NOTE: the error strings targeted here are directly taken from the
    # production database (a batch of ValidationError messages of the form
    # "ValidationError (Article:<oid>) (N.GenericReferences can only
    # contain documents: ['tags']): 1"); only the URLs have been changed
    # for the tests.
    #
    # TODO: url_error__ne -> content_error__ne
    # when we fully implement this test method.
    results = ContentErrorClassifier(
        Article.objects(url_error__ne=''), 'content_error').classify()

    self.assertEquals(results.get('seen_objects'), 5)
def setUp(self):
    """One healthy article plus five carrying production-like url_error
    strings (timeouts, 404s, no-route, connection-reset)."""
    # NOTE: we need real web pages, else the absolutization won't work or
    # will find duplicates and tests will fail for a real-life reason.
    # Here we need to keep an article without any url_error, so we have
    # to make it point to a real working URL.
    self.a1 = Article(
        title='ErrorClassifierTests #1',
        url='http://blog.1flow.io/post/59410536612/1flow-blog-has-moved',
    ).save()
    self.a2 = Article(
        title='ErrorClassifierTests #2',
        url='http://t.co/t2',
        url_error="HTTPConnectionPool(host='t.co', port=80): Max retries exceeded with url: /t1 (Caused by <class 'socket.error'>: [Errno 60] Operation timed out)",  # NOQA
    ).save()
    self.a3 = Article(
        title='ErrorClassifierTests #3',
        url='http://t.co/t3',
        url_error="HTTP Error 404 (Not Found) while resolving http://t.co/t3.",  # NOQA
    ).save()
    self.a4 = Article(
        title='ErrorClassifierTests #4',
        url='http://t.co/t4',
        url_error="HTTP Error 404 (Not Found) while resolving http://t.co/t4.",  # NOQA
    ).save()
    self.a5 = Article(
        title='ErrorClassifierTests #5',
        url='http://t.co/t5',
        url_error="HTTPConnectionPool(host='t.co', port=80): Max retries exceeded with url: /t5 (Caused by <class 'socket.error'>: [Errno 65] No route to host)",  # NOQA
    ).save()
    self.a6 = Article(
        title='ErrorClassifierTests #6',
        url='http://t.co/6',
        url_error="HTTPConnectionPool(host='t.co', port=80): Max retries exceeded with url: /t6 (Caused by <class 'socket.error'>: [Errno 54] Connection reset by peer)",  # NOQA
    ).save()
import logging from django.test import TestCase from django.test.utils import override_settings from oneflow.base.tests import connect_mongodb_testsuite from oneflow.core.models import Article from oneflow.core.stats import (PythonErrorClassifier, GenericErrorClassifier, UrlErrorClassifier, ContentErrorClassifier) LOGGER = logging.getLogger(__file__) connect_mongodb_testsuite() Article.drop_collection() @override_settings(STATICFILES_STORAGE= 'pipeline.storage.NonPackagingPipelineStorage', CELERY_EAGER_PROPAGATES_EXCEPTIONS=True, CELERY_ALWAYS_EAGER=True, BROKER_BACKEND='memory',) class ErrorClassifierTests(TestCase): def setUp(self): # NOTE: we need real web pages, else the absolutization won't work or # will find duplicates and tests will fail for a real-life reason. # Here we need to keep an article without any url_error, so we have # to make it point to a real working URL.
def tearDown(self):
    """Wipe the collections this test case touched."""
    WebSite.drop_collection()
    Article.drop_collection()
from oneflow.core.tasks import global_feeds_checker from oneflow.base.utils import RedisStatsCounter from oneflow.base.tests import (connect_mongodb_testsuite, TEST_REDIS) DjangoUser = get_user_model() LOGGER = logging.getLogger(__file__) # Use the test database not to pollute the production/development one. RedisStatsCounter.REDIS = TEST_REDIS TEST_REDIS.flushdb() connect_mongodb_testsuite() # Empty the database before starting in case an old test failed to tearDown(). Article.drop_collection() Read.drop_collection() User.drop_collection() Group.drop_collection() Feed.drop_collection() Tag.drop_collection() Folder.drop_collection() WebSite.drop_collection() Author.drop_collection() class ThrottleIntervalTest(TestCase): def test_lower_interval_with_etag_or_modified(self): t = Feed.throttle_fetch_interval
class FeedsTest(TestCase):
    """Feed closing notifications and feed discovery from arbitrary URLs.

    NOTE(review): test_feeds_creation hits many live sites; it presumably
    needs network access and stable remote content — confirm in CI.
    """

    def setUp(self):
        # NOTE: we need real web pages, else the absolutization won't work or
        # will find duplicates and tests will fail for a real-life reason.
        self.article1 = Article(title='test1',
                                url='http://blog.1flow.io/post/'
                                    '59410536612/1flow-blog-has-moved').save()
        self.feed = Feed(name='1flow test feed',
                         url='http://blog.1flow.io/rss').save()
        self.article1.update(add_to_set__feeds=self.feed)
        self.article1.reload()

        # User & Reads creation
        for index in xrange(1, 2):
            username = '******' % index
            du = DjangoUser.objects.create(username=username,
                                           email='*****@*****.**' % username)

            # PG post_save() signal already created the MongoDB user.
            u = du.mongo

            Read(user=u, article=self.article1).save()
            Subscription(user=u, feed=self.feed).save()

        # These extra users get no reads nor subscriptions.
        for index in xrange(2, 5):
            username = '******' % index
            du = DjangoUser.objects.create(username=username,
                                           email='*****@*****.**' % username)

    def tearDown(self):
        Subscription.drop_collection()
        Feed.drop_collection()
        Read.drop_collection()
        Article.drop_collection()
        User.drop_collection()

    def test_close(self):
        """Closing a feed records why/when and triggers one reminder mail."""
        closed_reason = u'closed for tests'

        self.feed.close(closed_reason)

        self.assertTrue(self.feed.closed)
        self.assertEquals(self.feed.closed_reason, closed_reason)
        self.assertFalse(self.feed.date_closed is None)

        global_feeds_checker()

        self.assertEquals(len(mail.outbox), 1)
        self.assertTrue(
            u'Reminder: 1 feed(s) closed in last' in mail.outbox[0].subject)
        self.assertTrue(unicode(self.feed) in mail.outbox[0].body)
        #self.assertEqual( mail.outbox[0].to, [ "*****@*****.**" ] )
        #self.assertTrue( "*****@*****.**" in mail.outbox[0].to )

    def test_feeds_creation(self):
        """Feed.create_feeds_from_url() must discover RSS feeds from home
        pages, listing pages and raw feed URLs, without duplicating."""
        # .setUp() creates one already.
        self.assertEquals(Feed._get_collection().count(), 1)

        feed, created = Feed.create_feeds_from_url(u'http://ntoll.org/')[0]
        self.assertTrue(created)
        self.assertEquals(feed.url, u'http://ntoll.org/rss.xml')
        self.assertEquals(Feed._get_collection().count(), 2)

        # Via the Home Page
        feed, created = Feed.create_feeds_from_url(u'http://www.zdnet.fr/')[0]
        self.assertTrue(created)
        self.assertEquals(feed.url, u'http://www.zdnet.fr/feeds/rss/')
        self.assertEquals(Feed._get_collection().count(), 3)

        # Via the RSS listing page
        feed, created = Feed.create_feeds_from_url(
            u'http://www.zdnet.fr/services/rss/')[0]  # NOQA
        self.assertFalse(created)
        self.assertEquals(feed.url, u'http://www.zdnet.fr/feeds/rss/')
        self.assertEquals(Feed._get_collection().count(), 3)

        # Via the first RSS (raw)
        feed, created = Feed.create_feeds_from_url(
            u'http://www.zdnet.fr/feeds/rss/')[0]  # NOQA
        self.assertFalse(created)
        self.assertEquals(feed.url, u'http://www.zdnet.fr/feeds/rss/')
        self.assertEquals(Feed._get_collection().count(), 3)

        feed, created = Feed.create_feeds_from_url(
            u'http://www.atlantico.fr/')[0]  # NOQA
        self.assertTrue(created)
        self.assertEquals(feed.url, u'http://www.atlantico.fr/rss.xml')
        self.assertEquals(Feed._get_collection().count(), 4)

        feed, created = Feed.create_feeds_from_url(u'http://wordpress.org/')[0]
        self.assertTrue(created)
        self.assertEquals(feed.url, u'http://wordpress.org/news/feed/')
        self.assertEquals(Feed._get_collection().count(), 5)

        # Not created again, even from an article which has the comment feed.
        feed, created = Feed.create_feeds_from_url(
            u'http://ntoll.org/article/build-a-drogulus')[0]  # NOQA
        self.assertFalse(created)
        self.assertEquals(feed.url, u'http://ntoll.org/rss.xml')
        self.assertEquals(Feed._get_collection().count(), 5)

        # This one has been created in .setUp()
        feed, created = Feed.create_feeds_from_url(u'http://blog.1flow.io/')[0]
        self.assertFalse(created)
        self.assertEquals(feed.url, u'http://blog.1flow.io/rss')
        self.assertEquals(Feed._get_collection().count(), 5)

        # No RSS in main page
        self.assertRaises(Exception, Feed.create_feeds_from_url,
                          u'http://www.bbc.co.uk/')
        self.assertEquals(Feed._get_collection().count(), 5)

        # This one has no RSS anywhere, it won't create anything
        self.assertRaises(Exception, Feed.create_feeds_from_url,
                          u'http://www.tumblr.com/blog/1flowio')
        self.assertEquals(Feed._get_collection().count(), 5)

    def test_closed_feeds_are_never_good(self):
        """ This test addresses Github #10. It is very simple, but the
            `.good_feeds` query is quite complex. """

        self.assertTrue(len(Feed.good_feeds) == 1)

        closed_reason = u'closed for tests'
        self.feed.close(closed_reason)

        self.assertTrue(len(Feed.good_feeds) == 0)
def article_content_error_types():
    """Classify the content_error of every article that has one and return
    the classifier's result dict."""
    queryset = Article.objects(content_error__ne='').no_cache()

    return ContentErrorClassifier(queryset, 'content_error').classify()
def tearDown(self):
    """Drop the articles created during the test."""
    Article.drop_collection()
class ArticleDuplicateTest(TestCase):
    """Check Article.register_duplicate(): reads and feeds move to the
    master article, and the operation is idempotent."""

    def setUp(self):
        # NOTE: we need real web pages, else the absolutization won't work or
        # will find duplicates and tests will fail for a real-life reason.
        self.article1 = Article(title='test1',
                                url='http://blog.1flow.io/post/'
                                    '59410536612/1flow-blog-has-moved').save()
        self.article2 = Article(title='test2',
                                url='http://obi.1flow.io/fr/').save()
        self.article3 = Article(title='test3',
                                url='http://obi.1flow.io/en/').save()

        # User & Reads creation: users 1-5 read article1.
        for index in xrange(1, 6):
            username = '******' % index
            du = DjangoUser.objects.create(username=username,
                                           email='*****@*****.**' % username)

            # NOTE: the mongoDB user is created automatically. If you
            # try to create one it will fail with duplicate index error.
            u = du.mongo

            Read(user=u, article=self.article1).save()

        # Users 6-10 read article2.
        for index in xrange(6, 11):
            username = '******' % index
            du = DjangoUser.objects.create(username=username,
                                           email='*****@*****.**' % username)
            u = du.mongo
            Read(user=u, article=self.article2).save()

        # Feeds creation: feeds 1-5 on article1, 6-10 on article2.
        for index in xrange(1, 6):
            f = Feed(name='test feed #%s' % index,
                     url='http://test-feed%s.com' % index).save()
            self.article1.update(add_to_set__feeds=f)
            self.article1.reload()

        for index in xrange(6, 11):
            f = Feed(name='test feed #%s' % index,
                     url='http://test-feed%s.com' % index).save()
            self.article2.update(add_to_set__feeds=f)
            self.article2.reload()

    def tearDown(self):
        Article.drop_collection()
        User.drop_collection()
        Read.drop_collection()
        Feed.drop_collection()

    def test_register_duplicate_bare(self):
        """Registering a duplicate transfers reads/feeds to the master."""
        self.assertEquals(Article.objects(
                          duplicate_of__exists=False).count(), 3)

        self.article1.register_duplicate(self.article2)

        # needed because feeds are modified in another instance of the
        # same database record, via the celery task.
        self.article1.safe_reload()

        self.assertEquals(self.article1.reads.count(), 10)
        self.assertEquals(self.article2.reads.count(), 0)
        self.assertEquals(len(self.article1.feeds), 10)
        self.assertEquals(len(self.article2.feeds), 5)
        self.assertEquals(self.article2.duplicate_of, self.article1)
        self.assertEquals(Article.objects(
                          duplicate_of__exists=True).count(), 1)
        self.assertEquals(Article.objects(
                          duplicate_of__exists=False).count(), 2)

    def test_register_duplicate_not_again(self):
        """Registering the same duplicate twice keeps the same master."""
        self.article1.register_duplicate(self.article2)
        self.article1.safe_reload()

        self.assertEquals(self.article2.duplicate_of, self.article1)
def tearDown(self):
    """Drop every collection populated by setUp()."""
    for document_class in (Subscription, Feed, Read, Article, User):
        document_class.drop_collection()
class FeedsTest(TestCase):
    """Feed closing notifications and feed discovery from arbitrary URLs.

    NOTE(review): test_feeds_creation hits many live sites; it presumably
    needs network access and stable remote content — confirm in CI.
    """

    def setUp(self):
        # NOTE: we need real web pages, else the absolutization won't work or
        # will find duplicates and tests will fail for a real-life reason.
        self.article1 = Article(title='test1',
                                url='http://blog.1flow.io/post/'
                                    '59410536612/1flow-blog-has-moved').save()
        self.feed = Feed(name='1flow test feed',
                         url='http://blog.1flow.io/rss').save()
        self.article1.update(add_to_set__feeds=self.feed)
        self.article1.reload()

        # User & Reads creation
        for index in xrange(1, 2):
            username = '******' % index
            du = DjangoUser.objects.create(username=username,
                                           email='*****@*****.**' % username)

            # PG post_save() signal already created the MongoDB user.
            u = du.mongo

            Read(user=u, article=self.article1).save()
            Subscription(user=u, feed=self.feed).save()

        # These extra users get no reads nor subscriptions.
        for index in xrange(2, 5):
            username = '******' % index
            du = DjangoUser.objects.create(username=username,
                                           email='*****@*****.**' % username)

    def tearDown(self):
        Subscription.drop_collection()
        Feed.drop_collection()
        Read.drop_collection()
        Article.drop_collection()
        User.drop_collection()

    def test_close(self):
        """Closing a feed records why/when and triggers one reminder mail."""
        closed_reason = u'closed for tests'

        self.feed.close(closed_reason)

        self.assertTrue(self.feed.closed)
        self.assertEquals(self.feed.closed_reason, closed_reason)
        self.assertFalse(self.feed.date_closed is None)

        global_feeds_checker()

        self.assertEquals(len(mail.outbox), 1)
        self.assertTrue(u'Reminder: 1 feed(s) closed in last'
                        in mail.outbox[0].subject)
        self.assertTrue(unicode(self.feed) in mail.outbox[0].body)
        #self.assertEqual( mail.outbox[0].to, [ "*****@*****.**" ] )
        #self.assertTrue( "*****@*****.**" in mail.outbox[0].to )

    def test_feeds_creation(self):
        """Feed.create_feeds_from_url() must discover RSS feeds from home
        pages, listing pages and raw feed URLs, without duplicating."""
        # .setUp() creates one already.
        self.assertEquals(Feed._get_collection().count(), 1)

        feed, created = Feed.create_feeds_from_url(u'http://ntoll.org/')[0]
        self.assertTrue(created)
        self.assertEquals(feed.url, u'http://ntoll.org/rss.xml')
        self.assertEquals(Feed._get_collection().count(), 2)

        # Via the Home Page
        feed, created = Feed.create_feeds_from_url(u'http://www.zdnet.fr/')[0]
        self.assertTrue(created)
        self.assertEquals(feed.url, u'http://www.zdnet.fr/feeds/rss/')
        self.assertEquals(Feed._get_collection().count(), 3)

        # Via the RSS listing page
        feed, created = Feed.create_feeds_from_url(u'http://www.zdnet.fr/services/rss/')[0]  # NOQA
        self.assertFalse(created)
        self.assertEquals(feed.url, u'http://www.zdnet.fr/feeds/rss/')
        self.assertEquals(Feed._get_collection().count(), 3)

        # Via the first RSS (raw)
        feed, created = Feed.create_feeds_from_url(u'http://www.zdnet.fr/feeds/rss/')[0]  # NOQA
        self.assertFalse(created)
        self.assertEquals(feed.url, u'http://www.zdnet.fr/feeds/rss/')
        self.assertEquals(Feed._get_collection().count(), 3)

        feed, created = Feed.create_feeds_from_url(u'http://www.atlantico.fr/')[0]  # NOQA
        self.assertTrue(created)
        self.assertEquals(feed.url, u'http://www.atlantico.fr/rss.xml')
        self.assertEquals(Feed._get_collection().count(), 4)

        feed, created = Feed.create_feeds_from_url(u'http://wordpress.org/')[0]
        self.assertTrue(created)
        self.assertEquals(feed.url, u'http://wordpress.org/news/feed/')
        self.assertEquals(Feed._get_collection().count(), 5)

        # Not created again, even from an article which has the comment feed.
        feed, created = Feed.create_feeds_from_url(u'http://ntoll.org/article/build-a-drogulus')[0]  # NOQA
        self.assertFalse(created)
        self.assertEquals(feed.url, u'http://ntoll.org/rss.xml')
        self.assertEquals(Feed._get_collection().count(), 5)

        # This one has been created in .setUp()
        feed, created = Feed.create_feeds_from_url(u'http://blog.1flow.io/')[0]
        self.assertFalse(created)
        self.assertEquals(feed.url, u'http://blog.1flow.io/rss')
        self.assertEquals(Feed._get_collection().count(), 5)

        # No RSS in main page
        self.assertRaises(Exception, Feed.create_feeds_from_url,
                          u'http://www.bbc.co.uk/')
        self.assertEquals(Feed._get_collection().count(), 5)

        # This one has no RSS anywhere, it won't create anything
        self.assertRaises(Exception, Feed.create_feeds_from_url,
                          u'http://www.tumblr.com/blog/1flowio')
        self.assertEquals(Feed._get_collection().count(), 5)
class ArticleDuplicateTest(TestCase):
    """Check Article.register_duplicate(): reads and feeds move to the
    master article, and the operation is idempotent."""

    def setUp(self):
        # NOTE: we need real web pages, else the absolutization won't work or
        # will find duplicates and tests will fail for a real-life reason.
        self.article1 = Article(title='test1',
                                url='http://blog.1flow.io/post/'
                                    '59410536612/1flow-blog-has-moved').save()
        self.article2 = Article(title='test2',
                                url='http://obi.1flow.io/fr/').save()
        self.article3 = Article(title='test3',
                                url='http://obi.1flow.io/en/').save()

        # User & Reads creation: users 1-5 read article1.
        for index in xrange(1, 6):
            username = '******' % index
            du = DjangoUser.objects.create(username=username,
                                           email='*****@*****.**' % username)

            # NOTE: the mongoDB user is created automatically. If you
            # try to create one it will fail with duplicate index error.
            u = du.mongo

            Read(user=u, article=self.article1).save()

        # Users 6-10 read article2.
        for index in xrange(6, 11):
            username = '******' % index
            du = DjangoUser.objects.create(username=username,
                                           email='*****@*****.**' % username)
            u = du.mongo
            Read(user=u, article=self.article2).save()

        # Feeds creation: feeds 1-5 on article1, 6-10 on article2.
        for index in xrange(1, 6):
            f = Feed(name='test feed #%s' % index,
                     url='http://test-feed%s.com' % index).save()
            self.article1.update(add_to_set__feeds=f)
            self.article1.reload()

        for index in xrange(6, 11):
            f = Feed(name='test feed #%s' % index,
                     url='http://test-feed%s.com' % index).save()
            self.article2.update(add_to_set__feeds=f)
            self.article2.reload()

    def tearDown(self):
        Article.drop_collection()
        User.drop_collection()
        Read.drop_collection()
        Feed.drop_collection()

    def test_register_duplicate_bare(self):
        """Registering a duplicate transfers reads/feeds to the master."""
        self.assertEquals(
            Article.objects(duplicate_of__exists=False).count(), 3)

        self.article1.register_duplicate(self.article2)

        # needed because feeds are modified in another instance of the
        # same database record, via the celery task.
        self.article1.safe_reload()

        self.assertEquals(self.article1.reads.count(), 10)
        self.assertEquals(self.article2.reads.count(), 0)
        self.assertEquals(len(self.article1.feeds), 10)
        self.assertEquals(len(self.article2.feeds), 5)
        self.assertEquals(self.article2.duplicate_of, self.article1)
        self.assertEquals(
            Article.objects(duplicate_of__exists=True).count(), 1)
        self.assertEquals(
            Article.objects(duplicate_of__exists=False).count(), 2)

    def test_register_duplicate_not_again(self):
        """Registering the same duplicate twice keeps the same master."""
        self.article1.register_duplicate(self.article2)
        self.article1.safe_reload()

        self.assertEquals(self.article2.duplicate_of, self.article1)
def tearDown(self):
    """Clean out the article and feed collections after each test."""
    Article.drop_collection()
    Feed.drop_collection()
def tearDown(self):
    """Drop every collection populated by setUp()."""
    for document_class in (Article, User, Read, Feed):
        document_class.drop_collection()
class AbsolutizeTest(TestCase):
    """Check that Article.absolutize_url() resolves feed-proxy redirections
    and records errors properly.

    NOTE(review): these tests hit the real network; they presumably require
    internet access to pass — confirm in CI.
    """

    def setUp(self):
        # Collections are dropped in tearDown(), not here.
        #Article.drop_collection()
        #Feed.drop_collection()
        self.article1 = Article(
            title=u'test1',
            url=u'http://rss.feedsportal.com/c/707/f/9951/s/2b27496a/l/0L0Sreseaux0Etelecoms0Bnet0Cactualites0Clire0Elancement0Emondial0Edu0Esamsung0Egalaxy0Es40E25980A0Bhtml/story01.htm'  # NOQA
        ).save()
        self.article2 = Article(
            title=u'test2',
            url=u'http://feedproxy.google.com/~r/francaistechcrunch/~3/hEIhLwVyEEI/'  # NOQA
        ).save()
        self.article3 = Article(
            title=u'test3',
            url=u'http://obi.1flow.io/absolutize_test_401').save()  # NOQA
        self.article4 = Article(
            title=u'test4',
            url=u'http://host.non.exixstentz.com/absolutize_test').save()  # NOQA
        self.article5 = Article(
            title=u'test5',
            url=u'http://1flow.io/absolutize_test_404').save()  # NOQA

    def tearDown(self):
        Article.drop_collection()
        Feed.drop_collection()

    def test_absolutize(self):
        """Proxied URLs must resolve to their final destination URL."""
        self.article1.absolutize_url()
        self.assertEquals(
            self.article1.url,
            u'http://www.reseaux-telecoms.net/actualites/lire-lancement-mondial-du-samsung-galaxy-s4-25980.html'  # NOQA
        )
        self.assertEquals(self.article1.url_absolute, True)
        self.assertEquals(self.article1.url_error, '')

        self.article2.absolutize_url()
        self.assertEquals(
            self.article2.url,
            u'http://techcrunch.com/2013/05/18/hell-no-tumblr-users-wont-go-to-yahoo/'  # NOQA
        )
        self.assertEquals(self.article2.url_absolute, True)
        self.assertEquals(self.article2.url_error, '')

    def test_absolutize_errors(self):
        """HTTP 401/404 and connection failures must leave the URL intact,
        keep url_absolute False and record the error message."""
        #
        # NOTE: if a PROXY is set, the reasons word cases can vary.
        # eg. 'Not Found' (via Squid) instead of 'NOT FOUND' (direct answer).
        #
        self.article3.absolutize_url()
        self.assertEquals(self.article3.url,
                          u'http://obi.1flow.io/absolutize_test_401')  # NOQA
        self.assertEquals(self.article3.url_absolute, False)
        self.assertEquals(
            self.article3.url_error,
            u'HTTP Error 401 (Unauthorized) while resolving http://obi.1flow.io/absolutize_test_401.'  # NOQA
        )

        self.article5.absolutize_url()
        self.assertEquals(self.article5.url,
                          u'http://1flow.io/absolutize_test_404')  # NOQA
        self.assertEquals(self.article5.url_absolute, False)
        self.assertEquals(
            self.article5.url_error,
            u'HTTP Error 404 (NOT FOUND) while resolving http://1flow.io/absolutize_test_404.'  # NOQA
        )

        self.article4.absolutize_url()
        self.assertEquals(
            self.article4.url,
            u'http://host.non.exixstentz.com/absolutize_test')  # NOQA
        self.assertEquals(self.article4.url_absolute, False)
        # Only the prefix is compared: the tail of the message (retry
        # count, errno) varies between runs and requests versions.
        self.assertEquals(
            self.article4.url_error[:108],
            u"HTTPConnectionPool(host='host.non.exixstentz.com', port=80): Max retries exceeded with url: /absolutize_test"  # NOQA
        )