Exemplo n.º 1
0
    def setUp(self):
        """Create and save five articles, ``article1`` .. ``article5``.

        Each article gets a fixed title (``test1`` .. ``test5``) and a
        fixed URL; the saved documents are kept as attributes of the test.
        """

        # Collection drops are disabled here.
        #Article.drop_collection()
        #Feed.drop_collection()

        url1 = u'http://rss.feedsportal.com/c/707/f/9951/s/2b27496a/l/0L0Sreseaux0Etelecoms0Bnet0Cactualites0Clire0Elancement0Emondial0Edu0Esamsung0Egalaxy0Es40E25980A0Bhtml/story01.htm'  # NOQA
        url2 = u'http://feedproxy.google.com/~r/francaistechcrunch/~3/hEIhLwVyEEI/'  # NOQA
        url3 = u'http://obi.1flow.io/absolutize_test_401'
        url4 = u'http://host.non.exixstentz.com/absolutize_test'
        url5 = u'http://1flow.io/absolutize_test_404'

        self.article1 = Article(title=u'test1', url=url1).save()
        self.article2 = Article(title=u'test2', url=url2).save()
        self.article3 = Article(title=u'test3', url=url3).save()
        self.article4 = Article(title=u'test4', url=url4).save()
        self.article5 = Article(title=u'test5', url=url5).save()
Exemplo n.º 2
0
    def setUp(self):
        """Create one article attached to one feed, plus Django users.

        The first user loop also stores a ``Read`` and a ``Subscription``
        for each user; the second loop only creates the Django users.
        """

        # NOTE: real web pages are needed, otherwise the absolutization
        # won't work or will find duplicates, and tests will fail for a
        # real-life reason.
        article = Article(title='test1',
                          url='http://blog.1flow.io/post/'
                          '59410536612/1flow-blog-has-moved')
        self.article1 = article.save()

        feed = Feed(name='1flow test feed', url='http://blog.1flow.io/rss')
        self.feed = feed.save()

        self.article1.update(add_to_set__feeds=self.feed)
        self.article1.reload()

        # NOTE(review): the '******' and '*****@*****.**' literals look like
        # redacted placeholders; they hold no conversion specifier, so the
        # `%` formatting below cannot succeed as written -- confirm the
        # real values.

        # User & Reads creation; xrange(1, 2) yields the single index 1.
        for index in xrange(1, 2):
            username = '******' % index
            django_user = DjangoUser.objects.create(
                username=username, email='*****@*****.**' % username)
            # PG post_save() signal already created the MongoDB user.
            mongo_user = django_user.mongo
            Read(user=mongo_user, article=self.article1).save()
            Subscription(user=mongo_user, feed=self.feed).save()

        # Indexes 2 to 4: Django users only, no Read nor Subscription.
        for index in xrange(2, 5):
            username = '******' % index
            DjangoUser.objects.create(username=username,
                                      email='*****@*****.**' % username)
Exemplo n.º 3
0
    def setUp(self):
        """Drop the WebSite and Article collections, then save two sites."""

        for document_class in (WebSite, Article):
            document_class.drop_collection()

        first_site = WebSite(url='http://test1.com')
        self.ws1 = first_site.save()

        second_site = WebSite(url='http://test2.com')
        self.ws2 = second_site.save()
Exemplo n.º 4
0
    def test_register_duplicate_bare(self):
        """Register article2 as a duplicate of article1 and check the result.

        Asserts the ``duplicate_of`` counts before and after the call, and
        the reads / feeds counts of both articles once it is done.
        """

        self.assertEqual(
            Article.objects(duplicate_of__exists=False).count(), 3)

        self.article1.register_duplicate(self.article2)

        # Needed because feeds are modified in another instance of the
        # same database record, via the celery task.
        self.article1.safe_reload()

        self.assertEqual(self.article1.reads.count(), 10)
        self.assertEqual(self.article2.reads.count(), 0)

        self.assertEqual(len(self.article1.feeds), 10)
        self.assertEqual(len(self.article2.feeds), 5)

        self.assertEqual(self.article2.duplicate_of, self.article1)

        self.assertEqual(
            Article.objects(duplicate_of__exists=True).count(), 1)
        self.assertEqual(
            Article.objects(duplicate_of__exists=False).count(), 2)
Exemplo n.º 5
0
    def test_register_duplicate_bare(self):
        """Check counts before and after ``article1.register_duplicate()``.

        article2 is registered as a duplicate of article1; the test then
        verifies the reads, the feeds and the ``duplicate_of`` bookkeeping.
        """

        not_duplicates = Article.objects(duplicate_of__exists=False)
        self.assertEqual(not_duplicates.count(), 3)

        self.article1.register_duplicate(self.article2)

        # Needed because feeds are modified in another instance of the
        # same database record, via the celery task.
        self.article1.safe_reload()

        self.assertEqual(self.article1.reads.count(), 10)
        self.assertEqual(self.article2.reads.count(), 0)

        self.assertEqual(len(self.article1.feeds), 10)
        self.assertEqual(len(self.article2.feeds), 5)

        self.assertEqual(self.article2.duplicate_of, self.article1)

        # Fresh querysets are built for the final counts.
        self.assertEqual(
            Article.objects(duplicate_of__exists=True).count(), 1)
        self.assertEqual(
            Article.objects(duplicate_of__exists=False).count(), 2)
Exemplo n.º 6
0
    def setUp(self):
        """Empty the WebSite and Article collections and store two sites."""

        # Same drop order as before: WebSite first, then Article.
        for model in (WebSite, Article):
            model.drop_collection()

        site_one = WebSite(url='http://test1.com')
        site_two = WebSite(url='http://test2.com')

        self.ws1 = site_one.save()
        self.ws2 = site_two.save()
Exemplo n.º 7
0
class AbsolutizeTest(TestCase):
    """Tests for ``Article.absolutize_url()``.

    Two articles are expected to end up with a different, absolute URL;
    three others are expected to keep their URL and record an error.
    """

    def setUp(self):
        """Save the five articles used by the tests."""

        #Article.drop_collection()
        #Feed.drop_collection()

        self.article1 = Article(title=u'test1',
                                url=u'http://rss.feedsportal.com/c/707/f/9951/s/2b27496a/l/0L0Sreseaux0Etelecoms0Bnet0Cactualites0Clire0Elancement0Emondial0Edu0Esamsung0Egalaxy0Es40E25980A0Bhtml/story01.htm').save() # NOQA
        self.article2 = Article(title=u'test2',
                                url=u'http://feedproxy.google.com/~r/francaistechcrunch/~3/hEIhLwVyEEI/').save() # NOQA
        self.article3 = Article(title=u'test3',
                                url=u'http://obi.1flow.io/absolutize_test_401').save() # NOQA
        self.article4 = Article(title=u'test4',
                                url=u'http://host.non.exixstentz.com/absolutize_test').save() # NOQA
        self.article5 = Article(title=u'test5',
                                url=u'http://1flow.io/absolutize_test_404').save() # NOQA

    def tearDown(self):
        """Drop the Article and Feed collections."""
        Article.drop_collection()
        Feed.drop_collection()

    def test_absolutize(self):
        """Check the final URL and flags of the two resolvable articles."""
        self.article1.absolutize_url()
        self.assertEqual(self.article1.url, u'http://www.reseaux-telecoms.net/actualites/lire-lancement-mondial-du-samsung-galaxy-s4-25980.html') # NOQA
        self.assertEqual(self.article1.url_absolute, True)
        self.assertEqual(self.article1.url_error, '')

        self.article2.absolutize_url()
        self.assertEqual(self.article2.url, u'http://techcrunch.com/2013/05/18/hell-no-tumblr-users-wont-go-to-yahoo/') # NOQA
        self.assertEqual(self.article2.url_absolute, True)
        self.assertEqual(self.article2.url_error, '')

    def test_absolutize_errors(self):
        """Check that failing URLs are left untouched and record an error."""

        #
        # NOTE: if a PROXY is set, the reasons word cases can vary.
        # eg. 'Not Found' (via Squid) instead of 'NOT FOUND' (direct answer).
        #

        self.article3.absolutize_url()
        self.assertEqual(self.article3.url, u'http://obi.1flow.io/absolutize_test_401') # NOQA
        self.assertEqual(self.article3.url_absolute, False)
        self.assertEqual(self.article3.url_error, u'HTTP Error 401 (Unauthorized) while resolving http://obi.1flow.io/absolutize_test_401.') # NOQA

        self.article5.absolutize_url()
        self.assertEqual(self.article5.url, u'http://1flow.io/absolutize_test_404') # NOQA
        self.assertEqual(self.article5.url_absolute, False)
        self.assertEqual(self.article5.url_error, u'HTTP Error 404 (NOT FOUND) while resolving http://1flow.io/absolutize_test_404.') # NOQA

        self.article4.absolutize_url()
        self.assertEqual(self.article4.url, u'http://host.non.exixstentz.com/absolutize_test') # NOQA
        self.assertEqual(self.article4.url_absolute, False)
        # Only the first 108 characters of the error are compared.
        self.assertEqual(self.article4.url_error[:108], u"HTTPConnectionPool(host='host.non.exixstentz.com', port=80): Max retries exceeded with url: /absolutize_test") # NOQA
Exemplo n.º 8
0
def synchronize_statsd_articles_gauges(full=False):
    """Send ``Article`` document counts to statsd gauges.

    The total, markdown, html, empty, content_errors and url_errors
    gauges are always sent. The orphaned, absolutes and duplicates gauges
    are sent only when ``full`` is true.
    """

    with benchmark('synchronize statsd gauges for Article.*'):

        # NOTE(review): the literal 0 is presumably the same value as
        # CONTENT_TYPE_NONE used just below -- TODO confirm.
        empty = Article.objects(content_type=0).no_cache()
        # Sub-counts of `empty`, currently disabled:
        #empty_pending       = empty.filter(content_error='', url_error='')
        #empty_content_error = empty.filter(content_error__ne='')
        #empty_url_error     = empty.filter(url_error__ne='')

        parsed = Article.objects(content_type__ne=CONTENT_TYPE_NONE)
        html = parsed.filter(content_type=CONTENT_TYPE_HTML)
        markdown = parsed.filter(content_type=CONTENT_TYPE_MARKDOWN)

        absolutes = Article.objects(url_absolute=True).no_cache()
        duplicates = Article.objects(duplicate_of__ne=None).no_cache()
        orphaned = Article.objects(orphaned=True).no_cache()
        content_errors = Article.objects(content_error__ne='').no_cache()
        url_errors = Article.objects(url_error__ne='').no_cache()

        # The total is counted on the raw collection, not on a queryset.
        statsd.gauge('articles.counts.total', Article._get_collection().count())

        # (gauge name, queryset) pairs, listed in emission order.
        always_sent = (
            ('articles.counts.markdown', markdown),
            ('articles.counts.html', html),
            ('articles.counts.empty', empty),
            ('articles.counts.content_errors', content_errors),
            ('articles.counts.url_errors', url_errors),
        )
        only_when_full = (
            ('articles.counts.orphaned', orphaned),
            ('articles.counts.absolutes', absolutes),
            ('articles.counts.duplicates', duplicates),
        )

        gauges = (always_sent + only_when_full) if full else always_sent

        for gauge_name, queryset in gauges:
            statsd.gauge(gauge_name, queryset.count())
Exemplo n.º 9
0
    def setUp(self):
        """Save five articles with fixed titles and URLs.

        The saved documents are stored as ``self.article1`` to
        ``self.article5``.
        """

        # Collection drops are disabled here.
        #Article.drop_collection()
        #Feed.drop_collection()

        first_url = u'http://rss.feedsportal.com/c/707/f/9951/s/2b27496a/l/0L0Sreseaux0Etelecoms0Bnet0Cactualites0Clire0Elancement0Emondial0Edu0Esamsung0Egalaxy0Es40E25980A0Bhtml/story01.htm'  # NOQA
        second_url = u'http://feedproxy.google.com/~r/francaistechcrunch/~3/hEIhLwVyEEI/'  # NOQA
        third_url = u'http://obi.1flow.io/absolutize_test_401'
        fourth_url = u'http://host.non.exixstentz.com/absolutize_test'
        fifth_url = u'http://1flow.io/absolutize_test_404'

        self.article1 = Article(title=u'test1', url=first_url).save()
        self.article2 = Article(title=u'test2', url=second_url).save()
        self.article3 = Article(title=u'test3', url=third_url).save()
        self.article4 = Article(title=u'test4', url=fourth_url).save()
        self.article5 = Article(title=u'test5', url=fifth_url).save()
Exemplo n.º 10
0
    def setUp(self):
        """Build the fixtures: one article in one feed, and Django users.

        Users from the first loop get a ``Read`` of the article and a
        ``Subscription`` to the feed; users from the second loop get
        neither.
        """

        # NOTE: we need real web pages, else the absolutization won't work
        # or will find duplicates and tests will fail for a real-life
        # reason.
        new_article = Article(title='test1',
                              url='http://blog.1flow.io/post/'
                              '59410536612/1flow-blog-has-moved')
        self.article1 = new_article.save()

        new_feed = Feed(name='1flow test feed',
                        url='http://blog.1flow.io/rss')
        self.feed = new_feed.save()

        self.article1.update(add_to_set__feeds=self.feed)
        self.article1.reload()

        # NOTE(review): '******' and '*****@*****.**' look like redacted
        # placeholders; with no conversion specifier in them, the `%`
        # formatting below cannot succeed as written -- confirm the real
        # values.

        # User & Reads creation (xrange(1, 2) only yields index 1).
        for index in xrange(1, 2):
            username = '******' % index
            created = DjangoUser.objects.create(
                username=username, email='*****@*****.**' % username)
            # PG post_save() signal already created the MongoDB user.
            mongo_account = created.mongo
            Read(user=mongo_account, article=self.article1).save()
            Subscription(user=mongo_account, feed=self.feed).save()

        # Users 2, 3 and 4 are created without reads or subscriptions.
        for index in xrange(2, 5):
            username = '******' % index
            DjangoUser.objects.create(username=username,
                                      email='*****@*****.**' % username)
Exemplo n.º 11
0
    def test_url_error_classifier(self):
        """Classify the stored ``url_error`` strings and check the buckets.

        Verifies the keys of the result, the number of seen objects, the
        number of error types, and that the HTTP 404 bucket holds ``a3``
        and ``a4`` while no HTTP 401 bucket exists.
        """

        # NOTE: these errors strings are directly taken from the production
        #       database. Only URLs have been changed for tests.

        results = UrlErrorClassifier(Article.objects(url_error__ne=''),
                                     'url_error').classify()

        self.assertEqual(sorted(results.keys()), [u'duration',
                         u'error_types', u'seen_objects', u'stored_instances'])
        self.assertEqual(results.get('seen_objects'), 5)

        stored = results.get('stored_instances')
        errors = results.get('error_types')

        self.assertEqual(len(errors), 4)
        self.assertEqual(len(stored), 4)

        err404 = stored.get(UrlErrorClassifier.ERR_NETWORK_HTTP404)

        self.assertEqual(len(err404), 2)
        self.assertIn(self.a3, err404)
        self.assertIn(self.a4, err404)

        err401 = stored.get(UrlErrorClassifier.ERR_NETWORK_HTTP401)
        self.assertIsNone(err401)
Exemplo n.º 12
0
    def test_url_error_classifier(self):
        """Run ``UrlErrorClassifier`` on articles having a ``url_error``.

        Checks the result keys, the seen-objects count, the number of
        error types and stored buckets, the content of the HTTP 404
        bucket, and the absence of an HTTP 401 bucket.
        """

        # NOTE: these errors strings are directly taken from the production
        #       database. Only URLs have been changed for tests.

        results = UrlErrorClassifier(Article.objects(url_error__ne=''),
                                     'url_error').classify()

        self.assertEqual(sorted(results.keys()), [
            u'duration', u'error_types', u'seen_objects', u'stored_instances'
        ])
        self.assertEqual(results.get('seen_objects'), 5)

        stored = results.get('stored_instances')
        errors = results.get('error_types')

        self.assertEqual(len(errors), 4)
        self.assertEqual(len(stored), 4)

        err404 = stored.get(UrlErrorClassifier.ERR_NETWORK_HTTP404)

        self.assertEqual(len(err404), 2)
        self.assertIn(self.a3, err404)
        self.assertIn(self.a4, err404)

        # No article carries a 401 error, so that bucket must be missing.
        err401 = stored.get(UrlErrorClassifier.ERR_NETWORK_HTTP401)
        self.assertIsNone(err401)
Exemplo n.º 13
0
    def setUp(self):
        """Create three articles, ten users with one read each, ten feeds.

        Users 1-5 read article1 and users 6-10 read article2; feeds 1-5
        are added to article1 and feeds 6-10 to article2. article3 gets
        neither reads nor feeds.
        """

        # NOTE: we need real web pages, else the absolutization won't work
        # or will find duplicates and tests will fail for a real-life
        # reason.
        self.article1 = Article(
            title='test1',
            url='http://blog.1flow.io/post/'
            '59410536612/1flow-blog-has-moved',
        ).save()
        self.article2 = Article(
            title='test2', url='http://obi.1flow.io/fr/').save()
        self.article3 = Article(
            title='test3', url='http://obi.1flow.io/en/').save()

        # (first index, stop index, article) triples shared by the reads
        # and the feeds creation; the stop index is excluded.
        batches = ((1, 6, self.article1), (6, 11, self.article2))

        # NOTE(review): '******' and '*****@*****.**' look like redacted
        # placeholders; with no conversion specifier in them, the `%`
        # formatting below cannot succeed as written -- confirm the real
        # values.

        # User & Reads creation
        for first, stop, article in batches:
            for index in xrange(first, stop):
                username = '******' % index
                django_user = DjangoUser.objects.create(
                    username=username, email='*****@*****.**' % username)
                # NOTE: the mongoDB user is created automatically. If you
                # try to create one it will fail with duplicate index error.
                mongo_user = django_user.mongo
                Read(user=mongo_user, article=article).save()

        # Feeds creation
        for first, stop, article in batches:
            for index in xrange(first, stop):
                feed = Feed(name='test feed #%s' % index,
                            url='http://test-feed%s.com' % index).save()
                article.update(add_to_set__feeds=feed)

                article.reload()
Exemplo n.º 14
0
def article_url_error_types():
    """Classify the ``url_error`` of every article that has a non-empty one.

    Returns the value of ``UrlErrorClassifier(...).classify()``.
    """

    # Next to investigate:
    #    list index out of range: 758
    #    'NoneType' object has no attribute 'findAll': 137

    articles_with_url_error = Article.objects(url_error__ne='').no_cache()
    classifier = UrlErrorClassifier(articles_with_url_error, 'url_error')

    return classifier.classify()
Exemplo n.º 15
0
    def setUp(self):
        """Save three articles, then attach reads and feeds to two of them.

        article1 receives reads from users 1-5 and feeds 1-5; article2
        receives reads from users 6-10 and feeds 6-10; article3 is left
        bare.
        """

        # NOTE: we need real web pages, else the absolutization won't work
        # or will find duplicates and tests will fail for a real-life
        # reason.
        first_article = Article(title='test1',
                                url='http://blog.1flow.io/post/'
                                '59410536612/1flow-blog-has-moved')
        self.article1 = first_article.save()

        second_article = Article(title='test2', url='http://obi.1flow.io/fr/')
        self.article2 = second_article.save()

        third_article = Article(title='test3', url='http://obi.1flow.io/en/')
        self.article3 = third_article.save()

        # Index ranges paired with the article they apply to.
        plan = (
            (xrange(1, 6), self.article1),
            (xrange(6, 11), self.article2),
        )

        # NOTE(review): '******' and '*****@*****.**' look like redacted
        # placeholders; with no conversion specifier in them, the `%`
        # formatting below cannot succeed as written -- confirm the real
        # values.

        # User & Reads creation
        for indexes, target in plan:
            for index in indexes:
                username = '******' % index
                account = DjangoUser.objects.create(
                    username=username, email='*****@*****.**' % username)
                # NOTE: the mongoDB user is created automatically. If you
                # try to create one it will fail with duplicate index error.
                reader = account.mongo
                Read(user=reader, article=target).save()

        # Feeds creation
        for indexes, target in plan:
            for index in indexes:
                new_feed = Feed(name='test feed #%s' % index,
                                url='http://test-feed%s.com' % index).save()
                target.update(add_to_set__feeds=new_feed)

                target.reload()
Exemplo n.º 16
0
    def test_generic_errors_classifiers(self):
        """Run ``GenericErrorClassifier`` on articles having a ``url_error``.

        Checks the number of seen objects, of error types and of stored
        buckets.
        """

        results = GenericErrorClassifier(Article.objects(url_error__ne=''),
                                         'url_error').classify()

        stored = results.get('stored_instances')
        errors = results.get('error_types')

        self.assertEqual(results.get('seen_objects'), 5)

        self.assertEqual(len(errors), 5)
        self.assertEqual(len(stored), 5)
Exemplo n.º 17
0
    def test_python_errors_classifiers(self):
        """Run ``PythonErrorClassifier`` on articles having a ``url_error``.

        Checks the number of seen objects, of error types and of stored
        buckets.
        """

        results = PythonErrorClassifier(Article.objects(url_error__ne=''),
                                        'url_error').classify()

        stored = results.get('stored_instances')
        errors = results.get('error_types')

        self.assertEqual(results.get('seen_objects'), 5)

        self.assertEqual(len(errors), 5)
        self.assertEqual(len(stored), 5)
Exemplo n.º 18
0
    def test_content_error_classifier(self):
        """Run ``ContentErrorClassifier`` and check the seen-objects count.

        The queryset still filters on ``url_error`` (see the TODO below),
        while the classified attribute is ``content_error``.
        """

        # NOTE: these errors strings are directly taken from the production
        #       database. Only URLs have been changed for tests.
        #
        # ValidationError (Article:51fa68957711037f4003a37b) (1.GenericReferences can only contain documents: ['tags']): 1
        # ValidationError (Article:51fa68e47711037f3d03a3fe) (5.GenericReferences can only contain documents: ['tags']): 1
        # ValidationError (Article:51fa6b6aa24639329b2ce203) (1.GenericReferences can only contain documents: ['tags']): 1
        # ValidationError (Article:51fa69c3a24639329a2ce21a) (3.GenericReferences can only contain documents: ['tags']): 1
        # ValidationError (Article:51fa67a97711037f3d03a33d) (GenericReferences can only contain documents: ['tags']): 1
        # ValidationError (Article:51fa68e57711037f3d03a413) (4.GenericReferences can only contain documents: ['tags']): 1
        # ValidationError (Article:51fa64377711037f3f03a30c) (2.GenericReferences can only contain documents: ['tags']): 1
        # ValidationError (Article:51fa69c3a24639329a2ce207) (3.GenericReferences can only contain documents: ['tags']): 1
        # ValidationError (Article:51fa6b3f7711037f6a25ae46) (6.GenericReferences can only contain documents: ['tags']): 1
        # ValidationError (Article:51fa6b68a2463932a02ce2af) (11.GenericReferences can only contain documents: ['tags']): 1

        # TODO: url_error__ne -> content_error__ne
        # when we fully implement this test method.
        results = ContentErrorClassifier(Article.objects(url_error__ne=''),
                                         'content_error').classify()

        self.assertEqual(results.get('seen_objects'), 5)
Exemplo n.º 19
0
    def test_content_error_classifier(self):
        """Classify ``content_error`` values and check the seen-objects count.

        The articles are still selected on ``url_error`` (see the TODO
        below); only the number of seen objects is asserted so far.
        """

        # NOTE: these errors strings are directly taken from the production
        #       database. Only URLs have been changed for tests.
        #
        # ValidationError (Article:51fa68957711037f4003a37b) (1.GenericReferences can only contain documents: ['tags']): 1
        # ValidationError (Article:51fa68e47711037f3d03a3fe) (5.GenericReferences can only contain documents: ['tags']): 1
        # ValidationError (Article:51fa6b6aa24639329b2ce203) (1.GenericReferences can only contain documents: ['tags']): 1
        # ValidationError (Article:51fa69c3a24639329a2ce21a) (3.GenericReferences can only contain documents: ['tags']): 1
        # ValidationError (Article:51fa67a97711037f3d03a33d) (GenericReferences can only contain documents: ['tags']): 1
        # ValidationError (Article:51fa68e57711037f3d03a413) (4.GenericReferences can only contain documents: ['tags']): 1
        # ValidationError (Article:51fa64377711037f3f03a30c) (2.GenericReferences can only contain documents: ['tags']): 1
        # ValidationError (Article:51fa69c3a24639329a2ce207) (3.GenericReferences can only contain documents: ['tags']): 1
        # ValidationError (Article:51fa6b3f7711037f6a25ae46) (6.GenericReferences can only contain documents: ['tags']): 1
        # ValidationError (Article:51fa6b68a2463932a02ce2af) (11.GenericReferences can only contain documents: ['tags']): 1

        # TODO: url_error__ne -> content_error__ne
        # when we fully implement this test method.
        results = ContentErrorClassifier(Article.objects(url_error__ne=''),
                                         'content_error').classify()

        self.assertEqual(results.get('seen_objects'), 5)
Exemplo n.º 20
0
    def setUp(self):
        """Save six articles: ``a1`` without error, ``a2``-``a6`` with one.

        ``a3`` and ``a4`` carry an HTTP 404 error string; ``a2``, ``a5``
        and ``a6`` carry three different ``HTTPConnectionPool`` error
        strings.
        """

        # NOTE: we need real web pages, else the absolutization won't work or
        # will find duplicates and tests will fail for a real-life reason.
        # Here we need to keep an article without any url_error, so we have
        # to make it point to a real working URL.
        a1_url = 'http://blog.1flow.io/post/59410536612/1flow-blog-has-moved'
        self.a1 = Article(title='ErrorClassifierTests #1', url=a1_url).save()

        # Error strings, one per article, in creation order.
        a2_error = "HTTPConnectionPool(host='t.co', port=80): Max retries exceeded with url: /t1 (Caused by <class 'socket.error'>: [Errno 60] Operation timed out)"  # NOQA
        a3_error = "HTTP Error 404 (Not Found) while resolving http://t.co/t3."
        a4_error = "HTTP Error 404 (Not Found) while resolving http://t.co/t4."
        a5_error = "HTTPConnectionPool(host='t.co', port=80): Max retries exceeded with url: /t5 (Caused by <class 'socket.error'>: [Errno 65] No route to host)"  # NOQA
        a6_error = "HTTPConnectionPool(host='t.co', port=80): Max retries exceeded with url: /t6 (Caused by <class 'socket.error'>: [Errno 54] Connection reset by peer)"  # NOQA

        self.a2 = Article(title='ErrorClassifierTests #2',
                          url='http://t.co/t2',
                          url_error=a2_error).save()
        self.a3 = Article(title='ErrorClassifierTests #3',
                          url='http://t.co/t3',
                          url_error=a3_error).save()
        self.a4 = Article(title='ErrorClassifierTests #4',
                          url='http://t.co/t4',
                          url_error=a4_error).save()
        self.a5 = Article(title='ErrorClassifierTests #5',
                          url='http://t.co/t5',
                          url_error=a5_error).save()
        self.a6 = Article(title='ErrorClassifierTests #6',
                          url='http://t.co/6',
                          url_error=a6_error).save()
Exemplo n.º 21
0
import logging

from django.test import TestCase
from django.test.utils import override_settings

from oneflow.base.tests import connect_mongodb_testsuite
from oneflow.core.models import Article
from oneflow.core.stats import (PythonErrorClassifier, GenericErrorClassifier,
                                UrlErrorClassifier, ContentErrorClassifier)

# NOTE(review): the logger is named after the file path (__file__) rather
# than the module name (__name__) -- confirm this is intended.
LOGGER = logging.getLogger(__file__)

# Called once, when this module is imported.
connect_mongodb_testsuite()

# The Article collection is dropped at import time as well.
Article.drop_collection()


@override_settings(STATICFILES_STORAGE=
                   'pipeline.storage.NonPackagingPipelineStorage',
                   CELERY_EAGER_PROPAGATES_EXCEPTIONS=True,
                   CELERY_ALWAYS_EAGER=True,
                   BROKER_BACKEND='memory',)
class ErrorClassifierTests(TestCase):

    def setUp(self):

        # NOTE: we need real web pages, else the absolutization won't work or
        # will find duplicates and tests will fail for a real-life reason.
        # Here we need to keep an article without any url_error, so we have
        # to make it point to a real working URL.
Exemplo n.º 22
0
 def tearDown(self):
     """Drop the WebSite collection, then the Article collection."""
     for document_class in (WebSite, Article):
         document_class.drop_collection()
Exemplo n.º 23
0
from oneflow.core.tasks import global_feeds_checker
from oneflow.base.utils import RedisStatsCounter
from oneflow.base.tests import (connect_mongodb_testsuite, TEST_REDIS)

# Everything below runs once, when this module is imported.
DjangoUser = get_user_model()
# NOTE(review): the logger is named after the file path (__file__) rather
# than the module name (__name__) -- confirm this is intended.
LOGGER = logging.getLogger(__file__)

# Use the test database not to pollute the production/development one.
RedisStatsCounter.REDIS = TEST_REDIS

# Flush the test Redis database.
TEST_REDIS.flushdb()

connect_mongodb_testsuite()

# Empty the database before starting in case an old test failed to tearDown().
Article.drop_collection()
Read.drop_collection()
User.drop_collection()
Group.drop_collection()
Feed.drop_collection()
Tag.drop_collection()
Folder.drop_collection()
WebSite.drop_collection()
Author.drop_collection()


class ThrottleIntervalTest(TestCase):
    def test_lower_interval_with_etag_or_modified(self):

        t = Feed.throttle_fetch_interval
Exemplo n.º 24
0
class FeedsTest(TestCase):
    """Tests for ``Feed``: closing, creation from URLs, and ``good_feeds``."""

    def setUp(self):
        """Create one article attached to one feed, plus Django users.

        The first user loop also stores a ``Read`` and a ``Subscription``
        for each user; the second loop only creates the Django users.
        """

        # NOTE: we need real web pages, else the absolutization won't work or
        # will find duplicates and tests will fail for a real-life reason.
        self.article1 = Article(title='test1',
                                url='http://blog.1flow.io/post/'
                                '59410536612/1flow-blog-has-moved').save()

        self.feed = Feed(name='1flow test feed',
                         url='http://blog.1flow.io/rss').save()

        self.article1.update(add_to_set__feeds=self.feed)
        self.article1.reload()

        # NOTE(review): '******' and '*****@*****.**' look like redacted
        # placeholders; with no conversion specifier in them, the `%`
        # formatting below cannot succeed as written -- confirm the real
        # values.

        # User & Reads creation
        for index in xrange(1, 2):
            username = '******' % index
            du = DjangoUser.objects.create(username=username,
                                           email='*****@*****.**' % username)
            # PG post_save() signal already created the MongoDB user.
            u = du.mongo
            Read(user=u, article=self.article1).save()
            Subscription(user=u, feed=self.feed).save()

        # These users get neither a Read nor a Subscription.
        for index in xrange(2, 5):
            username = '******' % index
            du = DjangoUser.objects.create(username=username,
                                           email='*****@*****.**' % username)

    def tearDown(self):
        """Drop every MongoDB collection populated by ``setUp()``."""
        Subscription.drop_collection()
        Feed.drop_collection()
        Read.drop_collection()
        Article.drop_collection()
        User.drop_collection()

    def test_close(self):
        """Close the feed, then check its state and the reminder e-mail."""

        closed_reason = u'closed for tests'

        self.feed.close(closed_reason)

        self.assertTrue(self.feed.closed)
        self.assertEqual(self.feed.closed_reason, closed_reason)
        self.assertIsNotNone(self.feed.date_closed)

        global_feeds_checker()

        self.assertEqual(len(mail.outbox), 1)
        self.assertIn(u'Reminder: 1 feed(s) closed in last',
                      mail.outbox[0].subject)
        self.assertIn(unicode(self.feed), mail.outbox[0].body)

        #self.assertEqual( mail.outbox[0].to, [ "*****@*****.**" ] )
        #self.assertTrue( "*****@*****.**" in mail.outbox[0].to )

    def test_feeds_creation(self):
        """Create feeds from several URLs and check what gets created.

        After each call the test checks the ``created`` flag, the URL of
        the returned feed, and the size of the Feed collection.
        """

        # .setUp() creates one already.
        self.assertEqual(Feed._get_collection().count(), 1)

        feed, created = Feed.create_feeds_from_url(u'http://ntoll.org/')[0]
        self.assertTrue(created)
        self.assertEqual(feed.url, u'http://ntoll.org/rss.xml')
        self.assertEqual(Feed._get_collection().count(), 2)

        # Via the Home Page
        feed, created = Feed.create_feeds_from_url(u'http://www.zdnet.fr/')[0]
        self.assertTrue(created)
        self.assertEqual(feed.url, u'http://www.zdnet.fr/feeds/rss/')
        self.assertEqual(Feed._get_collection().count(), 3)

        # Via the RSS listing page
        feed, created = Feed.create_feeds_from_url(
            u'http://www.zdnet.fr/services/rss/')[0]  # NOQA
        self.assertFalse(created)
        self.assertEqual(feed.url, u'http://www.zdnet.fr/feeds/rss/')
        self.assertEqual(Feed._get_collection().count(), 3)

        # Via the first RSS (raw)
        feed, created = Feed.create_feeds_from_url(
            u'http://www.zdnet.fr/feeds/rss/')[0]  # NOQA
        self.assertFalse(created)
        self.assertEqual(feed.url, u'http://www.zdnet.fr/feeds/rss/')
        self.assertEqual(Feed._get_collection().count(), 3)

        feed, created = Feed.create_feeds_from_url(
            u'http://www.atlantico.fr/')[0]  # NOQA
        self.assertTrue(created)
        self.assertEqual(feed.url, u'http://www.atlantico.fr/rss.xml')
        self.assertEqual(Feed._get_collection().count(), 4)

        feed, created = Feed.create_feeds_from_url(u'http://wordpress.org/')[0]
        self.assertTrue(created)
        self.assertEqual(feed.url, u'http://wordpress.org/news/feed/')
        self.assertEqual(Feed._get_collection().count(), 5)

        # Not created again, even from an article which has the comment feed.
        feed, created = Feed.create_feeds_from_url(
            u'http://ntoll.org/article/build-a-drogulus')[0]  # NOQA
        self.assertFalse(created)
        self.assertEqual(feed.url, u'http://ntoll.org/rss.xml')
        self.assertEqual(Feed._get_collection().count(), 5)

        # This one has been created in .setUp()
        feed, created = Feed.create_feeds_from_url(u'http://blog.1flow.io/')[0]
        self.assertFalse(created)
        self.assertEqual(feed.url, u'http://blog.1flow.io/rss')
        self.assertEqual(Feed._get_collection().count(), 5)

        # NOTE(review): `Exception` is very broad for assertRaises; narrow it
        # once the exact exception raised by create_feeds_from_url() is
        # confirmed.

        # No RSS in main page
        self.assertRaises(Exception, Feed.create_feeds_from_url,
                          u'http://www.bbc.co.uk/')
        self.assertEqual(Feed._get_collection().count(), 5)

        # This one has no RSS anywhere, it won't create anything
        self.assertRaises(Exception, Feed.create_feeds_from_url,
                          u'http://www.tumblr.com/blog/1flowio')
        self.assertEqual(Feed._get_collection().count(), 5)

    def test_closed_feeds_are_never_good(self):
        """ This test addresses Github #10.

            It is very simple, but the `.good_feeds` query is quite complex.
        """

        self.assertEqual(len(Feed.good_feeds), 1)

        closed_reason = u'closed for tests'
        self.feed.close(closed_reason)

        self.assertEqual(len(Feed.good_feeds), 0)
Exemplo n.º 25
0
def article_content_error_types():
    """Classify the ``content_error`` of every article with a non-empty one.

    Returns the value of ``ContentErrorClassifier(...).classify()``.
    """

    articles_with_content_error = Article.objects(
        content_error__ne='').no_cache()
    classifier = ContentErrorClassifier(articles_with_content_error,
                                        'content_error')

    return classifier.classify()
Exemplo n.º 26
0
 def tearDown(self):
     """Drop the Article collection."""
     Article.drop_collection()
Exemplo n.º 27
0
class ArticleDuplicateTest(TestCase):
    """ Tests for `Article.register_duplicate()`.

        The fixture is made of three articles. `article1` and `article2`
        each get five reads (from five distinct users) and five feeds;
        `article3` gets neither.
    """

    def setUp(self):
        """ Create the articles, then their readers and their feeds. """

        # NOTE: we need real web pages, else the absolutization won't work or
        # will find duplicates and tests will fail for a real-life reason.
        self.article1 = Article(title='test1',
                                url='http://blog.1flow.io/post/'
                                '59410536612/1flow-blog-has-moved').save()
        self.article2 = Article(title='test2',
                                url='http://obi.1flow.io/fr/').save()
        self.article3 = Article(title='test3',
                                url='http://obi.1flow.io/en/').save()

        # User & Reads creation: users 1-5 read article1, 6-10 read article2.
        self._create_readers(xrange(1, 6), self.article1)
        self._create_readers(xrange(6, 11), self.article2)

        # Feeds creation: feeds 1-5 go to article1, 6-10 go to article2.
        self._attach_feeds(xrange(1, 6), self.article1)
        self._attach_feeds(xrange(6, 11), self.article2)

    def _create_readers(self, indexes, article):
        """ Create one Django user per index, each with a `Read` of `article`.

            :param indexes: iterable of integers; each one yields a distinct
                username and e-mail address.
            :param article: the article the created users have read.
        """

        for index in indexes:
            # The format strings need a conversion specifier: without one,
            # the `%` operator raises `TypeError` and setUp() never completes.
            username = 'test_user_%s' % index
            du = DjangoUser.objects.create(username=username,
                                           email='%[email protected]' % username)

            # NOTE: the mongoDB user is created automatically. If you
            # try to create one it will fail with duplicate index error.
            Read(user=du.mongo, article=article).save()

    def _attach_feeds(self, indexes, article):
        """ Create one feed per index and add each of them to `article`.

            :param indexes: iterable of integers used in feed names and URLs.
            :param article: the article receiving the feeds; it is reloaded
                after every update.
        """

        for index in indexes:
            f = Feed(name='test feed #%s' % index,
                     url='http://test-feed%s.com' % index).save()
            article.update(add_to_set__feeds=f)

            article.reload()

    def tearDown(self):
        """ Drop the collections used by the fixture. """

        Article.drop_collection()
        User.drop_collection()
        Read.drop_collection()
        Feed.drop_collection()

    def test_register_duplicate_bare(self):
        """ Register `article2` as a duplicate of `article1`. """

        self.assertEqual(
            Article.objects(duplicate_of__exists=False).count(), 3)

        self.article1.register_duplicate(self.article2)

        # needed because feeds are modified in another instance of the
        # same database record, via the celery task.
        self.article1.safe_reload()

        self.assertEqual(self.article1.reads.count(), 10)

        self.assertEqual(self.article2.reads.count(), 0)

        self.assertEqual(len(self.article1.feeds), 10)

        self.assertEqual(len(self.article2.feeds), 5)

        self.assertEqual(self.article2.duplicate_of, self.article1)

        self.assertEqual(
            Article.objects(duplicate_of__exists=True).count(), 1)
        self.assertEqual(
            Article.objects(duplicate_of__exists=False).count(), 2)

    def test_register_duplicate_not_again(self):
        """ Check `duplicate_of` after registering `article2` as duplicate. """

        self.article1.register_duplicate(self.article2)
        self.article1.safe_reload()

        self.assertEqual(self.article2.duplicate_of, self.article1)
Exemplo n.º 28
0
 def tearDown(self):
     """ Drop the five collections below, in the listed order. """
     drop_order = (Subscription, Feed, Read, Article, User)

     for document_class in drop_order:
         document_class.drop_collection()
Exemplo n.º 29
0
 def tearDown(self):
     """ Drop every listed collection, keeping the declared order. """
     for model in (Subscription, Feed, Read, Article, User):
         model.drop_collection()
Exemplo n.º 30
0
class FeedsTest(TestCase):
    """ Tests for closing feeds and for creating feeds from web URLs.

        The fixture is made of one article attached to one feed, one user
        who has read the article and is subscribed to the feed, and three
        more Django users without any read nor subscription.
    """

    def setUp(self):
        """ Create the article, its feed and the users. """

        # NOTE: we need real web pages, else the absolutization won't work or
        # will find duplicates and tests will fail for a real-life reason.
        self.article1 = Article(title='test1',
                                url='http://blog.1flow.io/post/'
                                '59410536612/1flow-blog-has-moved').save()

        self.feed = Feed(name='1flow test feed',
                         url='http://blog.1flow.io/rss').save()

        self.article1.update(add_to_set__feeds=self.feed)
        self.article1.reload()

        # User & Reads creation
        for index in xrange(1, 2):
            # The format strings need a conversion specifier: without one,
            # the `%` operator raises `TypeError` and setUp() never completes.
            username = 'test_user_%s' % index
            du = DjangoUser.objects.create(username=username,
                                           email='%[email protected]' % username)
            # PG post_save() signal already created the MongoDB user.
            u = du.mongo
            Read(user=u, article=self.article1).save()
            Subscription(user=u, feed=self.feed).save()

        # These users get neither a read nor a subscription.
        for index in xrange(2, 5):
            username = 'test_user_%s' % index
            DjangoUser.objects.create(username=username,
                                      email='%[email protected]' % username)

    def tearDown(self):
        """ Drop the collections used by the fixture. """

        Subscription.drop_collection()
        Feed.drop_collection()
        Read.drop_collection()
        Article.drop_collection()
        User.drop_collection()

    def _assert_feed_from_url(self, url, feed_url, created, total):
        """ Run `Feed.create_feeds_from_url(url)` and check its first result.

            :param url: the web URL handed to `Feed.create_feeds_from_url()`.
            :param feed_url: the expected `url` of the first returned feed.
            :param created: whether the first returned `created` flag is
                expected to be truthy.
            :param total: the expected number of documents in the feed
                collection after the call.
        """

        feed, was_created = Feed.create_feeds_from_url(url)[0]

        self.assertEqual(bool(was_created), created)
        self.assertEqual(feed.url, feed_url)
        self.assertEqual(Feed._get_collection().count(), total)

    def _assert_no_feed_from_url(self, url, total):
        """ Check that `Feed.create_feeds_from_url(url)` raises.

            :param url: the web URL handed to `Feed.create_feeds_from_url()`.
            :param total: the expected number of documents in the feed
                collection after the failed call.
        """

        self.assertRaises(Exception, Feed.create_feeds_from_url, url)
        self.assertEqual(Feed._get_collection().count(), total)

    def test_close(self):
        """ Close the feed, then check its state and the reminder mail. """

        closed_reason = u'closed for tests'

        self.feed.close(closed_reason)

        self.assertTrue(self.feed.closed)
        self.assertEqual(self.feed.closed_reason, closed_reason)
        self.assertIsNotNone(self.feed.date_closed)

        global_feeds_checker()

        self.assertEqual(len(mail.outbox), 1)
        self.assertIn(u'Reminder: 1 feed(s) closed in last',
                      mail.outbox[0].subject)
        self.assertIn(unicode(self.feed), mail.outbox[0].body)

        # NOTE: the recipients of the reminder mail are not checked here.

    def test_feeds_creation(self):
        """ Create feeds from several web URLs and count the stored feeds. """

        # .setUp() creates one already.
        self.assertEqual(Feed._get_collection().count(), 1)

        self._assert_feed_from_url(u'http://ntoll.org/',
                                   u'http://ntoll.org/rss.xml',
                                   created=True, total=2)

        # Via the Home Page
        self._assert_feed_from_url(u'http://www.zdnet.fr/',
                                   u'http://www.zdnet.fr/feeds/rss/',
                                   created=True, total=3)

        # Via the RSS listing page
        self._assert_feed_from_url(u'http://www.zdnet.fr/services/rss/',
                                   u'http://www.zdnet.fr/feeds/rss/',
                                   created=False, total=3)

        # Via the first RSS (raw)
        self._assert_feed_from_url(u'http://www.zdnet.fr/feeds/rss/',
                                   u'http://www.zdnet.fr/feeds/rss/',
                                   created=False, total=3)

        self._assert_feed_from_url(u'http://www.atlantico.fr/',
                                   u'http://www.atlantico.fr/rss.xml',
                                   created=True, total=4)

        self._assert_feed_from_url(u'http://wordpress.org/',
                                   u'http://wordpress.org/news/feed/',
                                   created=True, total=5)

        # Not created again, even from an article which has the comment feed.
        self._assert_feed_from_url(
            u'http://ntoll.org/article/build-a-drogulus',
            u'http://ntoll.org/rss.xml',
            created=False, total=5)

        # This one has been created in .setUp()
        self._assert_feed_from_url(u'http://blog.1flow.io/',
                                   u'http://blog.1flow.io/rss',
                                   created=False, total=5)

        # No RSS in main page
        self._assert_no_feed_from_url(u'http://www.bbc.co.uk/', total=5)

        # This one has no RSS anywhere, it won't create anything
        self._assert_no_feed_from_url(u'http://www.tumblr.com/blog/1flowio',
                                      total=5)
Exemplo n.º 31
0
 def tearDown(self):
     """ Drop the `WebSite` collection, then the `Article` one. """
     for document_class in (WebSite, Article):
         document_class.drop_collection()
Exemplo n.º 32
0
class ArticleDuplicateTest(TestCase):
    """ Tests for `Article.register_duplicate()`.

        The fixture is made of three articles. `article1` and `article2`
        each get five reads (from five distinct users) and five feeds;
        `article3` gets neither.
    """

    def setUp(self):
        """ Create the articles, then their readers and their feeds. """

        # NOTE: we need real web pages, else the absolutization won't work or
        # will find duplicates and tests will fail for a real-life reason.
        self.article1 = Article(title='test1',
                                url='http://blog.1flow.io/post/'
                                '59410536612/1flow-blog-has-moved').save()
        self.article2 = Article(title='test2',
                                url='http://obi.1flow.io/fr/').save()
        self.article3 = Article(title='test3',
                                url='http://obi.1flow.io/en/').save()

        # User & Reads creation: users 1-5 read article1, 6-10 read article2.
        self._create_readers(xrange(1, 6), self.article1)
        self._create_readers(xrange(6, 11), self.article2)

        # Feeds creation: feeds 1-5 go to article1, 6-10 go to article2.
        self._attach_feeds(xrange(1, 6), self.article1)
        self._attach_feeds(xrange(6, 11), self.article2)

    def _create_readers(self, indexes, article):
        """ Create one Django user per index, each with a `Read` of `article`.

            :param indexes: iterable of integers; each one yields a distinct
                username and e-mail address.
            :param article: the article the created users have read.
        """

        for index in indexes:
            # The format strings need a conversion specifier: without one,
            # the `%` operator raises `TypeError` and setUp() never completes.
            username = 'test_user_%s' % index
            du = DjangoUser.objects.create(username=username,
                                           email='%[email protected]' % username)

            # NOTE: the mongoDB user is created automatically. If you
            # try to create one it will fail with duplicate index error.
            Read(user=du.mongo, article=article).save()

    def _attach_feeds(self, indexes, article):
        """ Create one feed per index and add each of them to `article`.

            :param indexes: iterable of integers used in feed names and URLs.
            :param article: the article receiving the feeds; it is reloaded
                after every update.
        """

        for index in indexes:
            f = Feed(name='test feed #%s' % index,
                     url='http://test-feed%s.com' % index).save()
            article.update(add_to_set__feeds=f)

            article.reload()

    def tearDown(self):
        """ Drop the collections used by the fixture. """

        Article.drop_collection()
        User.drop_collection()
        Read.drop_collection()
        Feed.drop_collection()

    def test_register_duplicate_bare(self):
        """ Register `article2` as a duplicate of `article1`. """

        self.assertEqual(
            Article.objects(duplicate_of__exists=False).count(), 3)

        self.article1.register_duplicate(self.article2)

        # needed because feeds are modified in another instance of the
        # same database record, via the celery task.
        self.article1.safe_reload()

        self.assertEqual(self.article1.reads.count(), 10)

        self.assertEqual(self.article2.reads.count(), 0)

        self.assertEqual(len(self.article1.feeds), 10)

        self.assertEqual(len(self.article2.feeds), 5)

        self.assertEqual(self.article2.duplicate_of, self.article1)

        self.assertEqual(
            Article.objects(duplicate_of__exists=True).count(), 1)
        self.assertEqual(
            Article.objects(duplicate_of__exists=False).count(), 2)

    def test_register_duplicate_not_again(self):
        """ Check `duplicate_of` after registering `article2` as duplicate. """

        self.article1.register_duplicate(self.article2)
        self.article1.safe_reload()

        self.assertEqual(self.article2.duplicate_of, self.article1)
Exemplo n.º 33
0
 def tearDown(self):
     """ Drop the `Article` collection, then the `Feed` one. """
     for document_class in (Article, Feed):
         document_class.drop_collection()
Exemplo n.º 34
0
 def tearDown(self):
     """ Drop the four collections below, in the listed order. """
     drop_order = (Article, User, Read, Feed)

     for document_class in drop_order:
         document_class.drop_collection()
Exemplo n.º 35
0
class AbsolutizeTest(TestCase):
    """ Tests for `Article.absolutize_url()`.

        `article1` and `article2` are expected to resolve to another URL.
        `article3`, `article4` and `article5` are expected to keep their
        URL and to record an error.
    """

    def setUp(self):
        """ Save the five articles whose URLs get absolutized. """

        self.article1 = Article(
            title=u'test1',
            url=u'http://rss.feedsportal.com/c/707/f/9951/s/2b27496a/l/0L0Sreseaux0Etelecoms0Bnet0Cactualites0Clire0Elancement0Emondial0Edu0Esamsung0Egalaxy0Es40E25980A0Bhtml/story01.htm'  # NOQA
        ).save()
        self.article2 = Article(
            title=u'test2',
            url=u'http://feedproxy.google.com/~r/francaistechcrunch/~3/hEIhLwVyEEI/'  # NOQA
        ).save()
        self.article3 = Article(
            title=u'test3',
            url=u'http://obi.1flow.io/absolutize_test_401'
        ).save()
        self.article4 = Article(
            title=u'test4',
            url=u'http://host.non.exixstentz.com/absolutize_test'
        ).save()
        self.article5 = Article(
            title=u'test5',
            url=u'http://1flow.io/absolutize_test_404'
        ).save()

    def tearDown(self):
        """ Drop the `Article` and `Feed` collections. """

        Article.drop_collection()
        Feed.drop_collection()

    def test_absolutize(self):
        """ Expect both URLs to be replaced, without any error recorded. """

        self.article1.absolutize_url()
        self.assertEqual(
            self.article1.url,
            u'http://www.reseaux-telecoms.net/actualites/lire-lancement-mondial-du-samsung-galaxy-s4-25980.html'  # NOQA
        )
        self.assertEqual(self.article1.url_absolute, True)
        self.assertEqual(self.article1.url_error, '')

        self.article2.absolutize_url()
        self.assertEqual(
            self.article2.url,
            u'http://techcrunch.com/2013/05/18/hell-no-tumblr-users-wont-go-to-yahoo/'  # NOQA
        )
        self.assertEqual(self.article2.url_absolute, True)
        self.assertEqual(self.article2.url_error, '')

    def test_absolutize_errors(self):
        """ Expect the URLs to be kept as-is, with an error recorded. """

        #
        # NOTE: if a PROXY is set, the reasons word cases can vary.
        # eg. 'Not Found' (via Squid) instead of 'NOT FOUND' (direct answer).
        #

        self.article3.absolutize_url()
        self.assertEqual(self.article3.url,
                         u'http://obi.1flow.io/absolutize_test_401')
        self.assertEqual(self.article3.url_absolute, False)
        self.assertEqual(
            self.article3.url_error,
            u'HTTP Error 401 (Unauthorized) while resolving http://obi.1flow.io/absolutize_test_401.'  # NOQA
        )

        self.article5.absolutize_url()
        self.assertEqual(self.article5.url,
                         u'http://1flow.io/absolutize_test_404')
        self.assertEqual(self.article5.url_absolute, False)
        self.assertEqual(
            self.article5.url_error,
            u'HTTP Error 404 (NOT FOUND) while resolving http://1flow.io/absolutize_test_404.'  # NOQA
        )

        self.article4.absolutize_url()
        self.assertEqual(
            self.article4.url,
            u'http://host.non.exixstentz.com/absolutize_test')
        self.assertEqual(self.article4.url_absolute, False)
        # Only the first 108 characters of the error are compared.
        self.assertEqual(
            self.article4.url_error[:108],
            u"HTTPConnectionPool(host='host.non.exixstentz.com', port=80): Max retries exceeded with url: /absolutize_test"  # NOQA
        )