Exemplo n.º 1
0
class TestReferrerOnRedirect(TestRefererMiddleware):
    """Check the ``Referer`` header of requests that go through redirections.

    Each scenario sends a request through ``RefererMiddleware`` and then
    through a chain of redirect responses handled by ``RedirectMiddleware``,
    asserting the ``Referer`` header before and after the redirections.
    """

    settings = {'REFERRER_POLICY': 'scrapy.spidermiddlewares.referer.UnsafeUrlPolicy'}

    # Each scenario is a 5-tuple:
    #   (parent URL, target URL, redirections, initial referer, final referer)
    # where ``redirections`` is a tuple of (status code, Location URL) pairs.
    scenarii = [
        (
            'http://scrapytest.org/1',      # parent
            'http://scrapytest.org/2',      # target
            (
                # redirections: code, URL
                (301, 'http://scrapytest.org/3'),
                (301, 'http://scrapytest.org/4'),
            ),
            b'http://scrapytest.org/1',  # expected initial referer
            b'http://scrapytest.org/1',  # expected referer for the redirection request
        ),
        (
            'https://scrapytest.org/1',
            'https://scrapytest.org/2',
            (
                # redirecting to non-secure URL
                (301, 'http://scrapytest.org/3'),
            ),
            b'https://scrapytest.org/1',
            b'https://scrapytest.org/1',
        ),
        (
            'https://scrapytest.org/1',
            'https://scrapytest.com/2',
            (
                # redirecting to non-secure URL: different origin
                (301, 'http://scrapytest.com/3'),
            ),
            b'https://scrapytest.org/1',
            b'https://scrapytest.org/1',
        ),
    ]

    def setUp(self):
        """Build the spider and both middlewares from the class settings."""
        self.spider = Spider('foo')
        settings = Settings(self.settings)
        self.referrermw = RefererMiddleware(settings)
        self.redirectmw = RedirectMiddleware(settings)

    def test(self):
        """Run every scenario, checking the initial and post-redirect Referer."""
        for parent, target, redirections, init_referrer, final_referrer in self.scenarii:
            response = self.get_response(parent)
            request = self.get_request(target)

            out = list(self.referrermw.process_spider_output(response, [request], self.spider))
            self.assertEqual(out[0].headers.get('Referer'), init_referrer)

            # Feed each redirect response to the redirect middleware and let
            # the referrer middleware see the request it produces.
            for status, url in redirections:
                response = Response(request.url, headers={'Location': url}, status=status)
                request = self.redirectmw.process_response(request, response, self.spider)
                self.referrermw.request_scheduled(request, self.spider)

            # Use a unittest assertion rather than a bare ``assert`` so the
            # check is not stripped when Python runs with -O.
            self.assertIsInstance(request, Request)
            self.assertEqual(request.headers.get('Referer'), final_referrer)
class TestReferrerOnRedirect(TestRefererMiddleware):
    """Verify which ``Referer`` value survives a chain of HTTP redirections.

    A request is first passed through ``RefererMiddleware`` as spider output,
    then each configured redirect response is handled by
    ``RedirectMiddleware``; the ``Referer`` header is asserted at both ends.
    """

    settings = {'REFERRER_POLICY': 'scrapy.spidermiddlewares.referer.UnsafeUrlPolicy'}

    # Scenario layout:
    #   (parent URL, target URL, ((status, redirect URL), ...),
    #    expected initial referer, expected referer after the redirections)
    scenarii = [
        (
            'http://scrapytest.org/1',      # parent
            'http://scrapytest.org/2',      # target
            (
                # redirections: code, URL
                (301, 'http://scrapytest.org/3'),
                (301, 'http://scrapytest.org/4'),
            ),
            b'http://scrapytest.org/1',  # expected initial referer
            b'http://scrapytest.org/1',  # expected referer for the redirection request
        ),
        (
            'https://scrapytest.org/1',
            'https://scrapytest.org/2',
            (
                # redirecting to non-secure URL
                (301, 'http://scrapytest.org/3'),
            ),
            b'https://scrapytest.org/1',
            b'https://scrapytest.org/1',
        ),
        (
            'https://scrapytest.org/1',
            'https://scrapytest.com/2',
            (
                # redirecting to non-secure URL: different origin
                (301, 'http://scrapytest.com/3'),
            ),
            b'https://scrapytest.org/1',
            b'https://scrapytest.org/1',
        ),
    ]

    def setUp(self):
        """Create the spider plus the referrer and redirect middlewares."""
        self.spider = Spider('foo')
        settings = Settings(self.settings)
        self.referrermw = RefererMiddleware(settings)
        self.redirectmw = RedirectMiddleware(settings)

    def test(self):
        """Assert the Referer header before and after each scenario's redirects."""
        for parent, target, redirections, init_referrer, final_referrer in self.scenarii:
            response = self.get_response(parent)
            request = self.get_request(target)

            out = list(self.referrermw.process_spider_output(response, [request], self.spider))
            self.assertEqual(out[0].headers.get('Referer'), init_referrer)

            for status, url in redirections:
                # Each redirect response is built for the current request URL
                # and points at the next URL through its Location header.
                response = Response(request.url, headers={'Location': url}, status=status)
                request = self.redirectmw.process_response(request, response, self.spider)
                self.referrermw.request_scheduled(request, self.spider)

            # A bare ``assert`` disappears under ``python -O``; the unittest
            # assertion always runs and reports the offending object.
            self.assertIsInstance(request, Request)
            self.assertEqual(request.headers.get('Referer'), final_referrer)
Exemplo n.º 3
0
class TestRefererMiddleware(TestCase):
    """Scenario-driven check of the ``Referer`` header set by RefererMiddleware.

    The class attributes below parametrise the run: request meta, response
    headers, middleware settings and the list of scenarios to check.
    """

    req_meta = {}
    resp_headers = {}
    settings = {}

    # Each scenario: (response URL, request URL, expected Referer value).
    scenarii = [
        (
            'http://scrapytest.org',
            'http://scrapytest.org/',
            b'http://scrapytest.org',
        ),
    ]

    def setUp(self):
        """Instantiate the spider and the middleware under test."""
        self.spider = Spider('foo')
        self.mw = RefererMiddleware(Settings(self.settings))

    def get_request(self, target):
        """Return a request for ``target`` carrying the class-level meta."""
        request = Request(target, meta=self.req_meta)
        return request

    def get_response(self, origin):
        """Return a response for ``origin`` carrying the class-level headers."""
        response = Response(origin, headers=self.resp_headers)
        return response

    def test(self):
        """Check the Referer of the first output request for every scenario."""
        for scenario in self.scenarii:
            origin, target, referrer = scenario
            response = self.get_response(origin)
            request = self.get_request(target)
            processed = self.mw.process_spider_output(response, [request], self.spider)
            first_request = list(processed)[0]
            self.assertEqual(first_request.headers.get('Referer'), referrer)
class TestRefererMiddleware(TestCase):
    """Run RefererMiddleware over (origin, target, referrer) scenarios.

    The ``req_meta``, ``resp_headers``, ``settings`` and ``scenarii`` class
    attributes supply the inputs used by the helpers and the test below.
    """

    req_meta = {}
    resp_headers = {}
    settings = {}
    scenarii = [
        # (response URL, request URL, expected Referer header)
        ('http://scrapytest.org', 'http://scrapytest.org/', b'http://scrapytest.org'),
    ]

    def setUp(self):
        """Prepare a spider and a middleware configured from ``settings``."""
        middleware_settings = Settings(self.settings)
        self.spider = Spider('foo')
        self.mw = RefererMiddleware(middleware_settings)

    def get_request(self, target):
        """Build the outgoing request for ``target`` with ``req_meta``."""
        meta = self.req_meta
        return Request(target, meta=meta)

    def get_response(self, origin):
        """Build the parent response for ``origin`` with ``resp_headers``."""
        headers = self.resp_headers
        return Response(origin, headers=headers)

    def test(self):
        """Assert the Referer header on the first request the middleware yields."""
        for origin, target, expected_referrer in self.scenarii:
            parent_response = self.get_response(origin)
            child_request = self.get_request(target)
            results = list(
                self.mw.process_spider_output(parent_response, [child_request], self.spider)
            )
            actual_referrer = results[0].headers.get('Referer')
            self.assertEqual(actual_referrer, expected_referrer)
Exemplo n.º 5
0
    def test(self):
        """Check the policy object ``RefererMiddleware.policy`` returns per case.

        Iterates over ``self.params`` from its fourth entry onwards. Each
        entry provides the middleware settings, response headers, request
        meta, the expected policy class, and whether exactly one
        ``RuntimeWarning`` must be recorded.
        """
        origin_url = 'http://www.scrapy.org'
        target_url = 'http://www.example.com'

        for case in self.params[3:]:
            settings, response_headers, request_meta, policy_class, check_warning = case
            middleware = RefererMiddleware(Settings(settings))

            response = Response(origin_url, headers=response_headers)
            request = Request(target_url, meta=request_meta)

            with warnings.catch_warnings(record=True) as caught:
                self.assertIsInstance(middleware.policy(response, request), policy_class)

                # Nothing more to verify when no warning is expected.
                if not check_warning:
                    continue

                self.assertEqual(len(caught), 1)
                first_warning = caught[0]
                self.assertEqual(first_warning.category, RuntimeWarning, first_warning.message)
    def test(self):
        """Verify the policy chosen by ``RefererMiddleware.policy`` for each case.

        Uses the entries of ``self.params`` from index 3 onwards. Each entry
        is a 5-tuple: middleware settings, response headers, request meta,
        expected policy class, and a flag telling whether exactly one
        ``RuntimeWarning`` must have been recorded.
        """
        origin = 'http://www.scrapy.org'
        target = 'http://www.example.com'

        for settings, response_headers, request_meta, policy_class, check_warning in self.params[3:]:
            mw = RefererMiddleware(Settings(settings))

            response = Response(origin, headers=response_headers)
            request = Request(target, meta=request_meta)

            # Record warnings raised while resolving the policy so they can
            # be inspected below instead of being printed.
            with warnings.catch_warnings(record=True) as w:
                policy = mw.policy(response, request)
                self.assertIsInstance(policy, policy_class)

                if check_warning:
                    self.assertEqual(len(w), 1)
                    self.assertEqual(w[0].category, RuntimeWarning, w[0].message)
Exemplo n.º 7
0
class TestRefererMiddleware(TestCase):
    """Check that RefererMiddleware sets the ``Referer`` header on output requests."""

    def setUp(self):
        """Create the spider and a middleware built with no arguments."""
        self.spider = Spider('foo')
        self.mw = RefererMiddleware()

    def test_process_spider_output(self):
        """The first output request carries the response URL as its Referer."""
        res = Response('http://scrapytest.org')
        reqs = [Request('http://scrapytest.org/')]

        out = list(self.mw.process_spider_output(res, reqs, self.spider))
        # ``assertEquals`` is a deprecated alias that was removed in
        # Python 3.12; ``assertEqual`` is the supported spelling.
        self.assertEqual(out[0].headers.get('Referer'),
                         'http://scrapytest.org')
class TestRefererMiddleware(TestCase):
    """Exercise ``RefererMiddleware.process_spider_output`` on a single request."""

    def setUp(self):
        """Set up a spider named 'foo' and a default-constructed middleware."""
        self.spider = Spider('foo')
        self.mw = RefererMiddleware()

    def test_process_spider_output(self):
        """Assert the Referer header of the first request the middleware yields."""
        res = Response('http://scrapytest.org')
        reqs = [Request('http://scrapytest.org/')]

        out = list(self.mw.process_spider_output(res, reqs, self.spider))
        # Use ``assertEqual``: the ``assertEquals`` alias is deprecated and
        # no longer exists in Python 3.12+.
        self.assertEqual(out[0].headers.get('Referer'),
                         'http://scrapytest.org')
Exemplo n.º 9
0
 def test_valid_name_casevariants(self):
     """Upper-cased policy names must select the matching default policy class."""
     expected_policies = [
         (POLICY_SCRAPY_DEFAULT, DefaultReferrerPolicy),
         (POLICY_NO_REFERRER, NoReferrerPolicy),
         (POLICY_NO_REFERRER_WHEN_DOWNGRADE, NoReferrerWhenDowngradePolicy),
         (POLICY_SAME_ORIGIN, SameOriginPolicy),
         (POLICY_ORIGIN, OriginPolicy),
         (POLICY_STRICT_ORIGIN, StrictOriginPolicy),
         (POLICY_ORIGIN_WHEN_CROSS_ORIGIN, OriginWhenCrossOriginPolicy),
         (POLICY_STRICT_ORIGIN_WHEN_CROSS_ORIGIN, StrictOriginWhenCrossOriginPolicy),
         (POLICY_UNSAFE_URL, UnsafeUrlPolicy),
     ]
     for policy_name, policy_class in expected_policies:
         # The setting is given in upper case on purpose: that is the
         # variant this test covers.
         upper_name = policy_name.upper()
         middleware = RefererMiddleware(Settings({'REFERRER_POLICY': upper_name}))
         self.assertEqual(middleware.default_policy, policy_class)
Exemplo n.º 10
0
 def setUp(self):
     """Create the spider and both middlewares from the class-level settings."""
     shared_settings = Settings(self.settings)
     self.spider = Spider('foo')
     # Both middlewares are built from the same Settings object.
     self.referrermw = RefererMiddleware(shared_settings)
     self.redirectmw = RedirectMiddleware(shared_settings)
Exemplo n.º 11
0
 def test_invalid_name(self):
     """An unknown REFERRER_POLICY value makes the middleware raise RuntimeError."""
     bad_settings = Settings({'REFERRER_POLICY': 'some-custom-unknown-policy'})
     # Callable form of assertRaises: the constructor is invoked with
     # ``bad_settings`` and must raise RuntimeError.
     self.assertRaises(RuntimeError, RefererMiddleware, bad_settings)
Exemplo n.º 12
0
 def setUp(self):
     """Create the spider and the referrer middleware used by the tests."""
     self.spider = Spider('foo')
     # The middleware is configured from the class-level ``settings`` mapping.
     self.mw = RefererMiddleware(Settings(self.settings))
 def setUp(self):
     """Prepare a spider plus referrer and redirect middlewares for each test."""
     self.spider = Spider('foo')
     mw_settings = Settings(self.settings)
     # Same settings object for both middlewares.
     self.referrermw, self.redirectmw = (
         RefererMiddleware(mw_settings),
         RedirectMiddleware(mw_settings),
     )
 def setUp(self):
     """Build the test spider and a middleware configured from ``self.settings``."""
     configured = Settings(self.settings)
     self.spider = Spider('foo')
     self.mw = RefererMiddleware(configured)
Exemplo n.º 15
0
 def setUp(self):
     """Create the spider and a middleware constructed without arguments."""
     # Right-hand side is evaluated left to right: spider first, then middleware.
     self.spider, self.mw = Spider('foo'), RefererMiddleware()
 def setUp(self):
     """Set up the fixtures: a spider named 'foo' and a default middleware."""
     spider = Spider('foo')
     middleware = RefererMiddleware()
     self.spider = spider
     self.mw = middleware