class TestReferrerOnRedirect(TestRefererMiddleware):
    """Check the Referer header of requests produced by following redirects.

    Each entry of ``scenarii`` is a 5-tuple: the parent (response) URL, the
    target (request) URL, a sequence of ``(status, location)`` redirections,
    the Referer expected on the initial request, and the Referer expected on
    the request obtained after the last redirection.
    """

    settings = {'REFERRER_POLICY': 'scrapy.spidermiddlewares.referer.UnsafeUrlPolicy'}
    scenarii = [
        (
            'http://scrapytest.org/1',  # parent
            'http://scrapytest.org/2',  # target
            (
                # redirections: code, URL
                (301, 'http://scrapytest.org/3'),
                (301, 'http://scrapytest.org/4'),
            ),
            b'http://scrapytest.org/1',  # expected initial referer
            b'http://scrapytest.org/1',  # expected referer for the redirection request
        ),
        (
            'https://scrapytest.org/1',
            'https://scrapytest.org/2',
            (
                # redirecting to non-secure URL
                (301, 'http://scrapytest.org/3'),
            ),
            b'https://scrapytest.org/1',
            b'https://scrapytest.org/1',
        ),
        (
            'https://scrapytest.org/1',
            'https://scrapytest.com/2',
            (
                # redirecting to non-secure URL: different origin
                (301, 'http://scrapytest.com/3'),
            ),
            b'https://scrapytest.org/1',
            b'https://scrapytest.org/1',
        ),
    ]

    def setUp(self):
        """Build the spider and both middlewares from the same ``Settings`` object."""
        self.spider = Spider('foo')
        settings = Settings(self.settings)
        self.referrermw = RefererMiddleware(settings)
        self.redirectmw = RedirectMiddleware(settings)

    def test(self):
        """Run every scenario: initial Referer first, then the redirect chain."""
        for parent, target, redirections, init_referrer, final_referrer in self.scenarii:
            response = self.get_response(parent)
            request = self.get_request(target)

            out = list(self.referrermw.process_spider_output(response, [request], self.spider))
            self.assertEqual(out[0].headers.get('Referer'), init_referrer)

            for status, url in redirections:
                response = Response(request.url, headers={'Location': url}, status=status)
                request = self.redirectmw.process_response(request, response, self.spider)
                # Verify the type before the object is used, and with a unittest
                # assertion so the check is not stripped under ``python -O``.
                self.assertIsInstance(request, Request)
                self.referrermw.request_scheduled(request, self.spider)

            self.assertEqual(request.headers.get('Referer'), final_referrer)
class TestRefererMiddleware(TestCase):
    """Scenario-driven checks of the Referer header set by ``RefererMiddleware``.

    Class attributes configure a run: ``settings`` is wrapped in ``Settings``
    to build the middleware, ``req_meta`` and ``resp_headers`` are attached to
    every request and response created by the helpers, and ``scenarii`` lists
    ``(origin URL, target URL, expected Referer)`` triples.
    """

    req_meta = {}
    resp_headers = {}
    settings = {}
    scenarii = [
        ('http://scrapytest.org', 'http://scrapytest.org/', b'http://scrapytest.org'),
    ]

    def setUp(self):
        """Create the spider and the middleware under test."""
        self.spider = Spider('foo')
        self.mw = RefererMiddleware(Settings(self.settings))

    def get_request(self, target):
        """Return a request for *target* carrying the class-level meta."""
        return Request(target, meta=self.req_meta)

    def get_response(self, origin):
        """Return a response for *origin* carrying the class-level headers."""
        return Response(origin, headers=self.resp_headers)

    def test(self):
        """Assert the Referer of the first processed request in each scenario."""
        for origin_url, target_url, expected_referer in self.scenarii:
            parent_response = self.get_response(origin_url)
            child_request = self.get_request(target_url)

            processed = list(
                self.mw.process_spider_output(parent_response, [child_request], self.spider)
            )
            actual_referer = processed[0].headers.get('Referer')
            self.assertEqual(actual_referer, expected_referer)
class TestRefererMiddleware(TestCase):
    """Feed (origin, target) URL pairs through ``RefererMiddleware``.

    ``scenarii`` holds ``(origin, target, expected Referer)`` triples;
    ``settings``, ``req_meta`` and ``resp_headers`` are the class-level inputs
    used to build the middleware, the requests and the responses.
    """

    req_meta = {}
    resp_headers = {}
    settings = {}
    scenarii = [
        ('http://scrapytest.org', 'http://scrapytest.org/', b'http://scrapytest.org'),
    ]

    def setUp(self):
        """Instantiate a spider and a middleware built from ``self.settings``."""
        self.spider = Spider('foo')
        middleware_settings = Settings(self.settings)
        self.mw = RefererMiddleware(middleware_settings)

    def get_request(self, target):
        """Build the outgoing request, with ``self.req_meta`` as its meta."""
        outgoing = Request(target, meta=self.req_meta)
        return outgoing

    def get_response(self, origin):
        """Build the parent response, with ``self.resp_headers`` as its headers."""
        incoming = Response(origin, headers=self.resp_headers)
        return incoming

    def test(self):
        """Compare the Referer header of the first output against each scenario."""
        for scenario in self.scenarii:
            origin, target, referrer = scenario
            resp = self.get_response(origin)
            req = self.get_request(target)
            results = list(self.mw.process_spider_output(resp, [req], self.spider))
            head = results[0]
            self.assertEqual(head.headers.get('Referer'), referrer)
def test(self):
    """Check which policy the middleware selects for each parameter set.

    Every entry taken from ``self.params`` is unpacked as ``(settings,
    response_headers, request_meta, policy_class, check_warning)``. The policy
    returned by ``mw.policy(response, request)`` must be an instance of
    ``policy_class``; when ``check_warning`` is true, exactly one recorded
    warning of category ``RuntimeWarning`` is also required.
    """
    origin = 'http://www.scrapy.org'
    target = 'http://www.example.com'

    # NOTE(review): the first three entries of self.params are skipped; the
    # reason is not visible from this method -- confirm it is intentional.
    for settings, response_headers, request_meta, policy_class, check_warning in self.params[3:]:
        mw = RefererMiddleware(Settings(settings))

        response = Response(origin, headers=response_headers)
        request = Request(target, meta=request_meta)

        with warnings.catch_warnings(record=True) as w:
            # Record RuntimeWarning regardless of the ambient filters (for
            # example ``-W ignore``); other categories keep their filters so
            # the ``len(w) == 1`` check below is not disturbed by them.
            warnings.simplefilter('always', RuntimeWarning)

            policy = mw.policy(response, request)
            self.assertIsInstance(policy, policy_class)

            if check_warning:
                self.assertEqual(len(w), 1)
                self.assertEqual(w[0].category, RuntimeWarning, w[0].message)
def test(self):
    """Verify the policy chosen by ``RefererMiddleware.policy`` per parameter set.

    Each entry taken from ``self.params`` provides ``(settings,
    response_headers, request_meta, policy_class, check_warning)``. The
    returned policy must be an instance of ``policy_class``; if
    ``check_warning`` is set, exactly one warning must have been recorded and
    its category must be ``RuntimeWarning``.
    """
    origin = 'http://www.scrapy.org'
    target = 'http://www.example.com'

    # NOTE(review): iteration starts at index 3 of self.params; why the first
    # three entries are excluded cannot be told from here -- confirm.
    for settings, response_headers, request_meta, policy_class, check_warning in self.params[3:]:
        # The spider object previously created here was never used; dropped.
        mw = RefererMiddleware(Settings(settings))

        response = Response(origin, headers=response_headers)
        request = Request(target, meta=request_meta)

        with warnings.catch_warnings(record=True) as w:
            # Make sure a RuntimeWarning is always recorded, whatever warning
            # filters are active when the test runs; other categories are
            # left under their existing filters.
            warnings.simplefilter('always', RuntimeWarning)

            policy = mw.policy(response, request)
            self.assertIsInstance(policy, policy_class)

            if check_warning:
                self.assertEqual(len(w), 1)
                self.assertEqual(w[0].category, RuntimeWarning, w[0].message)
class TestRefererMiddleware(TestCase):
    """Check that ``RefererMiddleware`` sets the Referer header on spider output."""

    def setUp(self):
        """Create the spider and a middleware built without arguments."""
        self.spider = Spider('foo')
        self.mw = RefererMiddleware()

    def test_process_spider_output(self):
        """The first output request must carry the response URL as Referer."""
        res = Response('http://scrapytest.org')
        reqs = [Request('http://scrapytest.org/')]

        out = list(self.mw.process_spider_output(res, reqs, self.spider))
        # ``assertEqual`` replaces the deprecated ``assertEquals`` alias, which
        # was removed in Python 3.12. The expected value is a bytes literal:
        # it is equal to the plain string on Python 2 and matches the bytes
        # values the other Referer assertions in this file compare against.
        self.assertEqual(out[0].headers.get('Referer'), b'http://scrapytest.org')
def test_valid_name_casevariants(self):
    """Upper-cased policy names must select the same default policy class."""
    expected_by_name = [
        (POLICY_SCRAPY_DEFAULT, DefaultReferrerPolicy),
        (POLICY_NO_REFERRER, NoReferrerPolicy),
        (POLICY_NO_REFERRER_WHEN_DOWNGRADE, NoReferrerWhenDowngradePolicy),
        (POLICY_SAME_ORIGIN, SameOriginPolicy),
        (POLICY_ORIGIN, OriginPolicy),
        (POLICY_STRICT_ORIGIN, StrictOriginPolicy),
        (POLICY_ORIGIN_WHEN_CROSS_ORIGIN, OriginWhenCrossOriginPolicy),
        (POLICY_STRICT_ORIGIN_WHEN_CROSS_ORIGIN, StrictOriginWhenCrossOriginPolicy),
        (POLICY_UNSAFE_URL, UnsafeUrlPolicy),
    ]

    for policy_name, policy_cls in expected_by_name:
        # The name is upper-cased before being handed to the middleware.
        upper_settings = Settings({'REFERRER_POLICY': policy_name.upper()})
        middleware = RefererMiddleware(upper_settings)
        self.assertEqual(middleware.default_policy, policy_cls)
def setUp(self):
    """Create the spider plus the referrer and redirect middlewares.

    Both middlewares are built from one ``Settings`` object wrapping
    ``self.settings``.
    """
    shared_settings = Settings(self.settings)
    self.spider = Spider('foo')
    self.referrermw = RefererMiddleware(shared_settings)
    self.redirectmw = RedirectMiddleware(shared_settings)
def test_invalid_name(self):
    """Building the middleware with an unknown policy name raises RuntimeError."""
    bad_settings = Settings({'REFERRER_POLICY': 'some-custom-unknown-policy'})
    # Callable form of assertRaises: only the constructor call is guarded.
    self.assertRaises(RuntimeError, RefererMiddleware, bad_settings)
def setUp(self):
    """Create the spider and a middleware configured from ``self.settings``."""
    self.spider = Spider('foo')
    self.mw = RefererMiddleware(Settings(self.settings))
def setUp(self):
    """Create the middleware (built without arguments) and the spider."""
    self.mw = RefererMiddleware()
    self.spider = Spider('foo')