def test_request_fingerprint(self):
    """Check that overriding ``request_fingerprint`` changes what
    ``request_seen`` reports as a duplicate.
    """
    class CaseInsensitiveRFPDupeFilter(RFPDupeFilter):
        # Fingerprint only the lower-cased URL, so two URLs that differ
        # solely by letter case hash to the same value.
        def request_fingerprint(self, request):
            lowered_url = to_bytes(request.url.lower())
            return hashlib.sha1(lowered_url).hexdigest()

    lower_request = Request('http://scrapytest.org/index.html')
    upper_request = Request('http://scrapytest.org/INDEX.html')

    # Stock filter: the test expects both URLs to be reported as unseen.
    default_filter = RFPDupeFilter()
    default_filter.open()
    assert not default_filter.request_seen(lower_request)
    assert not default_filter.request_seen(upper_request)
    default_filter.close('finished')

    # Customized filter: the second URL must now count as already seen.
    custom_filter = CaseInsensitiveRFPDupeFilter()
    custom_filter.open()
    assert not custom_filter.request_seen(lower_request)
    assert custom_filter.request_seen(upper_request)
    custom_filter.close('finished')
def test_request_fingerprint(self):
    """Test if customization of request_fingerprint method will change
    output of request_seen.

    Two URLs that differ only by letter case are fed first to a plain
    ``RFPDupeFilter`` (both expected to be unseen) and then to a subclass
    whose fingerprint is computed from the lower-cased URL (the second one
    is expected to be reported as a duplicate).
    """
    r1 = Request('http://scrapytest.org/index.html')
    r2 = Request('http://scrapytest.org/INDEX.html')

    dupefilter = RFPDupeFilter()
    dupefilter.open()
    assert not dupefilter.request_seen(r1)
    assert not dupefilter.request_seen(r2)
    dupefilter.close('finished')

    class CaseInsensitiveRFPDupeFilter(RFPDupeFilter):

        def request_fingerprint(self, request):
            """Return the SHA-1 hex digest of the lower-cased request URL."""
            fp = hashlib.sha1()
            # hashlib only accepts bytes-like input; feeding it a text
            # string raises TypeError on Python 3, so encode explicitly.
            # NOTE(review): assumes request.url is a text string.
            fp.update(request.url.lower().encode('utf-8'))
            return fp.hexdigest()

    case_insensitive_dupefilter = CaseInsensitiveRFPDupeFilter()
    case_insensitive_dupefilter.open()
    assert not case_insensitive_dupefilter.request_seen(r1)
    assert case_insensitive_dupefilter.request_seen(r2)
    case_insensitive_dupefilter.close('finished')
def test_filter(self):
    """Check that ``request_seen`` flags a request only from its second
    appearance on, including a distinct ``Request`` object with the same URL.
    """
    first = Request('http://scrapytest.org/1')
    second = Request('http://scrapytest.org/2')
    second_again = Request('http://scrapytest.org/2')

    seen_filter = RFPDupeFilter()
    seen_filter.open()

    # (request, whether the filter should already know it), in call order.
    expectations = (
        (first, False),
        (first, True),
        (second, False),
        (second_again, True),
    )
    for request, already_seen in expectations:
        assert bool(seen_filter.request_seen(request)) is already_seen

    seen_filter.close('finished')
def test_dupefilter_path(self):
    """Check that a filter opened on a directory recognises requests that
    an earlier filter opened on the same directory has already seen.
    """
    known_request = Request('http://scrapytest.org/1')
    fresh_request = Request('http://scrapytest.org/2')

    workdir = tempfile.mkdtemp()
    try:
        # First filter: record one request, then shut down.
        first_filter = RFPDupeFilter(workdir)
        first_filter.open()
        assert not first_filter.request_seen(known_request)
        assert first_filter.request_seen(known_request)
        first_filter.close('finished')

        # Second filter on the same directory: the earlier request is
        # expected to be known, the other one only after its first sighting.
        second_filter = RFPDupeFilter(workdir)
        second_filter.open()
        assert second_filter.request_seen(known_request)
        assert not second_filter.request_seen(fresh_request)
        assert second_filter.request_seen(fresh_request)
        second_filter.close('finished')
    finally:
        # Always remove the temporary directory, even if an assertion fails.
        shutil.rmtree(workdir)
def test_seenreq_newlines(self):
    """Guard against a doubled carriage return in the line endings of the
    ``requests.seen`` file on Windows platforms.
    """
    request = Request('http://scrapytest.org/1')
    workdir = tempfile.mkdtemp()
    try:
        dupefilter = RFPDupeFilter(workdir)
        dupefilter.open()
        dupefilter.request_seen(request)
        dupefilter.close('finished')

        seen_path = os.path.join(workdir, 'requests.seen')
        # Read in binary mode so the raw line terminator is inspected
        # without any newline translation by the file object.
        with open(seen_path, 'rb') as seen_file:
            first_line = next(seen_file).decode()

        assert not first_line.endswith('\r\r\n')
        expected_ending = '\r\n' if sys.platform == 'win32' else '\n'
        assert first_line.endswith(expected_ending)
    finally:
        # Always remove the temporary directory, even if an assertion fails.
        shutil.rmtree(workdir)