def test_request_counter(self):
    """Verify ``request_counter`` after sequential and threaded requests.

    Two requests are made with one Grab instance, then ten more are made
    from separate threads (each with its own Grab instance), then one
    final request with the first instance. The counter read from the
    first instance is expected to be 13 at the end (2 + 10 + 1).
    """
    import threading

    reset_request_counter()
    main_grab = build_grab()

    # Sequential requests: the counter grows by one per request.
    for expected_count in (1, 2):
        main_grab.go(self.server.get_url())
        self.assertEqual(main_grab.request_counter, expected_count)

    def worker():
        # Every thread issues one request through a fresh Grab instance.
        thread_grab = build_grab()
        thread_grab.go(self.server.get_url())

    # Make 10 requests in concurrent threads
    workers = []
    for _ in six.moves.range(10):
        item = threading.Thread(target=worker)
        workers.append(item)
        item.start()
    for item in workers:
        item.join()

    main_grab.go(self.server.get_url())
    self.assertEqual(main_grab.request_counter, 13)
def test_log_option(self):
    """Verify the ``log_file`` option.

    After one request the temporary directory must contain exactly
    ``lograb.html``, and that file must hold the body configured on the
    test server (``'omsk'``).
    """
    with temp_dir() as workdir:
        reset_request_counter()

        target_path = os.path.join(workdir, 'lograb.html')
        client = build_grab()
        client.setup(log_file=target_path)
        self.server.response['get.data'] = 'omsk'

        # Nothing may be written before the request is made.
        self.assertEqual(os.listdir(workdir), [])
        client.go(self.server.get_url())
        self.assertEqual(os.listdir(workdir), ['lograb.html'])

        with open(target_path) as handle:
            saved_body = handle.read()
        self.assertEqual(saved_body, 'omsk')
def test_log_dir_response_content(self):
    """Verify that the ``log_dir`` log file records response headers.

    The test server is configured to send an ``X-Engine`` header. After
    one request the directory must contain ``01.html`` and ``01.log``,
    and ``01.log`` must mention the header name (case-insensitively).
    """
    with temp_dir() as tmp_dir:
        reset_request_counter()

        grab = build_grab()
        grab.setup(log_dir=tmp_dir)
        self.server.response['get.data'] = 'omsk'
        self.server.response['headers'] = [('X-Engine', 'PHP')]

        # Nothing may be written before the request is made.
        self.assertEqual(os.listdir(tmp_dir), [])
        grab.go(self.server.get_url())
        self.assertEqual(sorted(os.listdir(tmp_dir)),
                         ['01.html', '01.log'])

        with open(os.path.join(tmp_dir, '01.log')) as inp:
            log_file_content = inp.read()
        # assertIn reports the searched text on failure, unlike
        # assertTrue(x in y), which only says "False is not true".
        self.assertIn('x-engine', log_file_content.lower())
def test_log_dir_request_content_is_empty(self):
    """Verify that request details are absent from the log by default.

    The request carries an ``X-Name`` header and a POST body, but the
    ``debug`` option is not set. After one request ``01.log`` must
    contain neither the header name nor the POST payload.
    """
    with temp_dir() as tmp_dir:
        reset_request_counter()

        grab = build_grab()
        grab.setup(log_dir=tmp_dir)
        grab.setup(headers={'X-Name': 'spider'}, post='xxxPost')

        # Nothing may be written before the request is made.
        self.assertEqual(os.listdir(tmp_dir), [])
        grab.go(self.server.get_url())
        self.assertEqual(sorted(os.listdir(tmp_dir)),
                         ['01.html', '01.log'])

        with open(os.path.join(tmp_dir, '01.log')) as inp:
            log_file_content = inp.read()
        # assertNotIn shows the offending content when the check fails.
        self.assertNotIn('X-Name', log_file_content)
        self.assertNotIn('xxxPost', log_file_content)
def test_log_dir_option(self):
    """Verify that ``log_dir`` produces numbered files per request.

    Two requests are made; the server answers ``'omsk1'`` once and
    ``'omsk2'`` afterwards. The directory must then hold ``01``/``02``
    ``.html`` and ``.log`` files, with each ``.html`` file containing
    the body of the corresponding response.
    """
    with temp_dir() as workdir:
        reset_request_counter()

        client = build_grab()
        client.setup(log_dir=workdir)
        self.server.response_once['get.data'] = 'omsk1'
        self.server.response['get.data'] = 'omsk2'

        # Nothing may be written before the first request.
        self.assertEqual(os.listdir(workdir), [])

        client.go(self.server.get_url())
        client.go(self.server.get_url())

        produced = sorted(os.listdir(workdir))
        self.assertEqual(produced,
                         ['01.html', '01.log', '02.html', '02.log'])

        # Each saved page must match the response served for that request.
        expected_bodies = (('01.html', 'omsk1'), ('02.html', 'omsk2'))
        for file_name, expected_body in expected_bodies:
            with open(os.path.join(workdir, file_name)) as handle:
                self.assertEqual(handle.read(), expected_body)
def test_log_dir_request_content_headers_and_post(self):
    """Verify that ``debug=True`` adds request details to the log file.

    The request carries an ``X-Name`` header and a POST dict
    ``{'xxx': 'Post'}``. After one request ``01.log`` must contain the
    header name and the encoded POST pair (compared case-insensitively).
    """
    with temp_dir() as tmp_dir:
        reset_request_counter()

        grab = build_grab()
        grab.setup(log_dir=tmp_dir, debug=True)
        grab.setup(headers={'X-Name': 'spider'}, post={'xxx': 'Post'})

        # Nothing may be written before the request is made.
        self.assertEqual(os.listdir(tmp_dir), [])
        grab.go(self.server.get_url())
        self.assertEqual(sorted(os.listdir(tmp_dir)),
                         ['01.html', '01.log'])

        with open(os.path.join(tmp_dir, '01.log')) as inp:
            log_file_content = inp.read().lower()
        # assertIn includes the log content in the failure message, so
        # no ad-hoc debug printing is needed here.
        self.assertIn('x-name', log_file_content)
        self.assertIn('xxx=post', log_file_content)
def test_log_dir_response_network_error(self):
    """Verify that log files are written even if the request times out.

    The client timeout is 1 while the test server response is set up
    with ``sleep`` = 2, so the request is presumably expected to raise
    ``GrabTimeoutError``. Afterwards ``01.html`` and ``01.log`` must
    exist and the log must contain the configured user agent line.
    """
    with temp_dir() as tmp_dir:
        reset_request_counter()

        grab = build_grab()
        grab.setup(log_dir=tmp_dir, timeout=1, user_agent='Perl',
                   debug=True)
        self.server.response['get.data'] = 'omsk'
        self.server.response['headers'] = [('X-Engine', 'PHP')]
        self.server.response['sleep'] = 2

        # Nothing may be written before the request is made.
        self.assertEqual(os.listdir(tmp_dir), [])
        try:
            grab.go(self.server.get_url())
        except GrabTimeoutError:
            # The timeout is tolerated on purpose: only the log files
            # left behind by the request are checked.
            pass

        self.assertEqual(sorted(os.listdir(tmp_dir)),
                         ['01.html', '01.log'])
        with open(os.path.join(tmp_dir, '01.log')) as inp:
            log_file_content = inp.read()
        # assertIn reports the log content on failure, unlike
        # assertTrue(x in y).
        self.assertIn('user-agent: perl', log_file_content.lower())
def test_log_dir_response_content_thread(self):
    """Verify ``log_dir`` file naming for a request made in a thread.

    One request is made from a non-main thread. Exactly two files whose
    names contain ``01-thread`` must be created, and the ``.log`` file
    must mention the ``X-Engine`` response header (case-insensitively).
    """
    # Imported locally, as in test_request_counter, so this test does
    # not depend on a module-level import of threading.
    import threading

    with temp_dir() as tmp_dir:
        reset_request_counter()

        grab = build_grab()
        grab.setup(log_dir=tmp_dir)
        self.server.response['get.data'] = 'omsk'
        self.server.response['headers'] = [('X-Engine', 'PHP')]

        # Nothing may be written before the request is made.
        self.assertEqual(os.listdir(tmp_dir), [])

        def func():
            grab.go(self.server.get_url())

        thread = threading.Thread(target=func)
        thread.start()
        thread.join()

        files = os.listdir(tmp_dir)
        thread_files = [x for x in files if '01-thread' in x]
        self.assertEqual(2, len(thread_files))

        log_files = [x for x in files if x.endswith('.log')]
        fname = log_files[0]
        with open(os.path.join(tmp_dir, fname)) as inp:
            log_file_content = inp.read()
        # assertIn reports the log content on failure, unlike
        # assertTrue(x in y).
        self.assertIn('x-engine', log_file_content.lower())