def test_request_counter(self):
    """Check ``request_counter`` after sequential and threaded requests.

    Two requests on one Grab instance are expected to report counter
    values 1 and 2.  Ten more requests are then issued from separate
    threads, each through its own Grab instance, and a final request on
    the first instance is expected to report 13.
    """
    import threading

    reset_request_counter()
    main_grab = build_grab()
    for expected in (1, 2):
        main_grab.go(self.server.get_url())
        self.assertEqual(main_grab.request_counter, expected)

    def fetch_once():
        # Every thread works with its own Grab instance.
        worker_grab = build_grab()
        worker_grab.go(self.server.get_url())

    # Make 10 requests in concurrent threads
    workers = [
        threading.Thread(target=fetch_once)
        for _ in six.moves.range(10)
    ]
    for worker in workers:
        worker.start()
    for worker in workers:
        worker.join()

    main_grab.go(self.server.get_url())
    self.assertEqual(main_grab.request_counter, 13)
def test_log_option(self):
    """Check that the ``log_file`` option stores the response body.

    ``TMP_DIR`` is asserted to be empty before the request; afterwards
    it must contain only ``log.html`` whose content equals the body
    configured on the test server.
    """
    clear_directory(TMP_DIR)
    reset_request_counter()
    log_file_path = os.path.join(TMP_DIR, 'log.html')
    g = build_grab()
    g.setup(log_file=log_file_path)
    self.server.response['get.data'] = 'omsk'
    self.assertEqual(os.listdir(TMP_DIR), [])
    g.go(self.server.get_url())
    self.assertEqual(os.listdir(TMP_DIR), ['log.html'])
    # Close the file deterministically instead of leaking the handle.
    with open(log_file_path) as inp:
        self.assertEqual(inp.read(), 'omsk')
def test_log_option(self):
    """Check that the ``log_file`` option stores the response body.

    ``TMP_DIR`` is asserted to be empty before the request; afterwards
    it must contain only ``log.html`` whose content equals the body
    configured through ``SERVER.RESPONSE``.
    """
    clear_directory(TMP_DIR)
    reset_request_counter()
    log_file_path = os.path.join(TMP_DIR, 'log.html')
    g = build_grab()
    g.setup(log_file=log_file_path)
    SERVER.RESPONSE['get'] = 'omsk'
    self.assertEqual(os.listdir(TMP_DIR), [])
    g.go(SERVER.BASE_URL)
    self.assertEqual(os.listdir(TMP_DIR), ['log.html'])
    # Close the file deterministically instead of leaking the handle.
    with open(log_file_path) as inp:
        self.assertEqual(inp.read(), 'omsk')
def test_log_option(self):
    """Check that the ``log_file`` option stores the response body.

    Inside the directory yielded by ``temp_dir()``, which is asserted
    to be empty before the request, a single request must produce only
    ``log.html`` containing the body configured on the test server.
    """
    with temp_dir() as tmp_dir:
        reset_request_counter()
        log_file_path = os.path.join(tmp_dir, 'log.html')
        g = build_grab()
        g.setup(log_file=log_file_path)
        self.server.response['get.data'] = 'omsk'
        self.assertEqual(os.listdir(tmp_dir), [])
        g.go(self.server.get_url())
        self.assertEqual(os.listdir(tmp_dir), ['log.html'])
        # Close the file deterministically instead of leaking the handle.
        with open(log_file_path) as inp:
            self.assertEqual(inp.read(), 'omsk')
def test_log_dir_request_content_is_empty(self):
    """Check that request details are absent from the ``log_dir`` log.

    The ``debug`` option is not set here.  After one request with a
    custom header and POST payload, ``01.log`` must mention neither
    the header name nor the payload.
    """
    clear_directory(TMP_DIR)
    reset_request_counter()
    g = build_grab()
    g.setup(log_dir=TMP_DIR)
    g.setup(headers={'X-Name': 'spider'}, post='xxxPost')
    self.assertEqual(os.listdir(TMP_DIR), [])
    g.go(self.server.get_url())
    self.assertEqual(sorted(os.listdir(TMP_DIR)), ['01.html', '01.log'])
    # Close the file deterministically instead of leaking the handle.
    with open(os.path.join(TMP_DIR, '01.log')) as inp:
        log_file_content = inp.read()
    self.assertNotIn('X-Name', log_file_content)
    self.assertNotIn('xxxPost', log_file_content)
def test_log_dir_response_content(self):
    """Check that response headers are written to the ``log_dir`` log.

    The server is configured to send an ``X-Engine`` header; after one
    request ``01.log`` must contain that header name.
    """
    clear_directory(TMP_DIR)
    reset_request_counter()
    g = build_grab()
    g.setup(log_dir=TMP_DIR)
    SERVER.RESPONSE['get'] = 'omsk'
    SERVER.RESPONSE['headers'] = [('X-Engine', 'PHP')]
    self.assertEqual(os.listdir(TMP_DIR), [])
    g.go(SERVER.BASE_URL)
    self.assertEqual(sorted(os.listdir(TMP_DIR)), ['01.html', '01.log'])
    # Close the file deterministically instead of leaking the handle.
    with open(os.path.join(TMP_DIR, '01.log')) as inp:
        log_file_content = inp.read()
    self.assertIn('X-Engine', log_file_content)
def test_log_dir_request_content_is_empty(self):
    """Check that request details are absent from the ``log_dir`` log.

    The ``debug`` option is not set here.  After one request with a
    custom header and POST payload, ``01.log`` in the temporary
    directory must mention neither the header name nor the payload.
    """
    with temp_dir() as tmp_dir:
        reset_request_counter()
        g = build_grab()
        g.setup(log_dir=tmp_dir)
        g.setup(headers={'X-Name': 'spider'}, post='xxxPost')
        self.assertEqual(os.listdir(tmp_dir), [])
        g.go(self.server.get_url())
        self.assertEqual(sorted(os.listdir(tmp_dir)),
                         ['01.html', '01.log'])
        # Close the file deterministically instead of leaking the handle.
        with open(os.path.join(tmp_dir, '01.log')) as inp:
            log_file_content = inp.read()
        self.assertNotIn('X-Name', log_file_content)
        self.assertNotIn('xxxPost', log_file_content)
def test_log_dir_response_content(self):
    """Check that response headers are written to the ``log_dir`` log.

    The server is configured to send an ``X-Engine`` header; after one
    request ``01.log`` must contain that header name.  The comparison
    is case-insensitive.
    """
    with temp_dir() as tmp_dir:
        reset_request_counter()
        g = build_grab()
        g.setup(log_dir=tmp_dir)
        self.server.response['get.data'] = 'omsk'
        self.server.response['headers'] = [('X-Engine', 'PHP')]
        self.assertEqual(os.listdir(tmp_dir), [])
        g.go(self.server.get_url())
        self.assertEqual(sorted(os.listdir(tmp_dir)),
                         ['01.html', '01.log'])
        # Close the file deterministically instead of leaking the handle.
        with open(os.path.join(tmp_dir, '01.log')) as inp:
            log_file_content = inp.read()
        self.assertIn('x-engine', log_file_content.lower())
def test_log_dir_request_content_headers_and_post(self):
    """Check that ``debug=True`` puts request details into the log.

    After one request with a custom header and POST data, ``01.log``
    must contain both the header name and the encoded POST pair.
    """
    clear_directory(TMP_DIR)
    reset_request_counter()
    g = build_grab()
    g.setup(log_dir=TMP_DIR, debug=True)
    g.setup(headers={'X-Name': 'spider'}, post={'xxx': 'Post'})
    self.assertEqual(os.listdir(TMP_DIR), [])
    g.go(SERVER.BASE_URL)
    self.assertEqual(sorted(os.listdir(TMP_DIR)), ['01.html', '01.log'])
    # Close the file deterministically instead of leaking the handle.
    with open(os.path.join(TMP_DIR, '01.log')) as inp:
        log_file_content = inp.read()
    self.assertIn('X-Name', log_file_content)
    self.assertIn('xxx=Post', log_file_content)
def test_log_dir_option(self):
    """Check that ``log_dir`` creates numbered files for each request.

    Two requests are made: the first is answered from
    ``SERVER.RESPONSE_ONCE`` and the second from ``SERVER.RESPONSE``.
    The directory must then hold ``01``/``02`` ``.html`` and ``.log``
    files, with each ``.html`` file containing the matching body.
    """
    clear_directory(TMP_DIR)
    reset_request_counter()
    g = build_grab()
    g.setup(log_dir=TMP_DIR)
    SERVER.RESPONSE_ONCE['get'] = 'omsk1'
    SERVER.RESPONSE['get'] = 'omsk2'
    self.assertEqual(os.listdir(TMP_DIR), [])
    g.go(SERVER.BASE_URL)
    g.go(SERVER.BASE_URL)
    self.assertEqual(sorted(os.listdir(TMP_DIR)),
                     ['01.html', '01.log', '02.html', '02.log'])
    # Read through context managers so no file handle is leaked.
    with open(os.path.join(TMP_DIR, '01.html')) as inp:
        self.assertEqual(inp.read(), 'omsk1')
    with open(os.path.join(TMP_DIR, '02.html')) as inp:
        self.assertEqual(inp.read(), 'omsk2')
def test_log_dir_request_content_headers_and_post(self):
    """Check that ``debug=True`` puts request details into the log.

    After one request with a custom header and POST data, ``01.log``
    must contain both the header name and the encoded POST pair.  The
    comparison is case-insensitive.
    """
    with temp_dir() as tmp_dir:
        reset_request_counter()
        g = build_grab()
        g.setup(log_dir=tmp_dir, debug=True)
        g.setup(headers={'X-Name': 'spider'}, post={'xxx': 'Post'})
        self.assertEqual(os.listdir(tmp_dir), [])
        g.go(self.server.get_url())
        self.assertEqual(sorted(os.listdir(tmp_dir)),
                         ['01.html', '01.log'])
        # Close the file deterministically instead of leaking the handle.
        with open(os.path.join(tmp_dir, '01.log')) as inp:
            log_file_content = inp.read().lower()
        # assertIn reports the searched text on failure.
        self.assertIn('x-name', log_file_content)
        self.assertIn('xxx=post', log_file_content)
def test_log_dir_option(self):
    """Check that ``log_dir`` creates numbered files for each request.

    Two requests are made: the first is answered from the server's
    ``response_once`` settings and the second from ``response``.  The
    directory must then hold ``01``/``02`` ``.html`` and ``.log``
    files, with each ``.html`` file containing the matching body.
    """
    clear_directory(TMP_DIR)
    reset_request_counter()
    g = build_grab()
    g.setup(log_dir=TMP_DIR)
    self.server.response_once['get.data'] = 'omsk1'
    self.server.response['get.data'] = 'omsk2'
    self.assertEqual(os.listdir(TMP_DIR), [])
    g.go(self.server.get_url())
    g.go(self.server.get_url())
    self.assertEqual(sorted(os.listdir(TMP_DIR)),
                     ['01.html', '01.log', '02.html', '02.log'])
    # Read through context managers so no file handle is leaked.
    with open(os.path.join(TMP_DIR, '01.html')) as inp:
        self.assertEqual(inp.read(), 'omsk1')
    with open(os.path.join(TMP_DIR, '02.html')) as inp:
        self.assertEqual(inp.read(), 'omsk2')
def test_log_dir_option(self):
    """Check that ``log_dir`` creates numbered files for each request.

    Two requests are made: the first is answered from the server's
    ``response_once`` settings and the second from ``response``.  The
    temporary directory must then hold ``01``/``02`` ``.html`` and
    ``.log`` files, with each ``.html`` file containing the matching
    body.
    """
    with temp_dir() as tmp_dir:
        reset_request_counter()
        g = build_grab()
        g.setup(log_dir=tmp_dir)
        self.server.response_once['get.data'] = 'omsk1'
        self.server.response['get.data'] = 'omsk2'
        self.assertEqual(os.listdir(tmp_dir), [])
        g.go(self.server.get_url())
        g.go(self.server.get_url())
        self.assertEqual(sorted(os.listdir(tmp_dir)),
                         ['01.html', '01.log', '02.html', '02.log'])
        # Read through context managers so no file handle is leaked.
        with open(os.path.join(tmp_dir, '01.html')) as inp:
            self.assertEqual(inp.read(), 'omsk1')
        with open(os.path.join(tmp_dir, '02.html')) as inp:
            self.assertEqual(inp.read(), 'omsk2')
def test_log_dir_option(self):
    """Check that ``log_dir`` creates numbered files for each request.

    Two requests are made: the first is answered from the server's
    ``response_once`` settings and the second from ``response``.  The
    directory must then hold ``01``/``02`` ``.html`` and ``.log``
    files, with each ``.html`` file containing the matching body.
    """
    clear_directory(TMP_DIR)
    reset_request_counter()
    g = build_grab()
    g.setup(log_dir=TMP_DIR)
    self.server.response_once['get.data'] = 'omsk1'
    self.server.response['get.data'] = 'omsk2'
    self.assertEqual(os.listdir(TMP_DIR), [])
    g.go(self.server.get_url())
    g.go(self.server.get_url())
    self.assertEqual(sorted(os.listdir(TMP_DIR)),
                     ['01.html', '01.log', '02.html', '02.log'])
    # Read through context managers so no file handle is leaked.
    with open(os.path.join(TMP_DIR, '01.html')) as inp:
        self.assertEqual(inp.read(), 'omsk1')
    with open(os.path.join(TMP_DIR, '02.html')) as inp:
        self.assertEqual(inp.read(), 'omsk2')
def test_log_dir_option(self):
    """Check that ``log_dir`` creates numbered files for each request.

    Two requests are made: the first is answered from the server's
    ``response_once`` settings and the second from ``response``.  The
    temporary directory must then hold ``01``/``02`` ``.html`` and
    ``.log`` files, with each ``.html`` file containing the matching
    body.
    """
    with temp_dir() as tmp_dir:
        reset_request_counter()
        g = build_grab()
        g.setup(log_dir=tmp_dir)
        self.server.response_once['get.data'] = 'omsk1'
        self.server.response['get.data'] = 'omsk2'
        self.assertEqual(os.listdir(tmp_dir), [])
        g.go(self.server.get_url())
        g.go(self.server.get_url())
        self.assertEqual(sorted(os.listdir(tmp_dir)),
                         ['01.html', '01.log', '02.html', '02.log'])
        # Read through context managers so no file handle is leaked.
        with open(os.path.join(tmp_dir, '01.html')) as inp:
            self.assertEqual(inp.read(), 'omsk1')
        with open(os.path.join(tmp_dir, '02.html')) as inp:
            self.assertEqual(inp.read(), 'omsk2')
def test_log_dir_response_network_error(self):
    """Check that log files are written even when the request times out.

    The server is told to sleep for 2 seconds while the client timeout
    is 1 second.  A ``GrabTimeoutError`` raised by the request is
    tolerated; afterwards ``01.html`` and ``01.log`` must exist and the
    log must contain the configured ``User-Agent`` header
    (case-insensitive match).
    """
    with temp_dir() as tmp_dir:
        reset_request_counter()
        g = build_grab()
        g.setup(log_dir=tmp_dir, timeout=1, user_agent='Perl',
                debug=True)
        self.server.response['get.data'] = 'omsk'
        self.server.response['headers'] = [('X-Engine', 'PHP')]
        self.server.response['sleep'] = 2
        self.assertEqual(os.listdir(tmp_dir), [])
        try:
            g.go(self.server.get_url())
        except GrabTimeoutError:
            # Deliberately ignored: the test only inspects what was
            # logged, not whether the request succeeded.
            pass
        self.assertEqual(sorted(os.listdir(tmp_dir)),
                         ['01.html', '01.log'])
        # Close the file deterministically instead of leaking the handle.
        with open(os.path.join(tmp_dir, '01.log')) as inp:
            log_file_content = inp.read()
        self.assertIn('user-agent: perl', log_file_content.lower())
def test_log_dir_response_content_thread(self):
    """Check ``log_dir`` output for a request made from another thread.

    The request runs inside a separate ``threading.Thread``.  Exactly
    two files whose names contain ``01-thread`` are expected, and the
    ``.log`` file must contain the ``X-Engine`` response header name
    (case-insensitive match).
    """
    with temp_dir() as tmp_dir:
        reset_request_counter()
        g = build_grab()
        g.setup(log_dir=tmp_dir)
        self.server.response['get.data'] = 'omsk'
        self.server.response['headers'] = [('X-Engine', 'PHP')]
        self.assertEqual(os.listdir(tmp_dir), [])

        def func():
            g.go(self.server.get_url())

        th = threading.Thread(target=func)
        th.start()
        th.join()

        files = os.listdir(tmp_dir)
        self.assertEqual(2, len([x for x in files if '01-thread' in x]))
        fname = [x for x in files if x.endswith('.log')][0]
        # Close the file deterministically instead of leaking the handle.
        with open(os.path.join(tmp_dir, fname)) as inp:
            log_file_content = inp.read()
        self.assertIn('x-engine', log_file_content.lower())