def test_app_args_warc(self):
    """Crawl recursively with ``--warc-file`` and inspect the gzipped WARC.

    The run must exit with 0, leave a ``test.warc.gz`` whose decompressed
    content contains ``FINISHED``, and record at least one file.
    """
    parser = AppArgumentParser()
    argv = [
        self.get_url('/'),
        '--no-parent',
        '--recursive',
        '--page-requisites',
        '--warc-file', 'test',
        '-4',
        '--no-robots',
        '--no-warc-digests',
    ]
    app_builder = Builder(parser.parse_args(argv), unit_test=True)
    application = app_builder.build()
    status = yield from application.run()

    self.assertTrue(os.path.exists('test.warc.gz'))

    with gzip.GzipFile('test.warc.gz') as warc_file:
        warc_bytes = warc_file.read()

    self.assertIn(b'FINISHED', warc_bytes)
    self.assertEqual(0, status)
    self.assertGreaterEqual(app_builder.factory['Statistics'].files, 1)
def test_app_args_warc_dedup(self):
    """Run with ``--warc-dedup`` against a hand-written CDX file.

    A ``dedup.cdx`` file listing the target URL, a digest and a record ID is
    written first. The uncompressed WARC produced by the run must contain
    that digest, a ``Type: revisit`` marker and the record ID.
    """
    parser = AppArgumentParser()

    # Header line followed by one record: URL, digest, record ID.
    cdx_content = b''.join([
        b' CDX a k u\n',
        self.get_url('/static/my_file.txt').encode('ascii'),
        b' KQ4IUKATKL63FT5GMAE2YDRV3WERNL34',
        b' <under-the-deer>\n',
    ])

    with open('dedup.cdx', 'wb') as cdx_file:
        cdx_file.write(cdx_content)

    argv = [
        self.get_url('/static/my_file.txt'),
        '--no-parent',
        '--warc-file', 'test',
        '--no-warc-compression',
        '-4',
        '--no-robots',
        '--warc-dedup', 'dedup.cdx',
    ]
    app_builder = Builder(parser.parse_args(argv), unit_test=True)
    application = app_builder.build()
    status = yield from application.run()

    with open('test.warc', 'rb') as warc_file:
        warc_bytes = warc_file.read()

    self.assertIn(b'KQ4IUKATKL63FT5GMAE2YDRV3WERNL34', warc_bytes)
    self.assertIn(b'Type: revisit', warc_bytes)
    self.assertIn(b'<under-the-deer>', warc_bytes)
    self.assertEqual(0, status)
    self.assertGreaterEqual(app_builder.factory['Statistics'].files, 1)
def test_propagate_ipv4_only_and_no_cert_check_to_youtube_dl(self):
    """Check that IPv4-only and no-cert-check options reach youtube-dl.

    The app is run with ``--youtube-dl``, ``--inet4-only`` and
    ``--no-check-certificate`` while logging at debug level to ``test.log``.
    The log must then show a ``Starting process ['youtube-dl ...`` entry
    carrying ``--force-ipv4`` and one carrying ``--no-check-certificate``.

    NOTE(review): the target is a literal youtube.com URL, so this test
    presumably needs network access and a youtube-dl executable -- confirm.
    """
    arg_parser = AppArgumentParser()
    args = arg_parser.parse_args([
        'https://www.youtube.com/watch?v=tPEE9ZwTmy0',
        '--warc-file', 'test',
        '--debug',  # to capture youtube-dl arguments in the log
        '--no-warc-compression',
        '--youtube-dl',
        '--inet4-only',
        '--no-check-certificate',
        '--output-file', 'test.log',
    ])
    builder = Builder(args, unit_test=True)
    app = builder.build()
    exit_code = yield from app.run()

    self.assertEqual(0, exit_code)

    with open('test.log', 'rb') as test_log:
        data = test_log.read()

    # Raw bytes literals: "\[" is not a valid escape in a normal bytes
    # literal and triggers a warning on current Python versions. The
    # compiled patterns are identical to the previous ones.
    self.assertTrue(
        re.search(rb"Starting process \['youtube-dl.*--force-ipv4", data))
    self.assertTrue(
        re.search(
            rb"Starting process \['youtube-dl.*--no-check-certificate",
            data))
def test_app_args_warc_dedup(self):
    """Exercise ``--warc-dedup`` using a CDX file created by the test.

    ``dedup.cdx`` is written with a header line and a single record for the
    target URL. After the run, the uncompressed ``test.warc`` must contain
    the digest and record ID from that record plus ``Type: revisit``.
    """
    parser = AppArgumentParser()

    with open('dedup.cdx', 'wb') as cdx_file:
        cdx_file.writelines([
            b' CDX a k u\n',
            self.get_url('/static/my_file.txt').encode('ascii'),
            b' KQ4IUKATKL63FT5GMAE2YDRV3WERNL34',
            b' <under-the-deer>\n',
        ])

    options = parser.parse_args([
        self.get_url('/static/my_file.txt'),
        '--no-parent',
        '--warc-file', 'test',
        '--no-warc-compression',
        '-4',
        '--no-robots',
        '--warc-dedup', 'dedup.cdx',
    ])
    app_builder = Builder(options, unit_test=True)
    application = app_builder.build()
    status = yield from application.run()

    with open('test.warc', 'rb') as warc_file:
        warc_bytes = warc_file.read()

    expected_fragments = (
        b'KQ4IUKATKL63FT5GMAE2YDRV3WERNL34',
        b'Type: revisit',
        b'<under-the-deer>',
    )
    for fragment in expected_fragments:
        self.assertIn(fragment, warc_bytes)

    self.assertEqual(0, status)
    self.assertGreaterEqual(app_builder.factory['Statistics'].files, 1)
def test_save_cookie(self):
    """Load cookies from a temp file and check what ``--save-cookies`` keeps.

    The input file holds one cookie with an expiry value (``isloggedin``)
    and one without (``admin``). After the run, the saved cookie file must
    contain the former and must not contain the latter.
    """
    parser = AppArgumentParser()
    cookie_lines = (
        b'# Kittens\n',
        b'localhost.local',
        b'\tFALSE\t/\tFALSE\t9999999999\tisloggedin\t1\n',
        b'\tFALSE\t/\tFALSE\t\tadmin\t1\n',
    )

    # The app runs while the temporary file still exists.
    with tempfile.NamedTemporaryFile() as cookie_file:
        for chunk in cookie_lines:
            cookie_file.write(chunk)
        cookie_file.flush()

        options = parser.parse_args([
            self.get_url('/some_page/'),
            '--load-cookies', cookie_file.name,
            '--tries', '1',
            '--save-cookies', 'wpull_test_cookies.txt',
        ])
        app_builder = Builder(options, unit_test=True)
        application = app_builder.build()
        status = yield from application.run()

    self.assertEqual(0, status)
    self.assertEqual(1, app_builder.factory['Statistics'].files)

    with open('wpull_test_cookies.txt', 'rb') as saved_file:
        saved_bytes = saved_file.read()

    self.assertIn(b'isloggedin\t1', saved_bytes)
    self.assertNotIn(b'admin\t1', saved_bytes)
def test_app_args_post_data(self):
    """Run against ``/post/`` with ``--post-data``; expect exit code 0."""
    parser = AppArgumentParser()
    argv = [
        self.get_url("/post/"),
        "--post-data", "text=hi",
    ]
    app_builder = Builder(parser.parse_args(argv), unit_test=True)
    application = app_builder.build()
    status = yield from application.run()

    self.assertEqual(0, status)
def test_big_payload(self):
    """Download ``/big_payload`` and compare it with a locally built payload.

    The expected bytes are 10001 chained SHA-1 digests seeded with
    ``foxfoxfox``: each digest is appended and then fed back into the hash.
    """
    hasher = hashlib.sha1(b'foxfoxfox')
    chunks = []

    for _ in range(10000):
        digest = hasher.digest()
        chunks.append(digest)
        hasher.update(digest)

    # One final digest after the last update.
    chunks.append(hasher.digest())
    expected = b''.join(chunks)

    parser = AppArgumentParser()
    options = parser.parse_args([self.get_url('/big_payload')])
    app_builder = Builder(options, unit_test=True)
    application = app_builder.build()
    status = yield from application.run()

    self.assertTrue(os.path.exists('big_payload'))

    with open('big_payload', 'rb') as payload_file:
        self.assertEqual(expected, payload_file.read())

    self.assertEqual(0, status)
    self.assertEqual(1, app_builder.factory['Statistics'].files)
def test_app_phantomjs_scroll(self):
    """Run with ``--phantomjs-scroll 20`` and check the saved snapshot.

    The resolver class is replaced with ``MockDNSResolver``. The HTML
    snapshot written by the run must contain ``Count: 10`` and the exit
    code must be 0.
    """
    parser = AppArgumentParser()

    # Change localhost into something else to test proxy
    page_url = self.get_url('/static/DEUUEAUGH.html').replace(
        'localhost', 'example.invalid')

    options = parser.parse_args([
        page_url,
        '-4',
        '--no-robots',
        '--phantomjs',
        '--phantomjs-wait', '0.4',
        '--phantomjs-scroll', '20',
        '--no-check-certificate',
    ])
    app_builder = Builder(options, unit_test=True)
    app_builder.factory.class_map['Resolver'] = MockDNSResolver
    application = app_builder.build()
    status = yield from application.run()

    with open('DEUUEAUGH.html.snapshot.html', 'rb') as snapshot_file:
        snapshot_bytes = snapshot_file.read()

    self.assertIn(b'Count: 10', snapshot_bytes)
    self.assertEqual(0, status)
def test_timestamping_hit_orig(self):
    """Run ``--timestamping`` on ``/lastmod`` with a pre-existing local file.

    A ``lastmod`` file containing ``HI`` is created in the temp directory
    and its access/modification times are set to 631152000. The run must
    exit with 0 and the file must still contain ``HI``.
    """
    parser = AppArgumentParser()
    options = parser.parse_args([self.get_url('/lastmod'), '--timestamping'])

    # NOTE(review): both paths point at the same 'lastmod' file, so the
    # second write and second read repeat the first. The test name hints
    # that a separate ".orig" file may have been intended -- confirm
    # before changing.
    local_path = os.path.join(self.temp_dir.name, 'lastmod')
    orig_path = os.path.join(self.temp_dir.name, 'lastmod')

    for path in (local_path, orig_path):
        with open(path, 'wb') as out_file:
            out_file.write(b'HI')

    os.utime(orig_path, (631152000, 631152000))

    app_builder = Builder(options, unit_test=True)
    application = app_builder.build()
    status = yield from application.run()

    self.assertEqual(0, status)

    for path in (local_path, orig_path):
        with open(path, 'rb') as in_file:
            self.assertEqual(b'HI', in_file.read())
def test_session_cookie(self):
    """Check session cookies are loaded, updated and saved.

    Two session cookies are loaded from a temp file and the app is run with
    ``--keep-session-cookies``. A ``pipeline_end`` listener checks that the
    cookie jar holds two cookies the first time it fires. Afterwards the jar
    must hold ``sessionid=boxcat`` and ``test=yes``, and the saved cookie
    file must contain ``test\\tyes``.
    """
    parser = AppArgumentParser()

    with tempfile.NamedTemporaryFile() as cookie_file:
        cookie_file.write(b"# Kittens\n")
        cookie_file.write(b"localhost.local")
        # session cookie, Python style
        cookie_file.write(b"\tFALSE\t/\tFALSE\t\ttest\tno\n")
        # session cookie, Firefox/Wget/Curl style
        cookie_file.write(b"\tFALSE\t/\tFALSE\t0\tsessionid\tboxcat\n")
        cookie_file.flush()

        argv = [
            self.get_url("/cookie"),
            "--load-cookies", cookie_file.name,
            "--tries", "1",
            "--save-cookies", "wpull_test_cookies.txt",
            "--keep-session-cookies",
        ]
        app_builder = Builder(parser.parse_args(argv), unit_test=True)
        application = app_builder.build()

        listener_fired = False

        def on_pipeline_end(pipeline):
            # Only the first invocation performs the check.
            nonlocal listener_fired

            if not listener_fired:
                listener_fired = True
                self.assertEqual(2, len(app_builder.factory["CookieJar"]))

        application.event_dispatcher.add_listener(
            Application.Event.pipeline_end, on_pipeline_end
        )

        status = yield from application.run()

    self.assertTrue(listener_fired)
    self.assertEqual(0, status)
    self.assertEqual(1, app_builder.factory["Statistics"].files)

    jar_cookies = sorted(
        app_builder.factory["CookieJar"], key=lambda cookie: cookie.name
    )
    _logger.debug("{0}".format(jar_cookies))

    self.assertEqual(2, len(jar_cookies))

    self.assertEqual("sessionid", jar_cookies[0].name)
    self.assertEqual("boxcat", jar_cookies[0].value)
    self.assertEqual("test", jar_cookies[1].name)
    self.assertEqual("yes", jar_cookies[1].value)

    with open("wpull_test_cookies.txt", "rb") as saved_file:
        saved_bytes = saved_file.read()

    self.assertIn(b"test\tyes", saved_bytes)
def test_app_python_plugin_script(self):
    """Run with the ``extensive.plugin.py`` sample plugin script.

    Expects exit code 42, a ``PipelineSeries`` concurrency of 2, three
    recorded files, and a total duration below 10 seconds even though
    ``--wait 12`` is passed on the command line.
    """
    parser = AppArgumentParser()
    plugin_path = os.path.join(
        os.path.dirname(__file__),
        'sample_user_scripts',
        'extensive.plugin.py',
    )
    argv = [
        self.get_url('/'),
        self.get_url('/some_page'),
        self.get_url('/mordor'),
        'localhost:1/wolf',
        '--plugin-script', plugin_path,
        '--page-requisites',
        '--reject-regex', '/post/',
        '--wait', '12',
        '--retry-connrefused',
        '--tries', '1',
    ]
    app_builder = Builder(parser.parse_args(argv), unit_test=True)
    application = app_builder.build()
    status = yield from application.run()

    # Dump the working tree to stdout to help diagnose failures.
    print(list(os.walk('.')))

    self.assertEqual(42, status)

    pipeline_series = app_builder.factory['PipelineSeries']
    self.assertEqual(2, pipeline_series.concurrency)

    statistics = app_builder.factory['Statistics']
    self.assertEqual(3, statistics.files)

    # duration should be virtually 0 but account for slowness on travis ci
    self.assertGreater(10.0, statistics.duration)
def test_timestamping_hit_orig(self):
    """Check ``--timestamping`` leaves an existing ``lastmod`` file alone.

    The file is created with content ``HI`` and its times set to 631152000
    before the run. The run must exit with 0 and the content must still be
    ``HI`` afterwards.
    """
    options = AppArgumentParser().parse_args(
        [self.get_url("/lastmod"), "--timestamping"]
    )

    # NOTE(review): the two names below are the same path ('lastmod'), so
    # the duplicated write/read are redundant as written. A distinct
    # ".orig" file may have been intended -- confirm before changing.
    target_path = os.path.join(self.temp_dir.name, "lastmod")
    target_orig_path = os.path.join(self.temp_dir.name, "lastmod")

    with open(target_path, "wb") as handle:
        handle.write(b"HI")

    with open(target_orig_path, "wb") as handle:
        handle.write(b"HI")

    old_timestamp = 631152000
    os.utime(target_orig_path, (old_timestamp, old_timestamp))

    app_builder = Builder(options, unit_test=True)
    application = app_builder.build()
    status = yield from application.run()

    self.assertEqual(0, status)

    with open(target_path, "rb") as handle:
        self.assertEqual(b"HI", handle.read())

    with open(target_orig_path, "rb") as handle:
        self.assertEqual(b"HI", handle.read())
def test_save_cookie(self):
    """Check which loaded cookies end up in the ``--save-cookies`` file.

    The loaded file has an ``isloggedin`` cookie with an expiry value and an
    ``admin`` cookie with an empty expiry field. Only ``isloggedin`` is
    expected in the saved file.
    """
    parser = AppArgumentParser()

    # The app runs while the temporary file still exists.
    with tempfile.NamedTemporaryFile() as cookie_file:
        cookie_file.writelines([
            b"# Kittens\n",
            b"localhost.local",
            b"\tFALSE\t/\tFALSE\t9999999999\tisloggedin\t1\n",
            b"\tFALSE\t/\tFALSE\t\tadmin\t1\n",
        ])
        cookie_file.flush()

        argv = [
            self.get_url("/some_page/"),
            "--load-cookies", cookie_file.name,
            "--tries", "1",
            "--save-cookies", "wpull_test_cookies.txt",
        ]
        app_builder = Builder(parser.parse_args(argv), unit_test=True)
        application = app_builder.build()
        status = yield from application.run()

    self.assertEqual(0, status)
    self.assertEqual(1, app_builder.factory["Statistics"].files)

    with open("wpull_test_cookies.txt", "rb") as saved_file:
        saved_bytes = saved_file.read()

    self.assertIn(b"isloggedin\t1", saved_bytes)
    self.assertNotIn(b"admin\t1", saved_bytes)
def main(exit=True, install_tornado_bridge=True, use_signals=True):
    """Parse command-line arguments, build the application and run it.

    Args:
        exit: If true, finish by calling ``sys.exit`` with the application's
            exit code; otherwise return that exit code.
        install_tornado_bridge: If true, install Tornado's
            ``AsyncIOMainLoop`` before doing anything else.
        use_signals: If true, call the application's
            ``setup_signal_handlers``.

    Returns:
        The exit code from ``run_sync`` when ``exit`` is false.
    """
    if install_tornado_bridge:
        tornado.platform.asyncio.AsyncIOMainLoop().install()

    options = AppArgumentParser().parse_args()
    app_builder = Builder(options)
    app = app_builder.build()

    if use_signals:
        app.setup_signal_handlers()

    if options.debug_manhole:
        # Imported lazily: only needed when the debug option is set.
        import manhole
        import wpull

        # Publish the builder on the package before installing manhole,
        # presumably so a manhole session can reach it.
        wpull.wpull_builder = app_builder
        manhole.install()

    status = app.run_sync()

    if not exit:
        return status

    sys.exit(status)
def test_ssl_bad_certificate(self):
    """Check the exit code when a web session raises SSLVerificationError.

    The ``WebClient`` entry in the builder's class map is replaced with a
    subclass whose sessions always raise ``SSLVerificationError`` from
    ``start``. The run must exit with 7 and record no files.
    """
    parser = AppArgumentParser()
    argv = [
        self.get_url('/'),
        '--no-robots',
        '--no-check-certificate',
        '--tries', '1',
    ]
    app_builder = Builder(parser.parse_args(argv), unit_test=True)
    original_web_client = app_builder.factory.class_map['WebClient']

    class MockWebSession(WebSession):
        @asyncio.coroutine
        def start(self):
            raise SSLVerificationError('A very bad certificate!')

    class MockWebClient(original_web_client):
        def session(self, request):
            return MockWebSession(
                request,
                self._http_client,
                self._redirect_tracker_factory(),
                Request,
            )

    app_builder.factory.class_map['WebClient'] = MockWebClient

    application = app_builder.build()
    status = yield from application.run()

    self.assertEqual(7, status)
    self.assertEqual(0, app_builder.factory['Statistics'].files)
def test_big_payload(self):
    """Fetch ``/big_payload`` and verify the downloaded bytes exactly.

    The reference payload is a chain of SHA-1 digests seeded with
    ``foxfoxfox``: 10000 rounds of "take digest, feed it back", plus one
    last digest.
    """
    sha1 = hashlib.sha1(b"foxfoxfox")
    buffer = bytearray()

    for _round in range(10000):
        block = sha1.digest()
        sha1.update(block)
        buffer += block

    buffer += sha1.digest()
    expected_bytes = bytes(buffer)

    argv = [self.get_url("/big_payload")]
    options = AppArgumentParser().parse_args(argv)
    app_builder = Builder(options, unit_test=True)
    application = app_builder.build()
    status = yield from application.run()

    self.assertTrue(os.path.exists("big_payload"))

    with open("big_payload", "rb") as downloaded:
        self.assertEqual(expected_bytes, downloaded.read())

    self.assertEqual(0, status)
    self.assertEqual(1, app_builder.factory["Statistics"].files)
def test_app_phantomjs(self):
    """Run with PhantomJS enabled and inspect the snapshots and the WARC.

    The resolver class is replaced with ``MockDNSResolver`` and the
    ``boring.plugin.py`` sample script is loaded. The run must produce HTML
    and PDF snapshots plus an uncompressed ``test.warc`` containing the
    snapshot records and the custom ``Accept-Language`` header.
    """
    parser = AppArgumentParser()
    plugin_path = os.path.join(
        os.path.dirname(__file__),
        'sample_user_scripts',
        'boring.plugin.py',
    )

    # Change localhost into something else to test proxy
    page_url = self.get_url('/static/simple_javascript.html').replace(
        'localhost', 'example.invalid')

    argv = [
        page_url,
        '--warc-file', 'test',
        '--no-warc-compression',
        '-4',
        '--no-robots',
        '--phantomjs',
        '--phantomjs-exe', 'phantomjs',
        '--phantomjs-wait', '0.1',
        '--phantomjs-scroll', '2',
        '--header', 'accept-language: dragon',
        '--plugin-script', plugin_path,
        '--no-check-certificate',
    ]
    app_builder = Builder(parser.parse_args(argv), unit_test=True)
    app_builder.factory.class_map['Resolver'] = MockDNSResolver
    application = app_builder.build()
    status = yield from application.run()

    expected_outputs = (
        'test.warc',
        'simple_javascript.html.snapshot.html',
        'simple_javascript.html.snapshot.pdf',
    )
    for output_path in expected_outputs:
        self.assertTrue(os.path.exists(output_path))

    with open('simple_javascript.html.snapshot.html', 'rb') as html_file:
        snapshot_bytes = html_file.read()

    self.assertIn(b'Hello world!', snapshot_bytes)

    with open('test.warc', 'rb') as warc_file:
        warc_bytes = warc_file.read()

    expected_fragments = (
        b'urn:X-wpull:snapshot?url=',
        b'text/html',
        b'application/pdf',
        b'application/json',
        b'"set_scroll_top"',
    )
    for fragment in expected_fragments:
        self.assertIn(fragment, warc_bytes)

    try:
        self.assertIn(b'Accept-Encoding: identity', warc_bytes)
    except AssertionError:
        # webkit treats localhost differently
        self.assertNotIn(b'Accept-Encoding: gzip', warc_bytes)

    self.assertIn(b'Accept-Language: dragon', warc_bytes)

    self.assertEqual(0, status)
    self.assertGreaterEqual(app_builder.factory['Statistics'].files, 1)
def test_no_content(self):
    """Fetch ``/no_content`` with one try; expect exit code 0 and one file."""
    parser = AppArgumentParser()
    argv = [
        self.get_url("/no_content"),
        "--tries=1",
    ]
    app_builder = Builder(parser.parse_args(argv), unit_test=True)
    application = app_builder.build()
    status = yield from application.run()

    self.assertEqual(0, status)
    self.assertEqual(1, app_builder.factory["Statistics"].files)
def test_strip_session_id(self):
    """Recurse over ``/forum/`` with ``--strip-session-id``.

    Expects exit code 0 and exactly one recorded file.
    """
    parser = AppArgumentParser()
    argv = [
        self.get_url("/forum/"),
        "-r",
        "--strip-session-id",
    ]
    app_builder = Builder(parser.parse_args(argv), unit_test=True)
    application = app_builder.build()
    status = yield from application.run()

    self.assertEqual(0, status)
    self.assertEqual(1, app_builder.factory["Statistics"].files)
def test_non_http_redirect(self):
    """Recurse from ``/non_http_redirect``; expect exit code 0, no files."""
    parser = AppArgumentParser()
    argv = [
        self.get_url("/non_http_redirect"),
        "--recursive",
        "--no-robots",
    ]
    app_builder = Builder(parser.parse_args(argv), unit_test=True)
    application = app_builder.build()
    status = yield from application.run()

    self.assertEqual(0, status)
    self.assertEqual(0, app_builder.factory["Statistics"].files)
def test_ignore_length(self):
    """Fetch ``/underrun`` with ``--ignore-length``.

    Expects exit code 0 and exactly one recorded file.
    """
    parser = AppArgumentParser()
    argv = [
        self.get_url("/underrun"),
        "--ignore-length",
        "--no-robots",
    ]
    app_builder = Builder(parser.parse_args(argv), unit_test=True)
    application = app_builder.build()
    status = yield from application.run()

    self.assertEqual(0, status)
    self.assertEqual(1, app_builder.factory["Statistics"].files)
def test_referer_option(self):
    """Recurse over ``/referrer/`` with ``--referer http://left.shark/``.

    Expects exit code 0 and two recorded files.
    """
    parser = AppArgumentParser()
    argv = [
        self.get_url("/referrer/"),
        "-r",
        "--referer", "http://left.shark/",
    ]
    app_builder = Builder(parser.parse_args(argv), unit_test=True)
    application = app_builder.build()
    status = yield from application.run()

    self.assertEqual(0, status)
    self.assertEqual(2, app_builder.factory["Statistics"].files)
def test_session_timeout(self):
    """Fetch ``/sleep_long`` with a 0.1 second ``--session-timeout``.

    Expects exit code 4 and no recorded files.
    """
    parser = AppArgumentParser()
    argv = [
        self.get_url("/sleep_long"),
        "--tries=1",
        "--session-timeout=0.1",
    ]
    app_builder = Builder(parser.parse_args(argv), unit_test=True)
    application = app_builder.build()
    status = yield from application.run()

    self.assertEqual(4, status)
    self.assertEqual(0, app_builder.factory["Statistics"].files)
def test_immediate_robots_forbidden(self):
    """Recurse from ``/forbidden``; expect exit code 0 and no files."""
    parser = AppArgumentParser()
    argv = [
        self.get_url("/forbidden"),
        "--recursive",
    ]
    app_builder = Builder(parser.parse_args(argv), unit_test=True)
    application = app_builder.build()
    status = yield from application.run()

    self.assertEqual(0, status)
    self.assertEqual(0, app_builder.factory["Statistics"].files)
def test_escaped_fragment_recursive(self):
    """Recurse over ``/escape_from_fragments/`` with ``--escaped-fragment``.

    Expects exit code 0 and two recorded files.
    """
    parser = AppArgumentParser()
    argv = [
        self.get_url("/escape_from_fragments/"),
        "-r",
        "--escaped-fragment",
    ]
    app_builder = Builder(parser.parse_args(argv), unit_test=True)
    application = app_builder.build()
    status = yield from application.run()

    self.assertEqual(0, status)
    self.assertEqual(2, app_builder.factory["Statistics"].files)
def test_misc_urls(self):
    """Run against a bracketed IPv6 literal URL; expect exit code 4."""
    parser = AppArgumentParser()
    argv = [
        "http://[0:0:0:0:0:ffff:a00:0]/",
        "--tries", "1",
        "--timeout", "0.5",
        "-r",
    ]
    app_builder = Builder(parser.parse_args(argv), unit_test=True)
    application = app_builder.build()
    status = yield from application.run()

    self.assertEqual(4, status)
def test_database_uri(self):
    """Run with ``--database-uri sqlite:///test.db``; expect exit code 0."""
    parser = AppArgumentParser()
    argv = [
        self.get_url("/"),
        "--database-uri", "sqlite:///test.db",
    ]
    app_builder = Builder(parser.parse_args(argv), unit_test=True)
    application = app_builder.build()
    status = yield from application.run()

    self.assertEqual(0, status)
def test_basic_auth_fail(self):
    """Fetch ``/basic_auth`` with user ``root`` / password ``toothless``.

    Expects exit code 0 and no recorded files.
    """
    parser = AppArgumentParser()
    argv = [
        self.get_url("/basic_auth"),
        "--user", "root",
        "--password", "toothless",
    ]
    app_builder = Builder(parser.parse_args(argv), unit_test=True)
    application = app_builder.build()
    status = yield from application.run()

    self.assertEqual(0, status)
    self.assertEqual(0, app_builder.factory["Statistics"].files)
def test_app_phantomjs(self):
    """Exercise the PhantomJS options and check every produced artifact.

    Uses ``MockDNSResolver`` as the resolver class and loads the
    ``boring.plugin.py`` sample script. Verifies the snapshot files exist,
    the HTML snapshot contains ``Hello world!``, and the uncompressed WARC
    contains the snapshot records and expected request headers.
    """
    parser = AppArgumentParser()
    sample_script = os.path.join(
        os.path.dirname(__file__), 'sample_user_scripts', 'boring.plugin.py'
    )

    # Change localhost into something else to test proxy
    start_url = self.get_url('/static/simple_javascript.html')
    start_url = start_url.replace('localhost', 'example.invalid')

    options = parser.parse_args([
        start_url,
        '--warc-file', 'test',
        '--no-warc-compression',
        '-4',
        '--no-robots',
        '--phantomjs',
        '--phantomjs-exe', 'phantomjs',
        '--phantomjs-wait', '0.1',
        '--phantomjs-scroll', '2',
        '--header', 'accept-language: dragon',
        '--plugin-script', sample_script,
        '--no-check-certificate',
    ])
    app_builder = Builder(options, unit_test=True)
    app_builder.factory.class_map['Resolver'] = MockDNSResolver
    application = app_builder.build()
    status = yield from application.run()

    html_snapshot = 'simple_javascript.html.snapshot.html'
    pdf_snapshot = 'simple_javascript.html.snapshot.pdf'

    self.assertTrue(os.path.exists('test.warc'))
    self.assertTrue(os.path.exists(html_snapshot))
    self.assertTrue(os.path.exists(pdf_snapshot))

    with open(html_snapshot, 'rb') as html_file:
        html_bytes = html_file.read()

    self.assertIn(b'Hello world!', html_bytes)

    with open('test.warc', 'rb') as warc_file:
        warc_bytes = warc_file.read()

    self.assertIn(b'urn:X-wpull:snapshot?url=', warc_bytes)
    self.assertIn(b'text/html', warc_bytes)
    self.assertIn(b'application/pdf', warc_bytes)
    self.assertIn(b'application/json', warc_bytes)
    self.assertIn(b'"set_scroll_top"', warc_bytes)

    if b'Accept-Encoding: identity' not in warc_bytes:
        # webkit treats localhost differently
        self.assertNotIn(b'Accept-Encoding: gzip', warc_bytes)

    self.assertIn(b'Accept-Language: dragon', warc_bytes)

    self.assertEqual(0, status)
    self.assertGreaterEqual(app_builder.factory['Statistics'].files, 1)
def test_database_path_question_mark(self):
    """Run with ``--database test?.db``.

    Expects exit code 0 and a file named ``test_.db`` to exist afterwards.
    """
    parser = AppArgumentParser()
    argv = [
        self.get_url("/"),
        "--database", "test?.db",
    ]
    app_builder = Builder(parser.parse_args(argv), unit_test=True)
    application = app_builder.build()
    status = yield from application.run()

    self.assertEqual(0, status)
    self.assertTrue(os.path.exists("test_.db"))
def test_app_args_post_data(self):
    """Send ``text=hi`` to ``/post/`` via ``--post-data``; expect exit 0."""
    argv = [self.get_url('/post/'), '--post-data', 'text=hi']
    options = AppArgumentParser().parse_args(argv)
    app_builder = Builder(options, unit_test=True)
    application = app_builder.build()
    status = yield from application.run()

    self.assertEqual(0, status)
def test_many_page_with_some_fail(self):
    """Recurse over ``/blog/`` expecting a server-error exit status.

    The exit code must equal ``ExitStatus.server_error``, more than one
    file must be recorded, and the recorded duration must exceed 3.
    """
    parser = AppArgumentParser()
    argv = [
        self.get_url("/blog/"),
        "--no-parent",
        "--recursive",
        "--page-requisites",
        "-4",
    ]
    app_builder = Builder(parser.parse_args(argv), unit_test=True)
    application = app_builder.build()
    status = yield from application.run()

    self.assertEqual(ExitStatus.server_error, status)
    self.assertGreater(app_builder.factory["Statistics"].files, 1)
    self.assertGreater(app_builder.factory["Statistics"].duration, 3)
def test_no_iri(self):
    """Run with ``--no-iri``; expect exit code 0 and one recorded file."""
    parser = AppArgumentParser()
    argv = [
        self.get_url('/'),
        '--no-iri',
        '--no-robots',
    ]
    app_builder = Builder(parser.parse_args(argv), unit_test=True)
    application = app_builder.build()
    status = yield from application.run()

    self.assertEqual(0, status)
    self.assertEqual(1, app_builder.factory['Statistics'].files)
def test_iri_handling(self):
    """Recurse from ``/static/mojibake.html`` and inspect the URL table.

    The URL table must contain the percent-encoded URL
    ``/%E6%96%87%E5%AD%97%E5%8C%96%E3%81%91`` and the exit code must be 0.
    """
    parser = AppArgumentParser()
    argv = [
        self.get_url("/static/mojibake.html"),
        "-r",
        "--database", "temp-unittest.db",
    ]
    app_builder = Builder(parser.parse_args(argv), unit_test=True)
    application = app_builder.build()
    status = yield from application.run()

    url_table = app_builder.factory["URLTable"]
    seen_urls = tuple(record.url for record in url_table.get_all())

    self.assertIn(
        self.get_url("/%E6%96%87%E5%AD%97%E5%8C%96%E3%81%91"), seen_urls
    )
    self.assertEqual(0, status)
def test_database_uri(self):
    """Pass an SQLite URI through ``--database-uri``; expect exit code 0."""
    argv = [self.get_url('/'), '--database-uri', 'sqlite:///test.db']
    options = AppArgumentParser().parse_args(argv)
    app_builder = Builder(options, unit_test=True)
    application = app_builder.build()
    status = yield from application.run()

    self.assertEqual(0, status)
def test_quota(self):
    """Recurse over ``/blog/`` with ``--quota 1``.

    Expects exit code 0 and exactly one recorded file.
    """
    parser = AppArgumentParser()
    argv = [
        self.get_url("/blog/"),
        "--recursive",
        "--quota", "1",
    ]
    app_builder = Builder(parser.parse_args(argv), unit_test=True)
    application = app_builder.build()
    status = yield from application.run()

    self.assertEqual(0, status)
    self.assertEqual(1, app_builder.factory["Statistics"].files)
def test_escaped_fragment_recursive(self):
    """Crawl ``/escape_from_fragments/`` recursively with escaped fragments.

    Expects exit code 0 and two recorded files.
    """
    argv = [self.get_url('/escape_from_fragments/'), '-r', '--escaped-fragment']
    options = AppArgumentParser().parse_args(argv)
    app_builder = Builder(options, unit_test=True)
    application = app_builder.build()
    status = yield from application.run()

    self.assertEqual(0, status)

    file_count = app_builder.factory['Statistics'].files
    self.assertEqual(2, file_count)
def test_immediate_robots_error(self):
    """Recurse over ``http://127.0.0.1:1`` plus the test server root.

    Expects exit code 4 and exactly one recorded file.
    """
    parser = AppArgumentParser()
    argv = [
        "http://127.0.0.1:1",
        self.get_url("/"),
        "--recursive",
        "--tries", "1",
        "--timeout", "10",
    ]
    app_builder = Builder(parser.parse_args(argv), unit_test=True)
    application = app_builder.build()
    status = yield from application.run()

    self.assertEqual(4, status)
    self.assertEqual(1, app_builder.factory["Statistics"].files)
def test_referer_option(self):
    """Crawl ``/referrer/`` recursively sending ``http://left.shark/``.

    Expects exit code 0 and two recorded files.
    """
    argv = [self.get_url('/referrer/'), '-r', '--referer', 'http://left.shark/']
    options = AppArgumentParser().parse_args(argv)
    app_builder = Builder(options, unit_test=True)
    application = app_builder.build()
    status = yield from application.run()

    self.assertEqual(0, status)

    file_count = app_builder.factory['Statistics'].files
    self.assertEqual(2, file_count)
def test_session_timeout(self):
    """Request ``/sleep_long`` with ``--session-timeout=0.1`` and one try.

    Expects exit code 4 and no recorded files.
    """
    argv = [self.get_url('/sleep_long'), '--tries=1', '--session-timeout=0.1']
    options = AppArgumentParser().parse_args(argv)
    app_builder = Builder(options, unit_test=True)
    application = app_builder.build()
    status = yield from application.run()

    self.assertEqual(4, status)

    file_count = app_builder.factory['Statistics'].files
    self.assertEqual(0, file_count)
def test_check_certificate(self):
    """Run without ``--no-check-certificate``; expect exit code 5."""
    parser = AppArgumentParser()
    argv = [self.get_url('/'), '--no-robots']
    app_builder = Builder(parser.parse_args(argv), unit_test=True)
    application = app_builder.build()
    status = yield from application.run()

    self.assertEqual(5, status)
def test_escaped_fragment_input_url(self):
    """Pass a ``#!`` URL directly with ``--escaped-fragment``.

    Expects exit code 0, one recorded file, and an output file named
    ``index.html?_escaped_fragment_=husky-cat``.
    """
    parser = AppArgumentParser()
    argv = [
        self.get_url("/escape_from_fragments/#!husky-cat"),
        "--escaped-fragment",
    ]
    app_builder = Builder(parser.parse_args(argv), unit_test=True)
    application = app_builder.build()
    status = yield from application.run()

    self.assertEqual(0, status)
    self.assertEqual(1, app_builder.factory["Statistics"].files)
    self.assertTrue(
        os.path.exists("index.html?_escaped_fragment_=husky-cat")
    )
def test_database_path_question_mark(self):
    """Use a database filename containing ``?``.

    With ``--database test?.db`` the run must exit with 0 and a file named
    ``test_.db`` must exist afterwards.
    """
    argv = [self.get_url('/'), '--database', 'test?.db']
    options = AppArgumentParser().parse_args(argv)
    app_builder = Builder(options, unit_test=True)
    application = app_builder.build()
    status = yield from application.run()

    self.assertEqual(0, status)

    database_exists = os.path.exists('test_.db')
    self.assertTrue(database_exists)
def test_page_requisite_level(self):
    """Recurse over ``/infinite_iframe/`` with ``--page-requisites-level 1``.

    Expects exit code 0 and two recorded files.
    """
    parser = AppArgumentParser()
    argv = [
        self.get_url("/infinite_iframe/"),
        "-r",
        "--page-requisites",
        "--page-requisites-level", "1",
    ]
    app_builder = Builder(parser.parse_args(argv), unit_test=True)
    application = app_builder.build()
    status = yield from application.run()

    self.assertEqual(0, status)
    self.assertEqual(2, app_builder.factory["Statistics"].files)
def test_link_type(self):
    """Recurse over ``/always200/`` with ``--page-requisites-level 2``.

    Expects exit code 0 and four recorded files.
    """
    parser = AppArgumentParser()
    argv = [
        self.get_url("/always200/"),
        "-r",
        "--page-requisites",
        "--page-requisites-level", "2",
    ]
    app_builder = Builder(parser.parse_args(argv), unit_test=True)
    application = app_builder.build()
    status = yield from application.run()

    self.assertEqual(0, status)
    self.assertEqual(4, app_builder.factory["Statistics"].files)
def test_globbing(self):
    """Request the wildcard URL ``/read*.txt``.

    Expects exit code 0 and exactly one recorded file.
    """
    parser = AppArgumentParser()
    argv = [self.get_url('/read*.txt')]
    app_builder = Builder(parser.parse_args(argv), unit_test=True)
    application = app_builder.build()
    status = yield from application.run()

    # Dump the working tree to stdout to help diagnose failures.
    print(list(os.walk('.')))

    self.assertEqual(0, status)
    self.assertEqual(1, app_builder.factory['Statistics'].files)
def test_login_fail(self):
    """Fetch a file as user ``smaug`` with password ``hunter2``.

    Expects exit code 6 and no recorded files.
    """
    parser = AppArgumentParser()
    argv = [
        self.get_url('/example (copy).txt'),
        '--user', 'smaug',
        '--password', 'hunter2',
        '--tries', '1',
    ]
    app_builder = Builder(parser.parse_args(argv), unit_test=True)
    application = app_builder.build()
    status = yield from application.run()

    self.assertEqual(6, status)
    self.assertEqual(0, app_builder.factory['Statistics'].files)
def test_immediate_robots_forbidden(self):
    """Crawl ``/forbidden`` recursively; expect exit code 0 and no files."""
    argv = [self.get_url('/forbidden'), '--recursive']
    options = AppArgumentParser().parse_args(argv)
    app_builder = Builder(options, unit_test=True)
    application = app_builder.build()
    status = yield from application.run()

    self.assertEqual(0, status)

    file_count = app_builder.factory['Statistics'].files
    self.assertEqual(0, file_count)
def test_file_vs_directory(self):
    """Recurse one level into ``/example2💎`` keeping the listing file.

    Expects exit code 0 and ``example2💎/.listing`` to exist afterwards.
    """
    parser = AppArgumentParser()
    argv = [
        self.get_url('/example2💎'),
        '--no-host-directories',
        '--no-remove-listing',
        '-r',
        '-l=1',
        '--tries=1',
    ]
    app_builder = Builder(parser.parse_args(argv), unit_test=True)
    application = app_builder.build()
    status = yield from application.run()

    # Dump the working tree to stdout to help diagnose failures.
    print(list(os.walk('.')))

    self.assertEqual(0, status)
    self.assertTrue(os.path.exists('example2💎/.listing'))
def test_app_python_script_stop(self):
    """Run with the ``stopper.plugin.py`` sample script; expect exit 1."""
    parser = AppArgumentParser()
    plugin_path = os.path.join(
        os.path.dirname(__file__),
        'sample_user_scripts',
        'stopper.plugin.py',
    )
    argv = [
        self.get_url('/'),
        '--plugin-script', plugin_path,
    ]
    app_builder = Builder(parser.parse_args(argv), unit_test=True)
    application = app_builder.build()
    status = yield from application.run()

    self.assertEqual(1, status)
def test_referer_option_negative(self):
    """Recurse over ``/referrer/`` with a different ``--referer`` value.

    With ``http://superinformation.highway/`` as the referer the run is
    expected to exit with 0 but record no files.
    """
    parser = AppArgumentParser()
    argv = [
        self.get_url('/referrer/'),
        '-r',
        '--referer', 'http://superinformation.highway/',
        '--tries', '1',
        '--waitretry', '.1',
    ]
    app_builder = Builder(parser.parse_args(argv), unit_test=True)
    application = app_builder.build()
    status = yield from application.run()

    self.assertEqual(0, status)
    self.assertEqual(0, app_builder.factory['Statistics'].files)
def test_strip_session_id(self):
    """Crawl ``/forum/`` recursively with ``--strip-session-id``.

    Expects exit code 0 and exactly one recorded file.
    """
    argv = [self.get_url('/forum/'), '-r', '--strip-session-id']
    options = AppArgumentParser().parse_args(argv)
    app_builder = Builder(options, unit_test=True)
    application = app_builder.build()
    status = yield from application.run()

    self.assertEqual(0, status)

    file_count = app_builder.factory['Statistics'].files
    self.assertEqual(1, file_count)
def test_output_document(self):
    """Run with ``--output-document blah.dat``.

    Expects ``blah.dat`` to exist with a non-zero size and exit code 0.
    """
    parser = AppArgumentParser()
    argv = [
        self.get_url('/'),
        '--output-document', 'blah.dat',
    ]
    app_builder = Builder(parser.parse_args(argv), unit_test=True)
    application = app_builder.build()
    status = yield from application.run()

    output_path = 'blah.dat'
    self.assertTrue(os.path.exists(output_path))
    self.assertTrue(os.path.getsize(output_path))
    self.assertEqual(0, status)
def test_invalid_char_dir_list(self):
    """Fetch ``/hidden/invalid_chars/`` keeping the listing file.

    Expects exit code 0 and a ``.listing`` file in the working directory.
    """
    parser = AppArgumentParser()
    argv = [
        self.get_url('/hidden/invalid_chars/'),
        '--no-host-directories',
        '--no-remove-listing',
    ]
    app_builder = Builder(parser.parse_args(argv), unit_test=True)
    application = app_builder.build()
    status = yield from application.run()

    # Dump the working tree to stdout to help diagnose failures.
    print(list(os.walk('.')))

    self.assertEqual(0, status)
    self.assertTrue(os.path.exists('.listing'))
def test_long_cookie(self):
    """Fetch ``/long_cookie`` and check the cookie jar stays empty.

    Expects exit code 0, one recorded file, and zero cookies in the jar.
    """
    parser = AppArgumentParser()
    argv = [self.get_url('/long_cookie')]
    app_builder = Builder(parser.parse_args(argv), unit_test=True)
    application = app_builder.build()
    status = yield from application.run()

    self.assertEqual(0, status)
    self.assertEqual(1, app_builder.factory['Statistics'].files)

    jar_cookies = list(app_builder.factory['CookieJar'])
    _logger.debug('{0}'.format(jar_cookies))

    self.assertEqual(0, len(jar_cookies))
def test_app_input_file_arg(self):
    """Read the URLs to fetch from a file passed via ``--input-file``.

    The temp file holds two UTF-8 encoded URLs, the second one containing
    non-ASCII characters in its query string. The run must exit with 0 and
    record two files.
    """
    arg_parser = AppArgumentParser(real_exit=False)

    # The app runs while the temporary file still exists.
    with tempfile.NamedTemporaryFile() as in_file:
        in_file.write(self.get_url('/').encode('utf-8'))
        in_file.write(b'\n')
        in_file.write(self.get_url('/blog/?ðfßðfëéå').encode('utf-8'))
        in_file.flush()

        args = arg_parser.parse_args(['--input-file', in_file.name])
        builder = Builder(args, unit_test=True)
        app = builder.build()
        exit_code = yield from app.run()

    self.assertEqual(0, exit_code)
    # Expected value first, matching the other assertions in these tests.
    self.assertEqual(2, builder.factory['Statistics'].files)