def test_download():
    """Download a mocked page and compare every saved file with its fixture.

    The main page and each entry of ``ASSETS`` are served through
    ``requests_mock``. After ``download`` runs, the saved HTML must equal the
    expected fixture and every asset must exist and match its fixture bytes.
    """
    with tempfile.TemporaryDirectory() as temp_dir, \
            requests_mock.Mocker() as mock:
        # Main HTML page.
        page_fixture = os.path.join(FIXTURES_PATH, INPUT_FIXTURE)
        mock.get(URL, text=read_file(page_fixture))

        # One mocked endpoint per asset, served as raw bytes.
        for link, fixture_path, _ in ASSETS:
            asset_url = get_abs_link(page_url=URL, local_link=link)
            payload = read_file(
                file_path=os.path.join(FIXTURES_PATH, fixture_path),
                mode='rb',
            )
            mock.get(asset_url, content=payload)

        download(URL, temp_dir)

        # The rewritten HTML must match the expected fixture.
        expected_html = read_file(
            os.path.join(FIXTURES_PATH, EXPECTED_FIXTURE),
        )
        saved_html = read_file(os.path.join(temp_dir, EXPECTED_FILENAME))
        assert expected_html == saved_html

        # First pass: every asset file must be present on disk.
        for _, _, expected_file in ASSETS:
            assert os.path.exists(f'{temp_dir}/{expected_file}') is True

        # Second pass: every asset's bytes must equal its fixture.
        for _, fixture_path, expected_file in ASSETS:
            fixture_bytes = read_file(
                get_full_path(FIXTURES_PATH, fixture_path),
                mode='rb',
            )
            saved_bytes = read_file(
                get_full_path(temp_dir, expected_file),
                mode='rb',
            )
            assert fixture_bytes == saved_bytes
def test_not_dir_error(requests_mock):
    """``download`` must raise ``NotADirectoryError`` for a file output path.

    Args:
        requests_mock: mocker used to serve the ``index.html`` fixture at
            ``URL``.
    """
    html = read_fixture("index.html")
    requests_mock.get(URL, text=html)

    with tempfile.TemporaryDirectory() as output:
        # mkstemp returns an already-open OS-level descriptor together with
        # the path; close it immediately so the test does not leak it.
        fd, tmp_path = tempfile.mkstemp(dir=os.path.abspath(output))
        os.close(fd)

        with pytest.raises(NotADirectoryError):
            download(URL, tmp_path)
def test_fs_permission_error(requests_mock):
    """``download`` must raise ``PermissionError`` for an inaccessible directory.

    Args:
        requests_mock: mocker used to serve the ``index.html`` fixture at
            ``URL``.
    """
    html = read_fixture("index.html")
    requests_mock.get(URL, text=html)

    with tempfile.TemporaryDirectory() as output:
        # A mode of only S_ENFMT leaves no read/write/execute bits set.
        os.chmod(output, stat.S_ENFMT)
        try:
            with pytest.raises(PermissionError):
                download(URL, output)
        finally:
            # Give the owner full access back so TemporaryDirectory cleanup
            # can list and remove the directory.
            os.chmod(output, stat.S_IRWXU)
def test_download_permission_denied():
    """``download`` must raise ``errors.SavingError`` for a read-only directory."""
    with TemporaryDirectory() as tempdir:
        with requests_mock.Mocker() as mock:
            mock.get(URL, text="text")
            # Owner read-only: nothing can be created inside the directory.
            os.chmod(tempdir, stat.S_IRUSR)
            try:
                with pytest.raises(errors.SavingError):
                    download(URL, tempdir)
            finally:
                # Give the owner full access back so TemporaryDirectory
                # cleanup can list and remove the directory.
                os.chmod(tempdir, stat.S_IRWXU)
def test_download_with_existing_name():
    """A second download into the same directory must raise ``SavingError``."""
    with TemporaryDirectory() as tempdir, requests_mock.Mocker() as mock:
        mock.get(URL, text="text")

        # The first call is expected to succeed and leave its output behind.
        download(URL, tempdir)

        # The repeated call targets the same directory and must be rejected.
        with pytest.raises(errors.SavingError):
            download(URL, tempdir)
def test_http_errors(requests_mock, status):
    """``download`` must raise ``HTTPError`` when the server answers ``status``.

    Args:
        requests_mock: mocker used to register the failing endpoint.
        status: key into ``map_status_to_route``; also converted with
            ``int()`` to the mocked HTTP status code.
    """
    failing_url = urljoin(URL, map_status_to_route[status])
    requests_mock.get(failing_url, status_code=int(status))

    with tempfile.TemporaryDirectory() as output, \
            pytest.raises(requests.exceptions.HTTPError):
        download(failing_url, output)
def test_connection_errors():
    """Connection and timeout failures must propagate out of ``download``."""
    target = 'http://google.com'
    with tempfile.TemporaryDirectory() as tmpdirname:
        with requests_mock.Mocker() as response:
            # Re-register the same URL for each failure type in turn.
            for failure in (ConnectionError, TimeoutError):
                response.get(target, exc=failure)
                with pytest.raises(failure):
                    download(target, tmpdirname)
def test_exceptions(url):
    """Check the errors raised for a bad URL and for a bad output path.

    Args:
        url: address passed to ``download`` in both scenarios.
    """
    # Scenario 1: wrong url. No mock is registered in this function here.
    with TemporaryDirectory() as tempdir:
        with pytest.raises(ConnectionError):
            download(url, tempdir)

    # Scenario 2: the page is mocked, but the output path is wrong.
    with requests_mock.Mocker() as rmock:
        rmock.get(url, text="Test")
        with pytest.raises(PathAccessError):
            download(url, 'wrong')
def test_iscorrect_img_name():
    """The downloaded image must be saved under the expected file name.

    NOTE(review): no HTTP mock is installed in this function, so ``download``
    presumably reaches the real host - confirm this is intended.
    """
    with tempfile.TemporaryDirectory() as tmpdir:
        url = "https://page-loader.hexlet.repl.co"
        download(url, tmpdir)

        expect_dir_name = os.path.join(
            tmpdir, "page-loader-hexlet-repl-co_files")
        # expect_dir_name already starts with tmpdir, so join onto it
        # directly instead of prefixing tmpdir a second time.
        expect_img_file_name = os.path.join(
            expect_dir_name,
            "page-loader-hexlet-repl-co-assets-professions-nodejs.png")
        assert os.path.isfile(expect_img_file_name)
def test():
    """End-to-end check of ``download`` against mocked fixtures.

    Every link in ``EXPECTED_CONTENT`` / ``EXPECTED_BINARY_CONTENT`` is served
    from its fixture file, and ``http://google.com`` answers with a 301 whose
    ``Location`` is ``https://ru.hexlet.io/courses``. The saved page and each
    saved resource are then compared with the expected fixtures.
    """
    def slurp(path, mode='r'):
        # Read a whole file in the given mode and close it again.
        with open(path, mode) as handle:
            return handle.read()

    start_url = 'http://google.com'
    with tempfile.TemporaryDirectory() as tmpdirname:
        with requests_mock.Mocker() as response:
            for link, fixture in EXPECTED_CONTENT.items():
                response.get(link, text=slurp(fixture, 'r'))
            for link, fixture in EXPECTED_BINARY_CONTENT.items():
                response.get(link, content=slurp(fixture, 'rb'))
            response.get(
                start_url,
                status_code=301,
                headers={'Location': 'https://ru.hexlet.io/courses'},
            )

            download(start_url, tmpdirname)

            # Main page.
            page_path = os.path.join(tmpdirname, 'ru-hexlet-io-courses.html')
            assert os.path.exists(page_path) is True, 'page should exist'
            assert slurp(page_path) == slurp(EXPECTED_PAGE), \
                'page should be equal'

            files_dir = os.path.join(
                tmpdirname, 'ru-hexlet-io-courses_files/')

            # Images.
            images = (
                ('ru-hexlet-io-images-python-icon.png', EXPECTED_IMAGE1),
                ('ru-hexlet-io-images-python-icon2.png', EXPECTED_IMAGE2),
            )
            for image_name, expected_image in images:
                assert content_exists(files_dir, image_name), \
                    'all images should be downloaded'
                saved_image = os.path.join(files_dir, image_name)
                assert slurp(expected_image, 'rb') == slurp(saved_image, 'rb'), \
                    'images should be equal'

            # Stylesheet (compared twice, each time with its own message).
            css_name = 'ru-hexlet-io-assets-application.css'
            assert content_exists(files_dir, css_name), \
                'links content should be downloaded'
            saved_css = os.path.join(files_dir, css_name)
            assert slurp(saved_css, 'r') == slurp(EXPECTED_APPLICATION_CSS, 'r'), \
                'downloaded files should be equal'
            assert slurp(saved_css) == slurp(EXPECTED_APPLICATION_CSS), \
                'application.css should be equal'

            # Linked HTML page saved among the resources.
            html_name = 'ru-hexlet-io-courses.html'
            assert content_exists(files_dir, html_name), \
                'links content should be downloaded'
            saved_html = os.path.join(files_dir, html_name)
            assert slurp(saved_html) == slurp(EXPECTED_COURSES_HTML), \
                'courses.html should be equal'

            # Resource from another host.
            assert not content_exists(
                files_dir, 'ru-hexlet-io-cdn2-hexlet-io-assets-menu.css',
            ), 'files from other host should not be downloaded'

            # Script.
            js_name = 'ru-hexlet-io-packs-js-runtime.js'
            assert content_exists(files_dir, js_name), \
                'script content should be downloaded'
            saved_js = os.path.join(files_dir, js_name)
            assert slurp(saved_js) == slurp(EXPECTED_RUNTIME_JS), \
                'runtime.js should be equal'
def test_http_errors():
    """A broken asset link is tolerated; a broken page raises ``HTTPError``.

    First the page is served normally while its image answers 404, and the
    page file must still be written. Then the page itself answers 404 and
    ``download`` must raise ``requests.HTTPError`` mentioning ``404``.
    """
    page_url = 'http://google.com'
    with tempfile.TemporaryDirectory() as tmpdirname, \
            requests_mock.Mocker() as response:
        with open('tests/fixtures/error_test_page.html', 'r') as fixture_content:
            response.get(page_url, text=fixture_content.read())
        response.get(
            'http://google.com/images/python-icon.png',
            status_code=404,
        )

        download(page_url, tmpdirname)
        saved_page = os.path.join(tmpdirname, 'google-com.html')
        assert os.path.exists(saved_page) is True, \
            "broken file links shouldn't raise errors"

        # Now the page itself is unavailable.
        response.get(page_url, status_code=404)
        with pytest.raises(requests.HTTPError) as error:
            download(page_url, tmpdirname)
        # error.value is only populated once the raises block has exited.
        assert '404' in str(error.value)
def test_page_loader(requests_mock):
    """Download the mocked page and verify the HTML and every resource.

    Args:
        requests_mock: mocker used to serve ``index.html`` at ``URL`` and each
            ``RESOURCES`` entry at its ``"url"`` joined onto ``URL``.
    """
    requests_mock.get(URL, text=read_fixture("index.html"))
    for name in RESOURCES:
        resource_url = RESOURCES[name]["url"]
        payload = read_fixture(os.path.join("res", name), "rb")
        requests_mock.get(urljoin(URL, resource_url), content=payload)

    with tempfile.TemporaryDirectory() as output:
        html_path = os.path.join(output, HTML_NAME)
        resources_dir_path = os.path.join(output, RESOURCES_DIR_NAME)

        returned_path = download(URL, output)

        saved_html = read_file(html_path)
        expected_html = read_fixture("expected-index.html")
        assert returned_path == html_path
        assert saved_html == expected_html
        # Exactly one saved file per declared resource.
        assert len(os.listdir(resources_dir_path)) == len(RESOURCES)

        for name in RESOURCES:
            saved_name = RESOURCES[name]["expected_name"]
            saved_bytes = read_file(
                os.path.join(resources_dir_path, saved_name), "rb",
            )
            fixture_bytes = read_fixture(os.path.join("res", name), "rb")
            assert saved_bytes == fixture_bytes
def test_download_page(requests_mock):
    """Run ``download`` against a fully mocked page and its assets.

    The HTML fixture is served at ``BASE_URL`` and every entry of ``ASSETS``
    at its ``url_path`` joined onto ``BASE_URL``. Each asset dict also gets
    its reference bytes stored under the ``'content'`` key.

    NOTE(review): nothing is asserted about the result of ``download`` here;
    the test only checks that the call completes without raising.

    Args:
        requests_mock: mocker used to register the page and asset endpoints.
    """
    html_text = get_file(FIXTURE_DIR / HTML_FILE_NAME)
    requests_mock.get(BASE_URL, text=html_text)

    for asset in ASSETS:
        asset_url = urljoin(BASE_URL, asset['url_path'])
        reference_fs_path = FIXTURE_DIR / ASSETS_DIR_NAME / asset['file_name']
        reference_content = get_file(reference_fs_path, 'rb')
        # Kept on the shared asset dict, as the original code did.
        asset['content'] = reference_content
        requests_mock.get(asset_url, content=reference_content)

    with TemporaryDirectory() as tmp_dir:
        # Precondition: the output directory starts out empty.
        assert not os.listdir(tmp_dir)
        download(BASE_URL, Path(tmp_dir).resolve())
def test_response_error(requests_mock, code):
    """``download`` must raise ``HTTPError`` when the server answers ``code``.

    Args:
        requests_mock: mocker used to register the failing endpoint.
        code: HTTP status code returned by the mocked endpoint.
    """
    url = urljoin(BASE_URL, "not-real-url")
    requests_mock.get(url, status_code=code)

    with TemporaryDirectory() as tmp_dir:
        with pytest.raises(requests.exceptions.HTTPError):
            # No assert here: a falsy return would raise AssertionError and
            # hide pytest's own "DID NOT RAISE" failure message.
            download(url, tmp_dir)
def main():
    """Run the page-loader script.

    Prints the value returned by ``download`` and exits with status 0, or
    exits with status 1 when ``errors.AppInternalError`` is raised.
    """
    url, output_path = get_arguments()
    try:
        print(download(url, output_path))
    except errors.AppInternalError:
        sys.exit(1)
    sys.exit(0)
def main():
    """Download the requested page and print where it was saved.

    Any exception is logged at CRITICAL level and turns into exit status 1.
    """
    args = arg_parser.parse()
    try:
        logging.basicConfig(level=logging.INFO)
        logging.info('Start download')
        saved_to = download(args.url, args.output)
        print('Path webpage: ', saved_to)
    except Exception as err:
        # Top-level boundary: report the failure and signal it to the shell.
        logging.critical(err)
        sys.exit(1)
def main():
    """Download the requested page and report the resulting file path.

    Logging is configured from the parsed arguments first. Any exception is
    logged (message at ERROR, traceback at DEBUG) and turns into exit
    status 1.
    """
    args = get_parsed_args()
    setup(log_level=args.log_level, filename=args.file)
    try:
        file_path = download(args.url, args.output)
        print('The webpage had been successfully downloaded to\n', file_path)
    except Exception as e:
        logging.error(str(e))
        # The original had a stray trailing comma after this call, which
        # turned the statement into an accidental tuple expression.
        logging.debug(str(e), exc_info=True)
        sys.exit(1)
def test_page_download(fixture_tempdir, **kwargs):
    """The saved page must equal the prettified mocked response.

    Args:
        fixture_tempdir: output directory passed to ``download``.
        **kwargs: must contain the requests mocker under the key ``'m'``.
    """
    source_url = 'mock://test.com'
    http_mock = kwargs['m']
    http_mock.get(source_url, text='test_text')

    saved_path = download(source_url, fixture_tempdir)

    # Build the expectation from the same mocked response.
    soup = BeautifulSoup(
        requests.get(source_url).content,
        'html.parser',
    )
    with open(saved_path, 'r', encoding='utf-8') as saved:
        actual_markup = saved.read()
    assert actual_markup == soup.prettify(formatter='html5')
def test_download():
    """The downloaded page keeps the response text and gets the expected name."""
    page_url = "https://ru.hexlet.io/professions"
    expected_text = "Test data"
    expected_name = "ru-hexlet-io-professions.html"

    with requests_mock.Mocker() as rmock, TemporaryDirectory() as tempdir:
        rmock.get(page_url, text=expected_text)

        # download returns a pair; only the first element is checked here.
        local_page, _ = download(page_url, tempdir)

        received_text = local_page.read_text(encoding='utf-8')
        assert received_text == expected_text
        assert local_page.name == expected_name
def main():
    """Download the requested page, optionally open it, and print its path.

    Exits with status 1 when ``logging.PageLoaderError`` is raised by
    ``download`` and with status 0 otherwise.
    """
    # Imported locally so the block does not depend on a module-level import.
    # The bare exit() used before is a site-provided helper meant for the
    # interactive shell and is absent under ``python -S``.
    import sys

    args = parse_args()
    logging.configure_logger(args.loglevel)
    logger.debug("Program started with arguments: {}".format(args))
    try:
        # Only the page is used below; the second element is ignored.
        page, _ = download(args.url, args.output, args.externals)
    except logging.PageLoaderError:
        sys.exit(1)
    if args.x:
        webbrowser.open(page)
    print(page)
    sys.exit(0)
def test_network_error(requests_mock):
    """A connection failure must propagate and leave the directory empty.

    Args:
        requests_mock: mocker that raises ``ConnectionError`` for the bad URL.
    """
    bad_url = "https://shm____oogle.com"
    reference_exception = requests.exceptions.ConnectionError
    requests_mock.get(bad_url, exc=reference_exception)

    with TemporaryDirectory() as tmp_dir:
        assert not os.listdir(tmp_dir)
        with pytest.raises(reference_exception):
            # No assert here: a falsy return would raise AssertionError and
            # hide pytest's own "DID NOT RAISE" failure message.
            download(bad_url, tmp_dir)
        # Nothing may have been written before the failure.
        assert not os.listdir(tmp_dir)
def test_io_errors():
    """Each bad output path must raise its matching built-in OS error."""
    cases = (
        (NotADirectoryError, './tests/__init__.py'),
        (FileNotFoundError, 'non_existing_path/'),
        (PermissionError, '/sys'),
    )
    for expected_error, output_path in cases:
        with pytest.raises(expected_error):
            download(URL, output_path)
def main():
    """Parse the command line, download the page and print the result.

    Exits with status 0 on success and 1 on any exception. A
    ``requests.exceptions.ConnectionError`` is reported with a dedicated
    message; every other exception is printed as is.
    """
    parser = argparse.ArgumentParser(
        description="Download HTML-page into file"
    )
    parser.add_argument(
        "web_source",
        type=str,
        help="destination of resource to download"
    )
    parser.add_argument(
        "-o", "--output",
        type=str,
        help="'path_to_file' - destination to download HTML-page."
             "(Default: 'cwd')",
        default=os.getcwd()
    )
    # Parse once and reuse the namespace (the original parsed three times).
    args = parser.parse_args()
    ie_source = args.web_source
    output = args.output

    try:
        print(download(ie_source, output))
    except requests.exceptions.ConnectionError:
        print(f'Unable to connect to {ie_source}')
        sys.exit(1)
    except Exception as e:
        # The former FileNotFoundError / PermissionError / NotADirectoryError /
        # ConnectionAbortedError / FileExistsError clauses all did exactly
        # this, so one handler covers them.
        print(e)
        sys.exit(1)
    else:
        sys.exit(0)
def test_download_with_local_resources(
    open_with_local_resources,
    open_with_changed_paths,
    open_test_css,
    open_test_html,
    open_test_js,
    open_test_png,
):
    """Download a page with local resources and verify every saved file.

    Each argument holds fixture content. ``bytes`` values are served as a
    binary body and everything else as text.
    """
    mocks = {
        URL: open_with_local_resources,
        "http://test.com/assets/application.css": open_test_css,
        "http://test.com/courses": open_test_html,
        "http://test.com/assets/professions/nodejs.png": open_test_png,
        "http://test.com/packs/js/runtime.js": open_test_js,
    }
    with TemporaryDirectory() as tempdir:
        with requests_mock.Mocker() as mock:
            for url, content in mocks.items():
                body_kwarg = "content" if isinstance(content, bytes) else "text"
                mock.get(url, **{body_kwarg: content})
            received_html_path = download("http://test.com", tempdir)

        dir_path = os.path.join(tempdir, "test-com_files")
        received_resource_list = sorted(os.listdir(dir_path))

        assert received_html_path == f"{tempdir}/test-com.html"
        assert received_resource_list == [
            "test-com-assets-application.css",
            "test-com-assets-professions-nodejs.png",
            "test-com-courses.html",
            "test-com-packs-js-runtime.js",
        ]

        with open(received_html_path, "r") as received:
            assert received.read() == open_with_changed_paths

        # (file name, open mode, expected content), in the original order.
        saved_resources = (
            ("test-com-assets-application.css", "r", open_test_css),
            ("test-com-assets-professions-nodejs.png", "rb", open_test_png),
            ("test-com-courses.html", "r", open_test_html),
            ("test-com-packs-js-runtime.js", "r", open_test_js),
        )
        for file_name, mode, expected in saved_resources:
            with open(f"{dir_path}/{file_name}", mode) as received:
                assert received.read() == expected
def main():
    """Download the requested page and print the path of the saved file.

    Any exception is logged (message at ERROR, traceback at DEBUG) and turns
    into exit status 1.
    """
    args = cli.get_args_parser().parse_args()
    logging.setup(args.log_level, filename=args.log_file)

    try:
        path = download(args.url, args.output)
    except Exception as err:
        # NOTE(review): the module-level ``logging`` exposes ``setup`` and so
        # looks like a project module; this import presumably fetches the
        # standard library one under another name - confirm.
        import logging as logger

        message = str(err)
        logger.error(message)
        logger.debug(message, exc_info=True)
        sys.exit(1)

    print(f'Success! File path: {path}')
def main():
    """Run the page_loader script.

    Exits with status 0 after printing where the page was saved, or with
    status 1 after printing the error when any exception is raised.
    """
    try:
        logger.info('Page-loader script started!')
        # Parse the arguments once instead of once per attribute.
        args = get_args()
        file_path = download(url=args.URL, path=args.output)
    except Exception as err:
        print(err)
        logger.info('Page-loader script finished.')
        sys.exit(1)
    else:
        logger.info('Page-loader script finished.')
        print(f'Output page had been saved to \'{file_path}\'')
        sys.exit(0)
def test_resources_download(page, image, style, script):
    """All three page assets must be saved under their expected names.

    Args:
        page: path-like fixture holding the HTML page.
        image: path-like fixture holding the PNG image.
        style: path-like fixture holding the stylesheet.
        script: path-like fixture holding the script.
    """
    address = "https://local.com"
    with requests_mock.Mocker() as rmock:
        rmock.get(address, text=page.read_text(encoding='utf-8'))
        rmock.get(f"{address}/assets/style.css", text=style.read_text())
        rmock.get(f"{address}/assets/script.js", text=script.read_text())
        rmock.get(f"{address}/assets/python.png", content=image.read_bytes())

        with TemporaryDirectory() as tempdir:
            page_path, resources_path = download(address, tempdir, False)

            # The saved page must differ from the source page.
            saved_markup = page_path.read_text(encoding='utf-8')
            assert saved_markup != page.read_text(encoding='utf-8')

            # Check resources exist in *_files directory.
            resources_list = [entry.name for entry in resources_path.iterdir()]
            assert len(resources_list) == 3
            for asset in ['style.css', 'script.js', 'python.png']:
                assert f"local-com-assets-{asset}" in resources_list
def test_page_loader(mocker):
    """
    Test page loader, including a resource with an over-long name.

    ``random.choices`` is patched to a fixed sequence, which the expected
    name of the long-link file relies on.

    Args:
        mocker: mocker
    """
    mocker.patch(
        'random.choices',
        return_value=['h', '2', 'i', 'S', '4', 'x', 'h', '2', 'i', 'S'],
    )
    long_name = 'a' * (MAX_LENGTH_FILENAME + 45)

    with tempfile.TemporaryDirectory() as tempdir:
        expected_path_index_page = join(tempdir, 'test-com.html')

        with open(get_file_absolute_path('page.html')) as file_before:
            with requests_mock.Mocker() as mock:
                mock.get(URL, text=file_before.read())
                mock.get(
                    '{url}/{link_name}LongLink'.format(
                        url=URL,
                        link_name=long_name,
                    ),
                    content=b'html',
                )
                # The remaining resources, registered in the original order.
                plain_resources = (
                    ('{url}/images/python.png', b'png'),
                    ('{url}/scripts/test.js', b'js'),
                    ('{url}/courses', b'html'),
                    ('{url}/styles/app.css', b'css'),
                )
                for template, body in plain_resources:
                    mock.get(template.format(url=URL), content=body)

                path_index_page = download(URL, tempdir)

                assert expected_path_index_page == path_index_page

                resources_dir = join(tempdir, 'test-com_files')
                assert isdir(resources_dir)

                expected_files = [
                    'test-com-{name}h2iS4xh2iS.html'.format(
                        name=long_name[:231],
                    ),
                    'test-com-images-python.png',
                    'test-com-courses.html',
                    'test-com-scripts-test.js',
                    'test-com-styles-app.css',
                ]
                for filename in expected_files:
                    assert isfile(join(resources_dir, filename))

                with open(get_file_absolute_path('expected.html')) as file_exp:
                    with open(path_index_page) as file_tested:
                        assert file_exp.read() == file_tested.read()
def fixture_source_mock(fixture_tempdir, **kwargs):
    """Register the mocked test site and download it.

    Args:
        fixture_tempdir: output directory passed to ``download``.
        **kwargs: must contain the requests mocker under the key ``'mock'``.

    Returns:
        Whatever ``download`` returns for ``mock://test.com``.
    """
    def load_text(path):
        # Read a UTF-8 text fixture in full.
        with open(path, 'r', encoding='utf-8') as handle:
            return handle.read()

    def load_bytes(path):
        # Read a binary fixture in full.
        with open(path, 'rb') as handle:
            return handle.read()

    mock_source = kwargs['mock']

    mock_source.get(
        'mock://test.com',
        text=load_text('tests/fixtures/test.html'),
    )
    mock_source.get(
        'mock://test.com/haha.png',
        content=load_bytes('tests/fixtures/haha.png'),
    )
    mock_source.get(
        'mock://test.com/test1.css',
        text=load_text('tests/fixtures/test1.css'),
    )
    mock_source.get(
        'mock://test.com/test2.js',
        text=load_text('tests/fixtures/test2.js'),
    )
    # This endpoint is served with an empty body.
    mock_source.get('mock://test.com/courses', text='')

    return download('mock://test.com', fixture_tempdir)
def main():
    """Configure logging from ``--verbose``, download the page and exit.

    Exits with ``EXIT_OK`` on success and with ``EXIT_ERROR`` when
    ``errors.KnownError`` is raised.
    """
    args = cli.set_parser()

    # Anything other than "debug" / "info" falls back to WARNING.
    levels = {"debug": log.DEBUG, "info": log.INFO}
    set_logging(levels.get(args.verbose, log.WARNING))

    try:
        path = download(args.url, args.output)
    except errors.KnownError as error:
        log.error(f"{error}")
        sys.exit(EXIT_ERROR)

    print(f"\u2714 Page was successfully downloaded into '{path}'")
    log.info("Page with resources were successfully downloaded")
    sys.exit(EXIT_OK)