def test_download_http_url__no_directory_traversal(tmpdir):
    """
    Check that a ".." path part in the Content-Disposition filename
    cannot make the download land outside the target directory.
    """
    url = 'http://www.example.com/whatever.tgz'
    payload = b'downloaded'

    # The content-type is deliberately a random value so that
    # mimetypes.guess_extension cannot guess an extension.
    headers = {
        'content-type': 'random',
        'content-disposition': 'attachment;filename="../out_dir_file"',
    }

    link = Link(url)
    session = Mock()
    response = MockResponse(payload)
    response.url = url
    response.headers = headers
    session.get.return_value = response

    download_dir = tmpdir.joinpath('download')
    os.mkdir(download_dir)

    # The returned values are not inspected; only the resulting
    # directory listing matters for this test.
    _file_path, _content_type = _download_http_url(
        link,
        session,
        download_dir,
        hashes=None,
        progress_bar='on',
    )

    # Exactly one file, with the traversal part stripped, must be
    # present in download_dir.
    assert os.listdir(download_dir) == ['out_dir_file']
def _pip_download(link, download_dir):
    """Download `link` into `download_dir` using pip's HTTP downloader.

    A pip download command is parsed with ``--no-cache-dir`` and used
    to build the session that is passed to pip's
    ``_download_http_url``.

    Args:
        link: link object passed through unchanged to
            ``_download_http_url``.
        download_dir: directory passed through unchanged to
            ``_download_http_url`` as the download location.

    Returns:
        The first element of the tuple returned by
        ``_download_http_url`` (the path of the downloaded file).
    """
    # We disable cache control for downloads for two reasons: First,
    # we're already caching our downloads as resources, so an
    # additional level of caching, even if efficiently managed, is
    # probably not worth the cost. Second, the cachecontrol module
    # used with pip's download facility is unusable with large files
    # as it reads files into memory:
    #
    # https://github.com/ionrock/cachecontrol/issues/145
    #
    from pip._internal.commands.download import DownloadCommand
    from pip._internal.download import _download_http_url

    cmd = _pip_cmd(DownloadCommand)
    options, _ = cmd.parse_args(["--no-cache-dir"])
    session = cmd._build_session(options)
    # NOTE(review): pip's session is expected to be a requests.Session
    # subclass, so close() should release its pooled connections --
    # confirm against the pip version in use.
    try:
        orig_path, _ = _download_http_url(
            link,
            session,
            download_dir,
            hashes=None,
            progress_bar="on",
        )
    finally:
        # Release the session even when the download raises so
        # connections are not leaked across repeated downloads.
        session.close()
    return orig_path