def _select_binary_stream(self, name, binary_path, fetcher=None):
    """Select a binary matching the current os and architecture.

    Every distinct base url in ``self._baseurls`` is tried in order until one download succeeds.
    The download goes to a temporary file which is only open while the generator is suspended
    at its ``yield``.

    :param string name: The name of the binary; only used in log messages here.
    :param string binary_path: The path to the binary to fetch.
    :param fetcher: Optional argument used only for testing, to 'pretend' to open urls.
    :returns: a 'stream' to download it from a support directory. The yielded 'stream' is
      actually a lambda function which returns the files binary contents.
    :raises: :class:`pants.binary_util.BinaryUtil.BinaryNotFound` if no binary of the given
      version and name could be found for the current platform.
    :raises: ``self.NoBaseUrlsError`` if no base urls are configured.
    """
    if not self._baseurls:
        raise self.NoBaseUrlsError(
            'No urls are defined for the --pants-support-baseurls option.')

    downloaded_successfully = False
    accumulated_errors = []
    # Wrap in OrderedSet because duplicates are wasteful.
    for baseurl in OrderedSet(self._baseurls):
        url = posixpath.join(baseurl, binary_path)
        logger.info('Attempting to fetch {name} binary from: {url} ...'.format(name=name, url=url))
        try:
            with temporary_file() as dest:
                fetcher = fetcher or Fetcher()
                fetcher.download(url, listener=Fetcher.ProgressListener(), path_or_fd=dest)
                logger.info('Fetched {name} binary from: {url} .'.format(name=name, url=url))
                downloaded_successfully = True
                dest.seek(0)
                yield lambda: dest.read()
                break
        except (IOError, Fetcher.Error, ValueError) as e:
            if downloaded_successfully:
                # The download itself worked, so this error is not a fetch failure. If this
                # generator is driven as a context manager (decorator not visible here), an
                # error raised by the consumer is thrown in at the yield above; it must
                # propagate rather than be recorded and retried against the next url.
                raise
            accumulated_errors.append('Failed to fetch binary from {url}: {error}'
                                      .format(url=url, error=e))
    if not downloaded_successfully:
        raise self.BinaryNotFound(binary_path, accumulated_errors)
def setUp(self):
    """Create the mox doubles and the Fetcher under test.

    The Fetcher is handed the mock requests API, so recorded expectations on
    ``self.requests`` and ``self.response`` drive it.
    """
    super(FetcherTest, self).setUp()

    mocker = self.mox
    requests_api = mocker.CreateMockAnything()
    self.requests = requests_api
    self.response = mocker.CreateMock(requests.Response)
    self.fetcher = Fetcher(requests_api=requests_api)
    self.listener = mocker.CreateMock(Fetcher.Listener)
def _bootstrap_ivy(self, bootstrap_jar_path):
    """Return an Ivy built on the bootstrap jar, downloading the jar first when it is absent.

    :param string bootstrap_jar_path: Where the bootstrap jar lives (or should be placed).
    :returns: An ``Ivy`` configured from the ivy subsystem options.
    :raises: ``self.Error`` if the fetcher fails to download the jar.
    """
    opts = self._ivy_subsystem.get_options()

    jar_present = os.path.exists(bootstrap_jar_path)
    if not jar_present:
        with temporary_file() as tmp_jar:
            fetcher = Fetcher()
            sha1_listener = fetcher.ChecksumListener(digest=hashlib.sha1())
            try:
                logger.info('\nDownloading {}'.format(opts.bootstrap_jar_url))
                # TODO: Capture the stdout of the fetcher, instead of letting it output
                # to the console directly.
                fetcher.download(
                    opts.bootstrap_jar_url,
                    listener=fetcher.ProgressListener().wrap(sha1_listener),
                    path_or_fd=tmp_jar,
                    timeout_secs=opts.bootstrap_fetch_timeout_secs,
                )
                logger.info('sha1: {}'.format(sha1_listener.checksum))
                # Close the handle before the file is moved into its final location.
                tmp_jar.close()
                touch(bootstrap_jar_path)
                shutil.move(tmp_jar.name, bootstrap_jar_path)
            except fetcher.Error as err:
                raise self.Error('Problem fetching the ivy bootstrap jar! {}'.format(err))

    settings = opts.bootstrap_ivy_settings or opts.ivy_settings
    return Ivy(
        bootstrap_jar_path,
        ivy_settings=settings,
        ivy_cache_dir=opts.cache_dir,
        extra_jvm_options=self._ivy_subsystem.extra_jvm_options(),
    )
def bootstrap_coursier(self, workunit_factory):
    """Make sure the coursier bootstrap jar exists locally and return its path.

    The work unit is opened whether or not a download is needed; its outcome is only set
    when a download is attempted.

    :param workunit_factory: Callable returning a context manager that yields a work unit.
    :returns: Path to the coursier bootstrap jar.
    :raises: ``self.Error`` if the download fails.
    """
    options = self.get_options()
    jar_url = options.bootstrap_jar_url
    jar_dir = os.path.join(
        options.pants_bootstrapdir, 'tools', 'jvm', 'coursier', options.version)
    bootstrap_jar_path = os.path.join(jar_dir, 'coursier.jar')

    with workunit_factory(name='bootstrap-coursier', labels=[WorkUnitLabel.TOOL]) as workunit:
        if not os.path.exists(bootstrap_jar_path):
            with safe_concurrent_creation(bootstrap_jar_path) as temp_path:
                fetcher = Fetcher(get_buildroot())
                sha1_listener = fetcher.ChecksumListener(digest=hashlib.sha1())
                try:
                    logger.info('\nDownloading {}'.format(jar_url))
                    # TODO: Capture the stdout of the fetcher, instead of letting it output
                    # to the console directly.
                    fetcher.download(
                        jar_url,
                        listener=fetcher.ProgressListener().wrap(sha1_listener),
                        path_or_fd=temp_path,
                        timeout_secs=options.bootstrap_fetch_timeout_secs,
                    )
                    logger.info('sha1: {}'.format(sha1_listener.checksum))
                except fetcher.Error as err:
                    workunit.set_outcome(WorkUnit.FAILURE)
                    raise self.Error(
                        'Problem fetching the coursier bootstrap jar! {}'.format(err))
                # Only reached when the try block finished without raising.
                workunit.set_outcome(WorkUnit.SUCCESS)

        return bootstrap_jar_path
def resolve_target(self, node_task, target, results_dir, node_paths):
    """Install node_modules for target from its preinstalled dependencies archive.

    The target sources are copied into results_dir, the archive is downloaded and extracted
    in a temporary directory, and the extracted ``node_modules`` directory is moved into
    results_dir.

    :param node_task: Unused by this implementation.
    :param target: Target providing ``dependencies_archive_url`` and ``address``.
    :param string results_dir: Directory receiving the sources and ``node_modules``.
    :param node_paths: Unused by this implementation.
    :raises: ``TaskError`` if the archive name cannot be determined, the fetch fails, or the
      archive does not contain a ``node_modules`` directory.
    """
    self._copy_sources(target, results_dir)

    with temporary_dir() as temp_dir:
        archive_url = target.dependencies_archive_url

        # The archive name is the last path segment of the url.
        archive_file_name = urllib_parse.urlsplit(archive_url).path.rsplit('/', 1)[-1]
        if not archive_file_name:
            raise TaskError(
                'Could not determine archive file name for {target} from {url}'
                .format(target=target.address.reference(), url=archive_url))

        download_path = os.path.join(temp_dir, archive_file_name)
        log_fields = dict(
            archive_file_name=archive_file_name,
            dependencies_archive_url=archive_url,
            path=download_path,
        )

        logger.info(
            'Downloading archive {archive_file_name} from '
            '{dependencies_archive_url} to {path}'.format(**log_fields))

        try:
            Fetcher(get_buildroot()).download(
                archive_url,
                listener=Fetcher.ProgressListener(),
                path_or_fd=download_path,
                timeout_secs=self.get_options().fetch_timeout_secs,
            )
        except Fetcher.Error as error:
            raise TaskError(
                'Failed to fetch preinstalled node_modules for {target} from {url}: {error}'
                .format(target=target.address.reference(), url=archive_url, error=error))

        logger.info(
            'Fetched archive {archive_file_name} from {dependencies_archive_url} to {path}'
            .format(**log_fields))

        archiver_for_path(archive_file_name).extract(download_path, temp_dir)

        extracted_node_modules = os.path.join(temp_dir, 'node_modules')
        if not os.path.isdir(extracted_node_modules):
            raise TaskError(
                'Did not find an extracted node_modules directory for {target} '
                'inside {dependencies_archive_url}'.format(
                    target=target.address.reference(),
                    dependencies_archive_url=archive_url))

        destination = os.path.join(results_dir, 'node_modules')
        shutil.move(extracted_node_modules, destination)
def _select_binary_stream(self, name, urls):
    """Download a file from a list of urls, yielding a stream after downloading the file.

    URLs are tried in order until they succeed. The yielded stream is the temporary file the
    download was written to, rewound to its start; it is only open while the generator is
    suspended at its ``yield``.

    :param string name: The name of the binary; used in log messages and in the error raised.
    :param urls: The urls to try; duplicates are tried only once.
    :raises: :class:`BinaryToolFetcher.BinaryNotFound` if requests to all the given urls fail.
    """
    downloaded_successfully = False
    accumulated_errors = []
    # De-dup URLS: we only want to try each URL once.
    for url in OrderedSet(urls):
        logger.info(
            'Attempting to fetch {name} binary from: {url} ...'.format(name=name, url=url))
        try:
            with temporary_file() as dest:
                logger.debug(
                    "in BinaryToolFetcher: url={}, timeout_secs={}".format(
                        url, self._timeout_secs))
                self._fetcher.download(url,
                                       listener=Fetcher.ProgressListener(),
                                       path_or_fd=dest,
                                       timeout_secs=self._timeout_secs)
                logger.info('Fetched {name} binary from: {url} .'.format(name=name, url=url))
                downloaded_successfully = True
                dest.seek(0)
                yield dest
                break
        except (IOError, Fetcher.Error, ValueError) as e:
            if downloaded_successfully:
                # The download itself worked, so this error is not a fetch failure. If this
                # generator is driven as a context manager (decorator not visible here), an
                # error raised by the consumer is thrown in at the yield above; it must
                # propagate rather than be recorded and retried against the next url.
                raise
            accumulated_errors.append(
                'Failed to fetch binary from {url}: {error}'.format(url=url, error=e))
    if not downloaded_successfully:
        raise self.BinaryNotFound(name, accumulated_errors)
def test_progress_listener(self, timer):
    """Check the final line a ProgressListener writes for a 1024-chunk fetch.

    :param timer: Stand-in for the clock; its ``side_effect`` supplies the readings 0 and 1.137.
    """
    timer.side_effect = [0, 1.137]
    output = StringIO()
    progress = Fetcher.ProgressListener(width=5, chunk_size_bytes=1, stream=output)
    payload = [[1]] * 1024

    with self.expect_get('http://baz',
                         chunk_size_bytes=1,
                         timeout_secs=37,
                         chunks=payload) as (chunks, expected_listener_calls):
        wrapped = progress.wrap(self.listener)
        self.fetcher.fetch('http://baz', wrapped, chunk_size_bytes=1, timeout_secs=37)

    self.assert_listener_calls(expected_listener_calls, chunks)

    # We just test the last progress line which should indicate a 100% complete download.
    # We control progress bar width (5 dots), size (1KB) and total time downloading (fake 1.137s).
    final_line = output.getvalue().split('\r')[-1]
    self.assertEqual('100% ..... 1 KB 1.137s\n', final_line)
def test_checksum_listener(self):
    """A ChecksumListener feeds every chunk to its digest and exposes the hex digest."""
    fake_digest = mock.Mock(spec=hashlib.md5())
    fake_digest.hexdigest.return_value = '42'
    checksum_listener = Fetcher.ChecksumListener(digest=fake_digest)

    with self.expect_get('http://baz',
                         chunk_size_bytes=1,
                         timeout_secs=37) as (chunks, expected_listener_calls):
        wrapped = checksum_listener.wrap(self.listener)
        self.fetcher.fetch('http://baz', wrapped, chunk_size_bytes=1, timeout_secs=37)

    self.assertEqual('42', checksum_listener.checksum)

    # One update() per chunk, then a single hexdigest().
    expected_digest_calls = [mock.call.update(chunk) for chunk in chunks]
    expected_digest_calls.append(mock.call.hexdigest())
    self.assertEqual(expected_digest_calls, fake_digest.method_calls)

    self.assert_listener_calls(expected_listener_calls, chunks)
    self.response.close.assert_called_once_with()
def test_execute_java_no_error_weird_path(self):
    """Run a downloaded echo jar, then again from a path made of reserved characters.

    :API: public
    """
    with temporary_file(suffix=".jar") as jar_file:
        jar_path = jar_file.name
        fetcher = Fetcher(get_buildroot())
        try:
            # Download a jar that echoes things.
            fetcher.download(
                "https://maven-central.storage-download.googleapis.com/repos/central/data/io/get-coursier/echo/1.0.0/echo-1.0.0.jar",
                path_or_fd=jar_path,
                timeout_secs=2,
            )
        except fetcher.Error:
            self.fail("fail to download echo jar")

        task = self.execute(self.context([]))
        executor = task.create_java_executor()

        def run_echo(classpath_entry):
            # Exit code of running the echo main class with the given single-entry classpath.
            return util.execute_java(
                executor=executor,
                classpath=[classpath_entry],
                main="coursier.echo.Echo",
                args=["Hello World"],
                create_synthetic_jar=True,
            )

        # Executing the jar as is should work.
        self.assertEqual(0, run_echo(jar_path))

        # Rename the jar to contain reserved characters.
        new_path = os.path.join(os.path.dirname(jar_path), "%%!!!===++.jar")
        safe_concurrent_rename(jar_path, new_path)

        # Executing the new path should work.
        self.assertEqual(0, run_echo(new_path))
def test_download_redirect(self):
    """Make sure that a server that returns a redirect is actually followed.

    Test with a real HTTP server that redirects from one URL to another. The file produced
    by the download is removed when the test finishes.
    """
    import os  # Function-scope import: only needed to remove the downloaded file.

    fetcher = Fetcher()
    with self.setup_server() as base_url:
        self._URL = base_url
        self.assertFalse(self._URL2_ACCESSED)
        self.assertFalse(self._URL1_ACCESSED)

        path = fetcher.download(base_url + '/url2')
        try:
            self.assertTrue(self._URL2_ACCESSED)
            self.assertTrue(self._URL1_ACCESSED)

            with open(path) as fp:
                self.assertEqual('returned from redirect\r\n', fp.read())
        finally:
            # download() was given no path_or_fd, so the file it returned is ours to delete;
            # previously it was left behind after every run.
            os.unlink(path)
def test_download_redirect(self):
    """Make sure that a server that returns a redirect is actually followed.

    Test with a real HTTP server that redirects from one URL to another. The file produced
    by the download is removed when the test finishes.
    """
    import os  # Function-scope import: only needed to remove the downloaded file.

    fetcher = Fetcher('/unused/root/dir')
    with self.setup_server() as base_url:
        self._URL = base_url
        self.assertFalse(self._URL2_ACCESSED)
        self.assertFalse(self._URL1_ACCESSED)

        path = fetcher.download(base_url + '/url2')
        try:
            self.assertTrue(self._URL2_ACCESSED)
            self.assertTrue(self._URL1_ACCESSED)

            with open(path) as fp:
                self.assertEqual('returned from redirect\r\n', fp.read())
        finally:
            # download() was given no path_or_fd, so the file it returned is ours to delete;
            # previously it was left behind after every run.
            os.unlink(path)
def assert_local_file_fetch_relative(self, url, *rel_path):
    """Assert that url, fetched by a Fetcher rooted at a temp dir, yields a known file.

    :param string url: The url handed to ``Fetcher.download``.
    :param rel_path: Path components, relative to the temporary root dir, of the file that
      is written before the download and whose contents are expected back.
    """
    expected_contents = b'proof'
    with temporary_dir() as root_dir:
        source_path = os.path.join(root_dir, *rel_path)
        with safe_open(source_path, 'wb') as source:
            source.write(expected_contents)

        with temporary_file() as download_fp:
            fetcher = Fetcher(root_dir)
            fetcher.download(url, path_or_fd=download_fp)
            download_fp.close()

            with open(download_fp.name, 'rb') as downloaded:
                actual_contents = downloaded.read()
                self.assertEqual(expected_contents, actual_contents)
def _bootstrap_ivy(self, bootstrap_jar_path):
    """Return an Ivy built on the bootstrap jar, downloading the jar first when it is absent.

    :param string bootstrap_jar_path: Where the bootstrap jar lives (or should be placed).
    :returns: An ``Ivy`` configured from the ivy subsystem.
    :raises: ``self.Error`` if the fetcher fails to download the jar.
    """
    subsystem = self._ivy_subsystem
    opts = subsystem.get_options()

    if not os.path.exists(bootstrap_jar_path):
        with temporary_file() as tmp_jar:
            fetcher = Fetcher(get_buildroot())
            sha1_listener = fetcher.ChecksumListener(digest=hashlib.sha1())
            try:
                logger.info('\nDownloading {}'.format(opts.bootstrap_jar_url))
                # TODO: Capture the stdout of the fetcher, instead of letting it output
                # to the console directly.
                fetcher.download(
                    opts.bootstrap_jar_url,
                    listener=fetcher.ProgressListener().wrap(sha1_listener),
                    path_or_fd=tmp_jar,
                    timeout_secs=opts.bootstrap_fetch_timeout_secs,
                )
                logger.info('sha1: {}'.format(sha1_listener.checksum))
                # Close the handle before the file is moved into its final location.
                tmp_jar.close()
                touch(bootstrap_jar_path)
                shutil.move(tmp_jar.name, bootstrap_jar_path)
            except fetcher.Error as err:
                raise self.Error('Problem fetching the ivy bootstrap jar! {}'.format(err))

    settings = opts.bootstrap_ivy_settings or opts.ivy_settings
    return Ivy(
        bootstrap_jar_path,
        ivy_settings=settings,
        ivy_resolution_cache_dir=subsystem.resolution_cache_dir(),
        extra_jvm_options=subsystem.extra_jvm_options(),
    )
def bootstrap_coursier(self, workunit_factory):
    """Make sure the coursier bootstrap jar exists locally and return its path.

    A work unit is only opened when the jar is missing and a download is needed.

    :param workunit_factory: Callable returning a context manager that yields a work unit.
    :returns: Path to the coursier bootstrap jar.
    :raises: ``self.Error`` if the download fails.
    """
    options = self.get_options()
    jar_url = options.bootstrap_jar_url
    jar_dir = os.path.join(
        options.pants_bootstrapdir, 'tools', 'jvm', 'coursier', options.version)
    bootstrap_jar_path = os.path.join(jar_dir, 'coursier.jar')

    # Nothing to do (and no work unit to report) when the jar is already in place.
    if os.path.exists(bootstrap_jar_path):
        return bootstrap_jar_path

    with workunit_factory(name='bootstrap-coursier', labels=[WorkUnitLabel.TOOL]) as workunit:
        with safe_concurrent_creation(bootstrap_jar_path) as temp_path:
            fetcher = Fetcher(get_buildroot())
            sha1_listener = fetcher.ChecksumListener(digest=hashlib.sha1())
            try:
                logger.info('\nDownloading {}'.format(jar_url))
                # TODO: Capture the stdout of the fetcher, instead of letting it output
                # to the console directly.
                fetcher.download(
                    jar_url,
                    listener=fetcher.ProgressListener().wrap(sha1_listener),
                    path_or_fd=temp_path,
                    timeout_secs=options.bootstrap_fetch_timeout_secs,
                )
                logger.info('sha1: {}'.format(sha1_listener.checksum))
            except fetcher.Error as err:
                workunit.set_outcome(WorkUnit.FAILURE)
                raise self.Error(
                    'Problem fetching the coursier bootstrap jar! {}'.format(err))
            # Only reached when the try block finished without raising.
            workunit.set_outcome(WorkUnit.SUCCESS)

    return bootstrap_jar_path
def _bootstrap_ivy(self, bootstrap_jar_path):
    """Return an Ivy built on the bootstrap jar, downloading the jar first when it is absent.

    :param string bootstrap_jar_path: Where the bootstrap jar lives (or should be placed).
    :returns: An ``Ivy`` using this object's ivy settings and cache dir.
    :raises: ``self.Error`` if the fetcher fails to download the jar.
    """
    jar_present = os.path.exists(bootstrap_jar_path)
    if not jar_present:
        with temporary_file() as tmp_jar:
            fetcher = Fetcher()
            sha1_listener = fetcher.ChecksumListener(digest=hashlib.sha1())
            try:
                log.info('\nDownloading %s' % self._bootstrap_jar_url)
                # TODO: Capture the stdout of the fetcher, instead of letting it output
                # to the console directly.
                fetcher.download(
                    self._bootstrap_jar_url,
                    listener=fetcher.ProgressListener().wrap(sha1_listener),
                    path_or_fd=tmp_jar,
                    timeout_secs=self._timeout_secs,
                )
                log.info('sha1: %s' % sha1_listener.checksum)
                # Close the handle before the file is moved into its final location.
                tmp_jar.close()
                touch(bootstrap_jar_path)
                shutil.move(tmp_jar.name, bootstrap_jar_path)
            except fetcher.Error as err:
                raise self.Error('Problem fetching the ivy bootstrap jar! %s' % err)

    ivy = Ivy(bootstrap_jar_path,
              ivy_settings=self._ivy_settings,
              ivy_cache_dir=self.ivy_cache_dir)
    return ivy
def _bootstrap_ivy(self, bootstrap_jar_path):
    """Return an Ivy built on the bootstrap jar, downloading the jar first when it is absent.

    :param string bootstrap_jar_path: Where the bootstrap jar lives (or should be placed).
    :returns: An ``Ivy`` using this object's ivy settings and cache dir.
    :raises: ``self.Error`` if the fetcher fails to download the jar.
    """
    needs_download = not os.path.exists(bootstrap_jar_path)
    if needs_download:
        with temporary_file() as tmp_jar:
            fetcher = Fetcher()
            sha1_listener = fetcher.ChecksumListener(digest=hashlib.sha1())
            try:
                log.info("\nDownloading %s" % self._bootstrap_jar_url)
                # TODO: Capture the stdout of the fetcher, instead of letting it output
                # to the console directly.
                fetcher.download(
                    self._bootstrap_jar_url,
                    listener=fetcher.ProgressListener().wrap(sha1_listener),
                    path_or_fd=tmp_jar,
                    timeout_secs=self._timeout_secs,
                )
                log.info("sha1: %s" % sha1_listener.checksum)
                # Close the handle before the file is moved into its final location.
                tmp_jar.close()
                touch(bootstrap_jar_path)
                shutil.move(tmp_jar.name, bootstrap_jar_path)
            except fetcher.Error as err:
                raise self.Error("Problem fetching the ivy bootstrap jar! %s" % err)

    ivy = Ivy(
        bootstrap_jar_path,
        ivy_settings=self._ivy_settings,
        ivy_cache_dir=self.ivy_cache_dir,
    )
    return ivy
def fetch_prebuilt_wheels(self, binary_base_url, deploy_pants_wheels_path,
                          deploy_3rdparty_wheels_path, to_dir):
    """Download every prebuilt wheel listed for the given paths into to_dir, in parallel.

    Each entry returned by ``self.list_prebuilt_wheels`` is split on ``self.OUTPUT_DELIMITER``
    into a destination path (relative to to_dir) and a url path (relative to binary_base_url).

    :param string binary_base_url: Base url the wheel url paths are appended to.
    :param deploy_pants_wheels_path: Passed through to ``self.list_prebuilt_wheels``.
    :param deploy_3rdparty_wheels_path: Passed through to ``self.list_prebuilt_wheels``.
    :param string to_dir: Directory the wheels are downloaded into.
    :raises: ``ValueError`` if no wheels are listed.
    :raises: ``Fetcher.Error`` naming the failed urls if any download fails.
    """
    wheel_paths = self.list_prebuilt_wheels(binary_base_url,
                                            deploy_pants_wheels_path,
                                            deploy_3rdparty_wheels_path)
    if not wheel_paths:
        raise ValueError("No wheels found.")

    # Fetching the wheels in parallel
    # It is okay to have some interleaving outputs from the fetcher,
    # because we are summarizing things in the end.
    fetcher = Fetcher(os.getcwd())
    futures = []
    with ThreadPoolExecutor(max_workers=8) as executor:
        for k in wheel_paths:
            file_path, url_path = k.split(self.OUTPUT_DELIMITER)
            dest = os.path.join(to_dir, file_path)
            safe_mkdir(os.path.dirname(dest))
            url = '{}/{}'.format(binary_base_url, url_path)
            # One checksummer per download: a single sha1 digest shared by concurrent
            # downloads would be updated with chunks from several files at once.
            checksummer = fetcher.ChecksumListener(digest=hashlib.sha1())
            future = executor.submit(self._download, fetcher, checksummer, url, dest)
            futures.append((future, url))

    # Summarize the fetch results.
    failed_urls = []
    for future, url in futures:
        error = future.exception()
        if error is not None:
            # Keep the cause in the log; it was previously discarded.
            logger.error('Failed to download: {}: {!r}'.format(url, error))
            failed_urls.append(url)
        else:
            logger.info('Downloaded: {}'.format(url))

    if failed_urls:
        raise fetcher.Error('Failed to download {} of {} wheels: {}'.format(
            len(failed_urls), len(futures), ', '.join(failed_urls)))
def test_execute_java_no_error_weird_path(self):
    """Run a downloaded echo jar, then again from a path made of reserved characters.

    :API: public
    """
    with temporary_file(suffix='.jar') as temp_path:
        fetcher = Fetcher(get_buildroot())
        try:
            # Download a jar that echoes things.
            fetcher.download(
                'https://repo1.maven.org/maven2/io/get-coursier/echo/1.0.0/echo-1.0.0.jar',
                path_or_fd=temp_path.name,
                timeout_secs=2)
        except fetcher.Error:
            self.fail("fail to download echo jar")

        task = self.execute(self.context([]))
        executor = task.create_java_executor()

        # Executing the jar as is should work.
        # assertEqual replaces the deprecated assertEquals alias (removed in Python 3.12).
        self.assertEqual(0, util.execute_java(
            executor=executor,
            classpath=[temp_path.name],
            main='coursier.echo.Echo',
            args=['Hello World'],
            create_synthetic_jar=True))

        # Rename the jar to contain reserved characters.
        new_path = os.path.join(os.path.dirname(temp_path.name), "%%!!!===++.jar")
        safe_concurrent_rename(temp_path.name, new_path)

        # Executing the new path should work.
        self.assertEqual(0, util.execute_java(
            executor=executor,
            classpath=[new_path],
            main='coursier.echo.Echo',
            args=['Hello World'],
            create_synthetic_jar=True))
def test_download_listener(self):
    """A DownloadListener writes every fetched chunk to the file object it wraps."""
    one_mb = 1024 * 1024
    one_hour = 60 * 60

    with self.expect_get('http://foo',
                         chunk_size_bytes=1048576,
                         timeout_secs=3600) as (chunks, expected_listener_calls):
        with closing(BytesIO()) as sink:
            download_listener = Fetcher.DownloadListener(sink).wrap(self.listener)
            self.fetcher.fetch('http://foo',
                               download_listener,
                               chunk_size_bytes=one_mb,
                               timeout_secs=one_hour)

            # Read the sink before closing() releases it.
            expected_bytes = self.concat_chunks(chunks)
            self.assertEqual(expected_bytes, sink.getvalue())

    self.assert_listener_calls(expected_listener_calls, chunks)
    self.response.close.assert_called_once_with()
def test_download_listener(self):
    """A DownloadListener writes every fetched chunk to the file object it wraps."""
    # Record phase: one recv_chunk per chunk, then finished() and the response close.
    recorded = []
    for chunk in self.expect_get('http://foo', chunk_size_bytes=1048576, timeout_secs=3600):
        self.listener.recv_chunk(chunk)
        recorded.append(chunk)
    downloaded = ''.join(recorded)

    self.listener.finished()
    self.response.close()

    self.mox.ReplayAll()

    # Replay phase: run the real fetch into an in-memory sink.
    with closing(StringIO()) as sink:
        download_listener = Fetcher.DownloadListener(sink).wrap(self.listener)
        self.fetcher.fetch('http://foo',
                           download_listener,
                           chunk_size_bytes=1024 * 1024,
                           timeout_secs=60 * 60)
        self.assertEqual(downloaded, sink.getvalue())
def test_checksum_listener(self):
    """A ChecksumListener feeds every chunk to its digest and exposes the hex digest."""
    fake_digest = self.mox.CreateMockAnything()

    # Record phase: each chunk reaches both the wrapped listener and the digest.
    expected_chunks = self.expect_get('http://baz', chunk_size_bytes=1, timeout_secs=37)
    for chunk in expected_chunks:
        self.listener.recv_chunk(chunk)
        fake_digest.update(chunk)

    self.listener.finished()
    fake_digest.hexdigest().AndReturn('42')

    self.response.close()

    self.mox.ReplayAll()

    # Replay phase.
    checksum_listener = Fetcher.ChecksumListener(digest=fake_digest)
    wrapped = checksum_listener.wrap(self.listener)
    self.fetcher.fetch('http://baz', wrapped, chunk_size_bytes=1, timeout_secs=37)
    self.assertEqual('42', checksum_listener.checksum)
def test_download_listener(self):
    """A DownloadListener writes every fetched chunk to the file object it wraps."""
    # Record phase: one recv_chunk per chunk, then finished() and the response close.
    recorded = []
    for chunk in self.expect_get('http://foo', chunk_size_bytes=1048576, timeout_secs=3600):
        self.listener.recv_chunk(chunk)
        recorded.append(chunk)
    downloaded = ''.join(recorded)

    self.listener.finished()
    self.response.close()

    self.mox.ReplayAll()

    # Replay phase: run the real fetch into an in-memory sink.
    with closing(Compatibility.StringIO()) as sink:
        download_listener = Fetcher.DownloadListener(sink).wrap(self.listener)
        self.fetcher.fetch('http://foo',
                           download_listener,
                           chunk_size=Amount(1, Data.MB),
                           timeout=Amount(1, Time.HOURS))
        self.assertEqual(downloaded, sink.getvalue())
class FetcherTest(unittest.TestCase):
    """Tests for Fetcher using mock doubles for the requests API, the response and the listener.

    Call verification uses ``assert_called_once_with`` throughout. ``expect_called_once_with``
    is not part of the mock API: on a plain mock it is auto-created as a child mock, so
    calling it verifies nothing.
    """

    def setUp(self):
        self.requests = mock.Mock(spec=requests.Session)
        self.response = mock.Mock(spec=requests.Response)
        self.fetcher = Fetcher('/unused/root/dir', requests_api=self.requests)
        self.listener = mock.create_autospec(Fetcher.Listener, spec_set=True)

    def status_call(self, status_code, content_length=None):
        """Return the ``status`` call expected on the listener."""
        return mock.call.status(status_code, content_length=content_length)

    def ok_call(self, chunks):
        """Return the 200 ``status`` call whose content length is the total size of chunks."""
        return self.status_call(200, content_length=sum(len(c) for c in chunks))

    def assert_listener_calls(self, expected_listener_calls, chunks, expect_finished=True):
        """Assert the listener saw the expected calls, then one recv_chunk per chunk.

        Note that expected_listener_calls is extended in place.
        """
        expected_listener_calls.extend(mock.call.recv_chunk(chunk) for chunk in chunks)
        if expect_finished:
            expected_listener_calls.append(mock.call.finished())
        self.assertEqual(expected_listener_calls, self.listener.method_calls)

    def assert_local_file_fetch(self, url_prefix=''):
        """Fetch a temp file via url_prefix + its path; the requests API must stay unused."""
        chunks = ['0123456789', 'a']
        with temporary_file() as fp:
            for chunk in chunks:
                fp.write(chunk)
            fp.close()

            self.fetcher.fetch(url_prefix + fp.name, self.listener, chunk_size_bytes=10)

            self.assert_listener_calls([self.ok_call(chunks)], chunks)
            self.requests.assert_not_called()

    def test_file_path(self):
        self.assert_local_file_fetch()

    def test_file_scheme(self):
        self.assert_local_file_fetch('file:')

    def assert_local_file_fetch_relative(self, url, *rel_path):
        """Assert that url, fetched by a Fetcher rooted at a temp dir, yields a known file."""
        expected_contents = b'proof'
        with temporary_dir() as root_dir:
            with safe_open(os.path.join(root_dir, *rel_path), 'wb') as fp:
                fp.write(expected_contents)
            with temporary_file() as download_fp:
                Fetcher(root_dir).download(url, path_or_fd=download_fp)
                download_fp.close()
                with open(download_fp.name, 'rb') as fp:
                    self.assertEqual(expected_contents, fp.read())

    def test_file_scheme_double_slash_relative(self):
        self.assert_local_file_fetch_relative('file://relative/path', 'relative', 'path')

    def test_file_scheme_embedded_double_slash(self):
        self.assert_local_file_fetch_relative('file://a//strange//path', 'a', 'strange', 'path')

    def test_file_scheme_triple_slash(self):
        self.assert_local_file_fetch('file://')

    def test_file_dne(self):
        with temporary_dir() as base:
            with self.assertRaises(self.fetcher.PermanentError):
                self.fetcher.fetch(os.path.join(base, 'dne'), self.listener)

    def test_file_no_perms(self):
        with temporary_dir() as base:
            no_perms = os.path.join(base, 'dne')
            touch(no_perms)
            os.chmod(no_perms, 0)
            self.assertTrue(os.path.exists(no_perms))
            with self.assertRaises(self.fetcher.PermanentError):
                self.fetcher.fetch(no_perms, self.listener)

    @contextmanager
    def expect_get(self, url, chunk_size_bytes, timeout_secs, chunks=None, listener=True):
        """Prime the mock response for one successful GET and verify the GET on exit.

        Yields ``(chunks, expected_listener_calls)``; the expected calls are empty when
        listener is False. The verification after the yield only runs when the body of the
        ``with`` block completes without raising.
        """
        chunks = chunks or ['0123456789', 'a']
        size = sum(len(c) for c in chunks)

        self.requests.get.return_value = self.response
        self.response.status_code = 200
        self.response.headers = {'content-length': str(size)}
        self.response.iter_content.return_value = chunks

        yield chunks, [self.ok_call(chunks)] if listener else []

        self.requests.get.assert_called_once_with(url, allow_redirects=True, stream=True,
                                                  timeout=timeout_secs)
        self.response.iter_content.assert_called_once_with(chunk_size=chunk_size_bytes)

    def test_get(self):
        with self.expect_get('http://bar',
                             chunk_size_bytes=1024,
                             timeout_secs=60) as (chunks, expected_listener_calls):
            self.fetcher.fetch('http://bar',
                               self.listener,
                               chunk_size_bytes=1024,
                               timeout_secs=60)

        self.assert_listener_calls(expected_listener_calls, chunks)
        self.response.close.assert_called_once_with()

    def test_checksum_listener(self):
        digest = mock.Mock(spec=hashlib.md5())
        digest.hexdigest.return_value = '42'
        checksum_listener = Fetcher.ChecksumListener(digest=digest)

        with self.expect_get('http://baz',
                             chunk_size_bytes=1,
                             timeout_secs=37) as (chunks, expected_listener_calls):
            self.fetcher.fetch('http://baz',
                               checksum_listener.wrap(self.listener),
                               chunk_size_bytes=1,
                               timeout_secs=37)

        self.assertEqual('42', checksum_listener.checksum)

        def expected_digest_calls():
            for chunk in chunks:
                yield mock.call.update(chunk)
            yield mock.call.hexdigest()

        self.assertEqual(list(expected_digest_calls()), digest.method_calls)

        self.assert_listener_calls(expected_listener_calls, chunks)
        self.response.close.assert_called_once_with()

    def concat_chunks(self, chunks):
        """Return the string chunks joined into one string."""
        return ''.join(chunks)

    def test_download_listener(self):
        with self.expect_get('http://foo',
                             chunk_size_bytes=1048576,
                             timeout_secs=3600) as (chunks, expected_listener_calls):
            with closing(StringIO()) as fp:
                self.fetcher.fetch('http://foo',
                                   Fetcher.DownloadListener(fp).wrap(self.listener),
                                   chunk_size_bytes=1024 * 1024,
                                   timeout_secs=60 * 60)

                downloaded = self.concat_chunks(chunks)
                self.assertEqual(downloaded, fp.getvalue())

        self.assert_listener_calls(expected_listener_calls, chunks)
        self.response.close.assert_called_once_with()

    def test_size_mismatch(self):
        self.requests.get.return_value = self.response
        self.response.status_code = 200
        self.response.headers = {'content-length': '11'}
        chunks = ['a', 'b']
        self.response.iter_content.return_value = chunks

        with self.assertRaises(self.fetcher.Error):
            self.fetcher.fetch('http://foo',
                               self.listener,
                               chunk_size_bytes=1024,
                               timeout_secs=60)

        self.requests.get.assert_called_once_with('http://foo', allow_redirects=True,
                                                  stream=True, timeout=60)
        self.response.iter_content.assert_called_once_with(chunk_size=1024)
        self.assert_listener_calls([self.status_call(200, content_length=11)], chunks,
                                   expect_finished=False)
        self.response.close.assert_called_once_with()

    def test_get_error_transient(self):
        self.requests.get.side_effect = requests.ConnectionError

        with self.assertRaises(self.fetcher.TransientError):
            self.fetcher.fetch('http://foo',
                               self.listener,
                               chunk_size_bytes=1024,
                               timeout_secs=60)

        self.requests.get.assert_called_once_with('http://foo', allow_redirects=True,
                                                  stream=True, timeout=60)

    def test_get_error_permanent(self):
        self.requests.get.side_effect = requests.TooManyRedirects

        with self.assertRaises(self.fetcher.PermanentError) as e:
            self.fetcher.fetch('http://foo',
                               self.listener,
                               chunk_size_bytes=1024,
                               timeout_secs=60)

        # Checked outside the assertRaises block so the assertions actually run.
        self.assertTrue(e.exception.response_code is None)
        self.requests.get.assert_called_once_with('http://foo', allow_redirects=True,
                                                  stream=True, timeout=60)

    def test_http_error(self):
        self.requests.get.return_value = self.response
        self.response.status_code = 404

        with self.assertRaises(self.fetcher.PermanentError) as e:
            self.fetcher.fetch('http://foo',
                               self.listener,
                               chunk_size_bytes=1024,
                               timeout_secs=60)

        # Checked outside the assertRaises block so the assertions actually run.
        self.assertEqual(404, e.exception.response_code)
        self.requests.get.assert_called_once_with('http://foo', allow_redirects=True,
                                                  stream=True, timeout=60)
        self.listener.status.assert_called_once_with(404)
        self.response.close.assert_called_once_with()

    def test_iter_content_error(self):
        self.requests.get.return_value = self.response
        self.response.status_code = 200
        self.response.headers = {}
        self.response.iter_content.side_effect = requests.Timeout

        with self.assertRaises(self.fetcher.TransientError):
            self.fetcher.fetch('http://foo',
                               self.listener,
                               chunk_size_bytes=1024,
                               timeout_secs=60)

        # Checked outside the assertRaises block so the assertions actually run.
        self.requests.get.assert_called_once_with('http://foo', allow_redirects=True,
                                                  stream=True, timeout=60)
        self.response.iter_content.assert_called_once_with(chunk_size=1024)
        self.listener.status.assert_called_once_with(200, content_length=None)
        self.response.close.assert_called_once_with()

    def expect_download(self, path_or_fd=None):
        """Download 'http://1' against the primed mocks.

        :returns: ``(downloaded, path)``: the expected contents and the path returned by
          ``Fetcher.download``.
        """
        with self.expect_get('http://1',
                             chunk_size_bytes=13,
                             timeout_secs=13,
                             listener=False) as (chunks, expected_listener_calls):
            path = self.fetcher.download('http://1',
                                         path_or_fd=path_or_fd,
                                         chunk_size_bytes=13,
                                         timeout_secs=13)

            self.response.close.assert_called_once_with()
            downloaded = self.concat_chunks(chunks)
        return downloaded, path

    def test_download(self):
        downloaded, path = self.expect_download()
        try:
            with open(path) as fp:
                self.assertEqual(downloaded, fp.read())
        finally:
            os.unlink(path)

    def test_download_fd(self):
        with temporary_file() as fd:
            downloaded, path = self.expect_download(path_or_fd=fd)
            self.assertEqual(path, fd.name)
            fd.close()
            with open(path) as fp:
                self.assertEqual(downloaded, fp.read())

    def test_download_path(self):
        with temporary_file() as fd:
            fd.close()
            downloaded, path = self.expect_download(path_or_fd=fd.name)
            self.assertEqual(path, fd.name)
            with open(path) as fp:
                self.assertEqual(downloaded, fp.read())

    @mock.patch('time.time')
    def test_progress_listener(self, timer):
        timer.side_effect = [0, 1.137]

        stream = StringIO()
        progress_listener = Fetcher.ProgressListener(width=5, chunk_size_bytes=1, stream=stream)

        with self.expect_get('http://baz',
                             chunk_size_bytes=1,
                             timeout_secs=37,
                             chunks=[[1]] * 1024) as (chunks, expected_listener_calls):
            self.fetcher.fetch('http://baz',
                               progress_listener.wrap(self.listener),
                               chunk_size_bytes=1,
                               timeout_secs=37)

        self.assert_listener_calls(expected_listener_calls, chunks)

        # We just test the last progress line which should indicate a 100% complete download.
        # We control progress bar width (5 dots), size (1KB) and total time downloading (fake 1.137s).
        self.assertEqual('100% ..... 1 KB 1.137s\n', stream.getvalue().split('\r')[-1])
def resolve_target(self, node_task, target, results_dir, node_paths, resolve_locally=False,
                   **kwargs):
    """Install node_modules for target from its preinstalled dependencies archive.

    The archive is downloaded and extracted in a temporary directory, and the extracted
    ``node_modules`` directory replaces any existing one under results_dir.

    :param node_task: Task whose ``context.log`` receives the progress messages.
    :param target: Target providing ``dependencies_archive_url`` and ``address``.
    :param string results_dir: Directory receiving ``node_modules`` (and the sources).
    :param node_paths: Unused by this implementation.
    :param bool resolve_locally: When true, the target sources are not copied to results_dir.
    :param kwargs: Accepted and ignored.
    :raises: ``TaskError`` if the archive name cannot be determined, the fetch fails, or the
      archive does not contain a ``node_modules`` directory.
    """
    if not resolve_locally:
        self._copy_sources(target, results_dir)

    with temporary_dir() as temp_dir:
        archive_url = target.dependencies_archive_url
        log = node_task.context.log

        # The archive name is the last path segment of the url.
        archive_file_name = urllib.parse.urlsplit(archive_url).path.rsplit("/", 1)[-1]
        if not archive_file_name:
            raise TaskError(
                "Could not determine archive file name for {target} from {url}"
                .format(target=target.address.reference(), url=archive_url))

        download_path = os.path.join(temp_dir, archive_file_name)
        log_fields = dict(
            archive_file_name=archive_file_name,
            dependencies_archive_url=archive_url,
            path=download_path,
        )

        log.info(
            "Downloading archive {archive_file_name} from "
            "{dependencies_archive_url} to {path}".format(**log_fields))

        try:
            Fetcher(get_buildroot()).download(
                archive_url,
                listener=Fetcher.ProgressListener(),
                path_or_fd=download_path,
                timeout_secs=self.get_options().fetch_timeout_secs,
            )
        except Fetcher.Error as error:
            raise TaskError(
                "Failed to fetch preinstalled node_modules for {target} from {url}: {error}"
                .format(target=target.address.reference(), url=archive_url, error=error))

        log.info(
            "Fetched archive {archive_file_name} from {dependencies_archive_url} to {path}"
            .format(**log_fields))

        archiver_for_path(archive_file_name).extract(download_path, temp_dir)

        extracted_node_modules = os.path.join(temp_dir, "node_modules")
        if not os.path.isdir(extracted_node_modules):
            raise TaskError(
                "Did not find an extracted node_modules directory for {target} "
                "inside {dependencies_archive_url}".format(
                    target=target.address.reference(),
                    dependencies_archive_url=archive_url))

        # shutil.move doesn't handle directory collision nicely. This is mainly to address
        # installing within the source directory for local resolves.
        destination = os.path.join(results_dir, "node_modules")
        safe_rmtree(destination)
        shutil.move(extracted_node_modules, destination)
class FetcherTest(mox.MoxTestBase):
    """Tests for ``Fetcher`` driven by mox-recorded ``requests`` and listener mocks."""

    def setUp(self):
        """Build the mocked requests API, response and listener shared by the tests."""
        super(FetcherTest, self).setUp()
        self.requests = self.mox.CreateMockAnything()
        self.response = self.mox.CreateMock(requests.Response)
        self.fetcher = Fetcher(requests_api=self.requests)
        self.listener = self.mox.CreateMock(Fetcher.Listener)

    def expect_get(self, url, chunk_size_bytes, timeout_secs, listener=True):
        """Record a 200 response for ``url`` whose 11-byte body arrives as two chunks.

        :param url: URL expected in the ``get`` call.
        :param chunk_size_bytes: Chunk size expected in the ``iter_content`` call.
        :param timeout_secs: Timeout expected in the ``get`` call.
        :param listener: Whether to also record the listener's ``status`` call.
        :returns: The list of chunks the mocked response hands back.
        """
        body_chunks = ["0123456789", "a"]
        self.requests.get(url, stream=True, timeout=timeout_secs).AndReturn(self.response)
        self.response.status_code = 200
        self.response.headers = {"content-length": "11"}
        if listener:
            self.listener.status(200, content_length=11)
        self.response.iter_content(chunk_size=chunk_size_bytes).AndReturn(body_chunks)
        return body_chunks

    def test_get(self):
        # Every chunk is expected to reach the listener, followed by finished().
        recorded = self.expect_get("http://bar", chunk_size_bytes=1024, timeout_secs=60)
        for piece in recorded:
            self.listener.recv_chunk(piece)
        self.listener.finished()
        self.response.close()
        self.mox.ReplayAll()

        self.fetcher.fetch(
            "http://bar",
            self.listener,
            chunk_size=Amount(1, Data.KB),
            timeout=Amount(1, Time.MINUTES),
        )

    def test_checksum_listener(self):
        # The digest sees each chunk and its hexdigest becomes the listener's checksum.
        digest = self.mox.CreateMockAnything()
        for piece in self.expect_get("http://baz", chunk_size_bytes=1, timeout_secs=37):
            self.listener.recv_chunk(piece)
            digest.update(piece)
        self.listener.finished()
        digest.hexdigest().AndReturn("42")
        self.response.close()
        self.mox.ReplayAll()

        checksum_listener = Fetcher.ChecksumListener(digest=digest)
        wrapped = checksum_listener.wrap(self.listener)
        self.fetcher.fetch(
            "http://baz",
            wrapped,
            chunk_size=Amount(1, Data.BYTES),
            timeout=Amount(37, Time.SECONDS),
        )
        self.assertEqual("42", checksum_listener.checksum)

    def test_download_listener(self):
        # The download listener writes the concatenated chunks into the given file object.
        recorded = self.expect_get("http://foo", chunk_size_bytes=1048576, timeout_secs=3600)
        for piece in recorded:
            self.listener.recv_chunk(piece)
        expected_body = "".join(recorded)
        self.listener.finished()
        self.response.close()
        self.mox.ReplayAll()

        with closing(Compatibility.StringIO()) as sink:
            self.fetcher.fetch(
                "http://foo",
                Fetcher.DownloadListener(sink).wrap(self.listener),
                chunk_size=Amount(1, Data.MB),
                timeout=Amount(1, Time.HOURS),
            )
            self.assertEqual(expected_body, sink.getvalue())

    def test_size_mismatch(self):
        # Two one-byte chunks against an advertised content-length of 11 raise Fetcher.Error.
        short_body = ["a", "b"]
        self.requests.get("http://foo", stream=True, timeout=60).AndReturn(self.response)
        self.response.status_code = 200
        self.response.headers = {"content-length": "11"}
        self.listener.status(200, content_length=11)
        self.response.iter_content(chunk_size=1024).AndReturn(short_body)
        for piece in short_body:
            self.listener.recv_chunk(piece)
        self.response.close()
        self.mox.ReplayAll()

        with pytest.raises(self.fetcher.Error):
            self.fetcher.fetch(
                "http://foo",
                self.listener,
                chunk_size=Amount(1, Data.KB),
                timeout=Amount(1, Time.MINUTES),
            )

    def test_get_error_transient(self):
        # A ConnectionError from requests surfaces as a TransientError.
        self.requests.get("http://foo", stream=True, timeout=60).AndRaise(requests.ConnectionError)
        self.mox.ReplayAll()

        with pytest.raises(self.fetcher.TransientError):
            self.fetcher.fetch(
                "http://foo",
                self.listener,
                chunk_size=Amount(1, Data.KB),
                timeout=Amount(1, Time.MINUTES),
            )

    def test_get_error_permanent(self):
        # TooManyRedirects surfaces as a PermanentError carrying no response code.
        self.requests.get("http://foo", stream=True, timeout=60).AndRaise(requests.TooManyRedirects)
        self.mox.ReplayAll()

        with pytest.raises(self.fetcher.PermanentError) as raised:
            self.fetcher.fetch(
                "http://foo",
                self.listener,
                chunk_size=Amount(1, Data.KB),
                timeout=Amount(1, Time.MINUTES),
            )
        self.assertTrue(raised.value.response_code is None)

    def test_http_error(self):
        # A 404 is reported to the listener and raised as a PermanentError with that code.
        self.requests.get("http://foo", stream=True, timeout=60).AndReturn(self.response)
        self.response.status_code = 404
        self.listener.status(404)
        self.response.close()
        self.mox.ReplayAll()

        with pytest.raises(self.fetcher.PermanentError) as raised:
            self.fetcher.fetch(
                "http://foo",
                self.listener,
                chunk_size=Amount(1, Data.KB),
                timeout=Amount(1, Time.MINUTES),
            )
        self.assertEqual(404, raised.value.response_code)

    def test_iter_content_error(self):
        # A Timeout while streaming the body surfaces as a TransientError.
        self.requests.get("http://foo", stream=True, timeout=60).AndReturn(self.response)
        self.response.status_code = 200
        self.response.headers = {}
        self.listener.status(200, content_length=None)
        self.response.iter_content(chunk_size=1024).AndRaise(requests.Timeout)
        self.response.close()
        self.mox.ReplayAll()

        with pytest.raises(self.fetcher.TransientError):
            self.fetcher.fetch(
                "http://foo",
                self.listener,
                chunk_size=Amount(1, Data.KB),
                timeout=Amount(1, Time.MINUTES),
            )

    def expect_download(self, path_or_fd=None):
        """Record a successful GET of ``http://1`` and run ``Fetcher.download`` against it.

        :param path_or_fd: Forwarded unchanged to ``Fetcher.download``.
        :returns: A ``(expected_body, path)`` pair, where ``path`` is what ``download`` returned.
        """
        recorded = self.expect_get(
            "http://1", chunk_size_bytes=13, timeout_secs=13, listener=False
        )
        expected_body = "".join(recorded)
        self.response.close()
        self.mox.ReplayAll()

        path = self.fetcher.download(
            "http://1",
            path_or_fd=path_or_fd,
            chunk_size=Amount(13, Data.BYTES),
            timeout=Amount(13, Time.SECONDS),
        )
        return expected_body, path

    def test_download(self):
        # With no destination given, download() returns a path that this test must remove.
        expected_body, path = self.expect_download()
        try:
            with open(path) as written:
                self.assertEqual(expected_body, written.read())
        finally:
            os.unlink(path)

    def test_download_fd(self):
        # Passing an open file object: the returned path is that file's name.
        with temporary_file() as tmp:
            expected_body, path = self.expect_download(path_or_fd=tmp)
            self.assertEqual(path, tmp.name)
            tmp.close()
            with open(path) as written:
                self.assertEqual(expected_body, written.read())

    def test_download_path(self):
        # Passing a path string: the returned path is that same path.
        with temporary_file() as tmp:
            tmp.close()
            expected_body, path = self.expect_download(path_or_fd=tmp.name)
            self.assertEqual(path, tmp.name)
            with open(path) as written:
                self.assertEqual(expected_body, written.read())
class FetcherTest(mox.MoxTestBase):
    """Tests for ``Fetcher`` driven by mox-recorded ``requests`` and listener mocks."""

    def setUp(self):
        """Build the mocked requests API, response and listener shared by the tests."""
        super(FetcherTest, self).setUp()
        self.requests = self.mox.CreateMockAnything()
        self.response = self.mox.CreateMock(requests.Response)
        self.fetcher = Fetcher(requests_api=self.requests)
        self.listener = self.mox.CreateMock(Fetcher.Listener)

    def expect_get(self, url, chunk_size_bytes, timeout_secs, listener=True):
        """Record a 200 response for ``url`` whose 11-byte body arrives as two chunks.

        :param url: URL expected in the ``get`` call.
        :param chunk_size_bytes: Chunk size expected in the ``iter_content`` call.
        :param timeout_secs: Timeout expected in the ``get`` call.
        :param listener: Whether to also record the listener's ``status`` call.
        :returns: The list of chunks the mocked response hands back.
        """
        body_chunks = ['0123456789', 'a']
        self.requests.get(url, stream=True, timeout=timeout_secs).AndReturn(self.response)
        self.response.status_code = 200
        self.response.headers = {'content-length': '11'}
        if listener:
            self.listener.status(200, content_length=11)
        self.response.iter_content(chunk_size=chunk_size_bytes).AndReturn(body_chunks)
        return body_chunks

    def test_get(self):
        # Every chunk is expected to reach the listener, followed by finished().
        recorded = self.expect_get('http://bar', chunk_size_bytes=1024, timeout_secs=60)
        for piece in recorded:
            self.listener.recv_chunk(piece)
        self.listener.finished()
        self.response.close()
        self.mox.ReplayAll()

        self.fetcher.fetch(
            'http://bar',
            self.listener,
            chunk_size=Amount(1, Data.KB),
            timeout=Amount(1, Time.MINUTES),
        )

    def test_checksum_listener(self):
        # The digest sees each chunk and its hexdigest becomes the listener's checksum.
        digest = self.mox.CreateMockAnything()
        for piece in self.expect_get('http://baz', chunk_size_bytes=1, timeout_secs=37):
            self.listener.recv_chunk(piece)
            digest.update(piece)
        self.listener.finished()
        digest.hexdigest().AndReturn('42')
        self.response.close()
        self.mox.ReplayAll()

        checksum_listener = Fetcher.ChecksumListener(digest=digest)
        wrapped = checksum_listener.wrap(self.listener)
        self.fetcher.fetch(
            'http://baz',
            wrapped,
            chunk_size=Amount(1, Data.BYTES),
            timeout=Amount(37, Time.SECONDS),
        )
        self.assertEqual('42', checksum_listener.checksum)

    def test_download_listener(self):
        # The download listener writes the concatenated chunks into the given file object.
        recorded = self.expect_get('http://foo', chunk_size_bytes=1048576, timeout_secs=3600)
        for piece in recorded:
            self.listener.recv_chunk(piece)
        expected_body = ''.join(recorded)
        self.listener.finished()
        self.response.close()
        self.mox.ReplayAll()

        with closing(Compatibility.StringIO()) as sink:
            self.fetcher.fetch(
                'http://foo',
                Fetcher.DownloadListener(sink).wrap(self.listener),
                chunk_size=Amount(1, Data.MB),
                timeout=Amount(1, Time.HOURS),
            )
            self.assertEqual(expected_body, sink.getvalue())

    def test_size_mismatch(self):
        # Two one-byte chunks against an advertised content-length of 11 raise Fetcher.Error.
        short_body = ['a', 'b']
        self.requests.get('http://foo', stream=True, timeout=60).AndReturn(self.response)
        self.response.status_code = 200
        self.response.headers = {'content-length': '11'}
        self.listener.status(200, content_length=11)
        self.response.iter_content(chunk_size=1024).AndReturn(short_body)
        for piece in short_body:
            self.listener.recv_chunk(piece)
        self.response.close()
        self.mox.ReplayAll()

        with pytest.raises(self.fetcher.Error):
            self.fetcher.fetch(
                'http://foo',
                self.listener,
                chunk_size=Amount(1, Data.KB),
                timeout=Amount(1, Time.MINUTES),
            )

    def test_get_error_transient(self):
        # A ConnectionError from requests surfaces as a TransientError.
        self.requests.get('http://foo', stream=True, timeout=60).AndRaise(requests.ConnectionError)
        self.mox.ReplayAll()

        with pytest.raises(self.fetcher.TransientError):
            self.fetcher.fetch(
                'http://foo',
                self.listener,
                chunk_size=Amount(1, Data.KB),
                timeout=Amount(1, Time.MINUTES),
            )

    def test_get_error_permanent(self):
        # TooManyRedirects surfaces as a PermanentError carrying no response code.
        self.requests.get('http://foo', stream=True, timeout=60).AndRaise(requests.TooManyRedirects)
        self.mox.ReplayAll()

        with pytest.raises(self.fetcher.PermanentError) as raised:
            self.fetcher.fetch(
                'http://foo',
                self.listener,
                chunk_size=Amount(1, Data.KB),
                timeout=Amount(1, Time.MINUTES),
            )
        self.assertTrue(raised.value.response_code is None)

    def test_http_error(self):
        # A 404 is reported to the listener and raised as a PermanentError with that code.
        self.requests.get('http://foo', stream=True, timeout=60).AndReturn(self.response)
        self.response.status_code = 404
        self.listener.status(404)
        self.response.close()
        self.mox.ReplayAll()

        with pytest.raises(self.fetcher.PermanentError) as raised:
            self.fetcher.fetch(
                'http://foo',
                self.listener,
                chunk_size=Amount(1, Data.KB),
                timeout=Amount(1, Time.MINUTES),
            )
        self.assertEqual(404, raised.value.response_code)

    def test_iter_content_error(self):
        # A Timeout while streaming the body surfaces as a TransientError.
        self.requests.get('http://foo', stream=True, timeout=60).AndReturn(self.response)
        self.response.status_code = 200
        self.response.headers = {}
        self.listener.status(200, content_length=None)
        self.response.iter_content(chunk_size=1024).AndRaise(requests.Timeout)
        self.response.close()
        self.mox.ReplayAll()

        with pytest.raises(self.fetcher.TransientError):
            self.fetcher.fetch(
                'http://foo',
                self.listener,
                chunk_size=Amount(1, Data.KB),
                timeout=Amount(1, Time.MINUTES),
            )

    def expect_download(self, path_or_fd=None):
        """Record a successful GET of ``http://1`` and run ``Fetcher.download`` against it.

        :param path_or_fd: Forwarded unchanged to ``Fetcher.download``.
        :returns: A ``(expected_body, path)`` pair, where ``path`` is what ``download`` returned.
        """
        recorded = self.expect_get(
            'http://1', chunk_size_bytes=13, timeout_secs=13, listener=False
        )
        expected_body = ''.join(recorded)
        self.response.close()
        self.mox.ReplayAll()

        path = self.fetcher.download(
            'http://1',
            path_or_fd=path_or_fd,
            chunk_size=Amount(13, Data.BYTES),
            timeout=Amount(13, Time.SECONDS),
        )
        return expected_body, path

    def test_download(self):
        # With no destination given, download() returns a path that this test must remove.
        expected_body, path = self.expect_download()
        try:
            with open(path) as written:
                self.assertEqual(expected_body, written.read())
        finally:
            os.unlink(path)

    def test_download_fd(self):
        # Passing an open file object: the returned path is that file's name.
        with temporary_file() as tmp:
            expected_body, path = self.expect_download(path_or_fd=tmp)
            self.assertEqual(path, tmp.name)
            tmp.close()
            with open(path) as written:
                self.assertEqual(expected_body, written.read())

    def test_download_path(self):
        # Passing a path string: the returned path is that same path.
        with temporary_file() as tmp:
            tmp.close()
            expected_body, path = self.expect_download(path_or_fd=tmp.name)
            self.assertEqual(path, tmp.name)
            with open(path) as written:
                self.assertEqual(expected_body, written.read())
class FetcherTest(mox.MoxTestBase):
    """Tests for ``Fetcher`` driven by mox-recorded ``requests`` and listener mocks."""

    def setUp(self):
        """Build the mocked requests API, response and listener shared by the tests."""
        super(FetcherTest, self).setUp()
        self.requests = self.mox.CreateMockAnything()
        self.response = self.mox.CreateMock(requests.Response)
        self.fetcher = Fetcher(requests_api=self.requests)
        self.listener = self.mox.CreateMock(Fetcher.Listener)

    def expect_get(self, url, chunk_size_bytes, timeout_secs, listener=True):
        """Record a 200 response for ``url`` whose 11-byte body arrives as two chunks.

        :param url: URL expected in the ``get`` call.
        :param chunk_size_bytes: Chunk size expected in the ``iter_content`` call.
        :param timeout_secs: Timeout expected in the ``get`` call.
        :param listener: Whether to also record the listener's ``status`` call.
        :returns: The list of chunks the mocked response hands back.
        """
        body_chunks = ['0123456789', 'a']
        recorded_get = self.requests.get(
            url, allow_redirects=True, stream=True, timeout=timeout_secs
        )
        recorded_get.AndReturn(self.response)
        self.response.status_code = 200
        self.response.headers = {'content-length': '11'}
        if listener:
            self.listener.status(200, content_length=11)
        self.response.iter_content(chunk_size=chunk_size_bytes).AndReturn(body_chunks)
        return body_chunks

    def test_get(self):
        # Every chunk is expected to reach the listener, followed by finished().
        recorded = self.expect_get('http://bar', chunk_size_bytes=1024, timeout_secs=60)
        for piece in recorded:
            self.listener.recv_chunk(piece)
        self.listener.finished()
        self.response.close()
        self.mox.ReplayAll()

        self.fetcher.fetch('http://bar', self.listener, chunk_size_bytes=1024, timeout_secs=60)

    def test_checksum_listener(self):
        # The digest sees each chunk and its hexdigest becomes the listener's checksum.
        digest = self.mox.CreateMockAnything()
        for piece in self.expect_get('http://baz', chunk_size_bytes=1, timeout_secs=37):
            self.listener.recv_chunk(piece)
            digest.update(piece)
        self.listener.finished()
        digest.hexdigest().AndReturn('42')
        self.response.close()
        self.mox.ReplayAll()

        checksum_listener = Fetcher.ChecksumListener(digest=digest)
        wrapped = checksum_listener.wrap(self.listener)
        self.fetcher.fetch('http://baz', wrapped, chunk_size_bytes=1, timeout_secs=37)
        self.assertEqual('42', checksum_listener.checksum)

    def test_download_listener(self):
        # The download listener writes the concatenated chunks into the given file object.
        recorded = self.expect_get('http://foo', chunk_size_bytes=1048576, timeout_secs=3600)
        for piece in recorded:
            self.listener.recv_chunk(piece)
        expected_body = ''.join(recorded)
        self.listener.finished()
        self.response.close()
        self.mox.ReplayAll()

        with closing(StringIO()) as sink:
            self.fetcher.fetch(
                'http://foo',
                Fetcher.DownloadListener(sink).wrap(self.listener),
                chunk_size_bytes=1024 * 1024,
                timeout_secs=60 * 60,
            )
            self.assertEqual(expected_body, sink.getvalue())

    def test_size_mismatch(self):
        # Two one-byte chunks against an advertised content-length of 11 raise Fetcher.Error.
        short_body = ['a', 'b']
        self.requests.get(
            'http://foo', allow_redirects=True, stream=True, timeout=60
        ).AndReturn(self.response)
        self.response.status_code = 200
        self.response.headers = {'content-length': '11'}
        self.listener.status(200, content_length=11)
        self.response.iter_content(chunk_size=1024).AndReturn(short_body)
        for piece in short_body:
            self.listener.recv_chunk(piece)
        self.response.close()
        self.mox.ReplayAll()

        with self.assertRaises(self.fetcher.Error):
            self.fetcher.fetch('http://foo', self.listener, chunk_size_bytes=1024, timeout_secs=60)

    def test_get_error_transient(self):
        # A ConnectionError from requests surfaces as a TransientError.
        self.requests.get(
            'http://foo', allow_redirects=True, stream=True, timeout=60
        ).AndRaise(requests.ConnectionError)
        self.mox.ReplayAll()

        with self.assertRaises(self.fetcher.TransientError):
            self.fetcher.fetch('http://foo', self.listener, chunk_size_bytes=1024, timeout_secs=60)

    def test_get_error_permanent(self):
        # TooManyRedirects surfaces as a PermanentError carrying no response code.
        self.requests.get(
            'http://foo', allow_redirects=True, stream=True, timeout=60
        ).AndRaise(requests.TooManyRedirects)
        self.mox.ReplayAll()

        with self.assertRaises(self.fetcher.PermanentError) as raised:
            self.fetcher.fetch('http://foo', self.listener, chunk_size_bytes=1024, timeout_secs=60)
        self.assertTrue(raised.exception.response_code is None)

    def test_http_error(self):
        # A 404 is reported to the listener and raised as a PermanentError with that code.
        self.requests.get(
            'http://foo', allow_redirects=True, stream=True, timeout=60
        ).AndReturn(self.response)
        self.response.status_code = 404
        self.listener.status(404)
        self.response.close()
        self.mox.ReplayAll()

        with self.assertRaises(self.fetcher.PermanentError) as raised:
            self.fetcher.fetch('http://foo', self.listener, chunk_size_bytes=1024, timeout_secs=60)
        self.assertEqual(404, raised.exception.response_code)

    def test_iter_content_error(self):
        # A Timeout while streaming the body surfaces as a TransientError.
        self.requests.get(
            'http://foo', allow_redirects=True, stream=True, timeout=60
        ).AndReturn(self.response)
        self.response.status_code = 200
        self.response.headers = {}
        self.listener.status(200, content_length=None)
        self.response.iter_content(chunk_size=1024).AndRaise(requests.Timeout)
        self.response.close()
        self.mox.ReplayAll()

        with self.assertRaises(self.fetcher.TransientError):
            self.fetcher.fetch('http://foo', self.listener, chunk_size_bytes=1024, timeout_secs=60)

    def expect_download(self, path_or_fd=None):
        """Record a successful GET of ``http://1`` and run ``Fetcher.download`` against it.

        :param path_or_fd: Forwarded unchanged to ``Fetcher.download``.
        :returns: A ``(expected_body, path)`` pair, where ``path`` is what ``download`` returned.
        """
        recorded = self.expect_get(
            'http://1', chunk_size_bytes=13, timeout_secs=13, listener=False
        )
        expected_body = ''.join(recorded)
        self.response.close()
        self.mox.ReplayAll()

        path = self.fetcher.download(
            'http://1', path_or_fd=path_or_fd, chunk_size_bytes=13, timeout_secs=13
        )
        return expected_body, path

    def test_download(self):
        # With no destination given, download() returns a path that this test must remove.
        expected_body, path = self.expect_download()
        try:
            with open(path) as written:
                self.assertEqual(expected_body, written.read())
        finally:
            os.unlink(path)

    def test_download_fd(self):
        # Passing an open file object: the returned path is that file's name.
        with temporary_file() as tmp:
            expected_body, path = self.expect_download(path_or_fd=tmp)
            self.assertEqual(path, tmp.name)
            tmp.close()
            with open(path) as written:
                self.assertEqual(expected_body, written.read())

    def test_download_path(self):
        # Passing a path string: the returned path is that same path.
        with temporary_file() as tmp:
            tmp.close()
            expected_body, path = self.expect_download(path_or_fd=tmp.name)
            self.assertEqual(path, tmp.name)
            with open(path) as written:
                self.assertEqual(expected_body, written.read())
def _default_http_fetcher(cls):
    """Build the fetcher used everywhere except in testing.

    :returns: A ``Fetcher`` constructed with the current build root, against which it
      resolves local file paths.
    """
    build_root = get_buildroot()
    return Fetcher(build_root)
def setUp(self):
    """Create the mocked requests session, response and listener used by the tests."""
    requests_api = mock.Mock(spec=requests.Session)
    self.requests = requests_api
    self.response = mock.Mock(spec=requests.Response)
    # The root directory is a placeholder path handed to the Fetcher constructor.
    self.fetcher = Fetcher('/unused/root/dir', requests_api=requests_api)
    self.listener = mock.create_autospec(Fetcher.Listener, spec_set=True)
class FetcherTest(unittest.TestCase):
    """Tests ``Fetcher`` against local files and a mocked ``requests`` session."""

    def setUp(self):
        """Create the mocked requests session, response and listener used by the tests."""
        self.requests = mock.Mock(spec=requests.Session)
        self.response = mock.Mock(spec=requests.Response)
        self.fetcher = Fetcher('/unused/root/dir', requests_api=self.requests)
        self.listener = mock.create_autospec(Fetcher.Listener, spec_set=True)

    def status_call(self, status_code, content_length=None):
        """Return the ``mock.call`` expected for a listener ``status`` notification."""
        return mock.call.status(status_code, content_length=content_length)

    def ok_call(self, chunks):
        """Return the expected 200 ``status`` call whose length is the total size of ``chunks``."""
        return self.status_call(200, content_length=sum(len(c) for c in chunks))

    def assert_listener_calls(self, expected_listener_calls, chunks, expect_finished=True):
        """Assert the listener saw exactly the expected sequence of method calls.

        :param expected_listener_calls: Calls expected before any chunk is received; this
          list is extended in place.
        :param chunks: Chunks that are each expected as one ``recv_chunk`` call.
        :param expect_finished: Whether a trailing ``finished`` call is expected.
        """
        expected_listener_calls.extend(mock.call.recv_chunk(chunk) for chunk in chunks)
        if expect_finished:
            expected_listener_calls.append(mock.call.finished())
        self.assertEqual(expected_listener_calls, self.listener.method_calls)

    def assert_local_file_fetch(self, url_prefix=''):
        """Fetch a temporary local file and check it was read without touching ``requests``.

        :param url_prefix: Text prepended to the file's path to form the fetched URL.
        """
        chunks = [b'0123456789', b'a']
        with temporary_file() as fp:
            for chunk in chunks:
                fp.write(chunk)
            fp.close()

            # A chunk size of 10 makes the 11-byte file arrive as the two chunks above.
            self.fetcher.fetch(url_prefix + fp.name, self.listener, chunk_size_bytes=10)

            self.assert_listener_calls([self.ok_call(chunks)], chunks)
            self.requests.assert_not_called()

    def test_file_path(self):
        self.assert_local_file_fetch()

    def test_file_scheme(self):
        self.assert_local_file_fetch('file:')

    def assert_local_file_fetch_relative(self, url, *rel_path):
        """Download ``url`` with a Fetcher rooted at a temp dir and check the bytes received.

        :param url: URL handed to ``Fetcher.download``.
        :param rel_path: Path components, relative to the root dir, of the file to create.
        """
        expected_contents = b'proof'
        with temporary_dir() as root_dir:
            with safe_open(os.path.join(root_dir, *rel_path), 'wb') as fp:
                fp.write(expected_contents)
            with temporary_file() as download_fp:
                Fetcher(root_dir).download(url, path_or_fd=download_fp)
                download_fp.close()
                with open(download_fp.name, 'rb') as fp:
                    self.assertEqual(expected_contents, fp.read())

    def test_file_scheme_double_slash_relative(self):
        self.assert_local_file_fetch_relative('file://relative/path', 'relative', 'path')

    def test_file_scheme_embedded_double_slash(self):
        self.assert_local_file_fetch_relative('file://a//strange//path', 'a', 'strange', 'path')

    def test_file_scheme_triple_slash(self):
        # The helper appends the temp file's name to this prefix to form the URL.
        self.assert_local_file_fetch('file://')

    def test_file_dne(self):
        with temporary_dir() as base:
            with self.assertRaises(self.fetcher.PermanentError):
                self.fetcher.fetch(os.path.join(base, 'dne'), self.listener)

    def test_file_no_perms(self):
        with temporary_dir() as base:
            no_perms = os.path.join(base, 'dne')
            touch(no_perms)
            os.chmod(no_perms, 0)
            self.assertTrue(os.path.exists(no_perms))
            with self.assertRaises(self.fetcher.PermanentError):
                self.fetcher.fetch(no_perms, self.listener)

    @contextmanager
    def expect_get(self, url, chunk_size_bytes, timeout_secs, chunks=None, listener=True):
        """Prime the mocked session for a successful GET and verify the calls afterwards.

        :param url: URL the fetcher is expected to request.
        :param chunk_size_bytes: Chunk size expected in the ``iter_content`` call.
        :param timeout_secs: Timeout expected in the ``get`` call.
        :param chunks: Body chunks the response yields; defaults to two chunks of 11 bytes.
        :param listener: Whether the yielded expected-calls list starts with the 200 status call.
        :returns: Context manager yielding ``(chunks, expected_listener_calls)``.
        """
        chunks = chunks or [b'0123456789', b'a']
        size = sum(len(c) for c in chunks)

        self.requests.get.return_value = self.response
        self.response.status_code = 200
        self.response.headers = {'content-length': str(size)}
        self.response.iter_content.return_value = chunks

        yield chunks, [self.ok_call(chunks)] if listener else []

        # These must be assert_* methods: any other attribute name on a Mock is created on
        # demand and "calling" it verifies nothing.
        self.requests.get.assert_called_once_with(url, allow_redirects=True, stream=True,
                                                  timeout=timeout_secs)
        self.response.iter_content.assert_called_once_with(chunk_size=chunk_size_bytes)

    def test_get(self):
        with self.expect_get('http://bar',
                             chunk_size_bytes=1024,
                             timeout_secs=60) as (chunks, expected_listener_calls):

            self.fetcher.fetch('http://bar',
                               self.listener,
                               chunk_size_bytes=1024,
                               timeout_secs=60)

            self.assert_listener_calls(expected_listener_calls, chunks)
            self.response.close.assert_called_once_with()

    def test_checksum_listener(self):
        digest = mock.Mock(spec=hashlib.md5())
        digest.hexdigest.return_value = '42'
        checksum_listener = Fetcher.ChecksumListener(digest=digest)

        with self.expect_get('http://baz',
                             chunk_size_bytes=1,
                             timeout_secs=37) as (chunks, expected_listener_calls):

            self.fetcher.fetch('http://baz',
                               checksum_listener.wrap(self.listener),
                               chunk_size_bytes=1,
                               timeout_secs=37)

        self.assertEqual('42', checksum_listener.checksum)

        def expected_digest_calls():
            # One update per chunk, then the single hexdigest read by the listener.
            for chunk in chunks:
                yield mock.call.update(chunk)
            yield mock.call.hexdigest()

        self.assertEqual(list(expected_digest_calls()), digest.method_calls)

        self.assert_listener_calls(expected_listener_calls, chunks)
        self.response.close.assert_called_once_with()

    def concat_chunks(self, chunks):
        """Return the byte chunks joined into a single bytes value."""
        return b''.join(chunks)

    def test_download_listener(self):
        with self.expect_get('http://foo',
                             chunk_size_bytes=1048576,
                             timeout_secs=3600) as (chunks, expected_listener_calls):

            with closing(BytesIO()) as fp:
                self.fetcher.fetch('http://foo',
                                   Fetcher.DownloadListener(fp).wrap(self.listener),
                                   chunk_size_bytes=1024 * 1024,
                                   timeout_secs=60 * 60)

                downloaded = self.concat_chunks(chunks)
                self.assertEqual(downloaded, fp.getvalue())

        self.assert_listener_calls(expected_listener_calls, chunks)
        self.response.close.assert_called_once_with()

    def test_size_mismatch(self):
        self.requests.get.return_value = self.response
        self.response.status_code = 200
        # Advertise 11 bytes but deliver only 2, which the fetcher must reject.
        self.response.headers = {'content-length': '11'}
        chunks = ['a', 'b']
        self.response.iter_content.return_value = chunks

        with self.assertRaises(self.fetcher.Error):
            self.fetcher.fetch('http://foo',
                               self.listener,
                               chunk_size_bytes=1024,
                               timeout_secs=60)

        self.requests.get.assert_called_once_with('http://foo', allow_redirects=True, stream=True,
                                                  timeout=60)
        self.response.iter_content.assert_called_once_with(chunk_size=1024)
        self.assert_listener_calls([self.status_call(200, content_length=11)], chunks,
                                   expect_finished=False)
        self.response.close.assert_called_once_with()

    def test_get_error_transient(self):
        self.requests.get.side_effect = requests.ConnectionError

        with self.assertRaises(self.fetcher.TransientError):
            self.fetcher.fetch('http://foo',
                               self.listener,
                               chunk_size_bytes=1024,
                               timeout_secs=60)

        self.requests.get.assert_called_once_with('http://foo', allow_redirects=True, stream=True,
                                                  timeout=60)

    def test_get_error_permanent(self):
        self.requests.get.side_effect = requests.TooManyRedirects

        with self.assertRaises(self.fetcher.PermanentError) as e:
            self.fetcher.fetch('http://foo',
                               self.listener,
                               chunk_size_bytes=1024,
                               timeout_secs=60)

        self.assertTrue(e.exception.response_code is None)
        self.requests.get.assert_called_once_with('http://foo', allow_redirects=True, stream=True,
                                                  timeout=60)

    def test_http_error(self):
        self.requests.get.return_value = self.response
        self.response.status_code = 404

        with self.assertRaises(self.fetcher.PermanentError) as e:
            self.fetcher.fetch('http://foo',
                               self.listener,
                               chunk_size_bytes=1024,
                               timeout_secs=60)

        self.assertEqual(404, e.exception.response_code)
        self.requests.get.assert_called_once_with('http://foo', allow_redirects=True, stream=True,
                                                  timeout=60)
        self.listener.status.assert_called_once_with(404)
        self.response.close.assert_called_once_with()

    def test_iter_content_error(self):
        self.requests.get.return_value = self.response
        self.response.status_code = 200
        self.response.headers = {}
        self.response.iter_content.side_effect = requests.Timeout

        with self.assertRaises(self.fetcher.TransientError):
            self.fetcher.fetch('http://foo',
                               self.listener,
                               chunk_size_bytes=1024,
                               timeout_secs=60)

        self.requests.get.assert_called_once_with('http://foo', allow_redirects=True, stream=True,
                                                  timeout=60)
        self.response.iter_content.assert_called_once_with(chunk_size=1024)
        self.listener.status.assert_called_once_with(200, content_length=None)
        self.response.close.assert_called_once_with()

    def expect_download(self, path_or_fd=None):
        """Run ``Fetcher.download`` for ``http://1`` against the mocked session.

        :param path_or_fd: Forwarded unchanged to ``Fetcher.download``.
        :returns: A ``(downloaded, path)`` pair: the expected bytes and the path returned
          by ``download``.
        """
        with self.expect_get('http://1',
                             chunk_size_bytes=13,
                             timeout_secs=13,
                             listener=False) as (chunks, expected_listener_calls):

            path = self.fetcher.download('http://1',
                                         path_or_fd=path_or_fd,
                                         chunk_size_bytes=13,
                                         timeout_secs=13)

            self.response.close.assert_called_once_with()
            downloaded = self.concat_chunks(chunks)
            return downloaded, path

    def test_download(self):
        downloaded, path = self.expect_download()
        try:
            with open(path, 'rb') as fp:
                self.assertEqual(downloaded, fp.read())
        finally:
            # No destination was supplied, so the file download() produced is removed here.
            os.unlink(path)

    def test_download_fd(self):
        with temporary_file() as fd:
            downloaded, path = self.expect_download(path_or_fd=fd)
            self.assertEqual(path, fd.name)
            fd.close()
            with open(path, 'rb') as fp:
                self.assertEqual(downloaded, fp.read())

    def test_download_path(self):
        with temporary_file() as fd:
            fd.close()
            downloaded, path = self.expect_download(path_or_fd=fd.name)
            self.assertEqual(path, fd.name)
            with open(path, 'rb') as fp:
                self.assertEqual(downloaded, fp.read())

    @mock.patch('time.time')
    def test_progress_listener(self, timer):
        # Two fake clock readings give a deterministic elapsed time of 1.137s.
        timer.side_effect = [0, 1.137]

        stream = BytesIO()
        progress_listener = Fetcher.ProgressListener(width=5, chunk_size_bytes=1, stream=stream)

        with self.expect_get('http://baz',
                             chunk_size_bytes=1,
                             timeout_secs=37,
                             chunks=[[1]] * 1024) as (chunks, expected_listener_calls):

            self.fetcher.fetch('http://baz',
                               progress_listener.wrap(self.listener),
                               chunk_size_bytes=1,
                               timeout_secs=37)

        self.assert_listener_calls(expected_listener_calls, chunks)

        # We just test the last progress line which should indicate a 100% complete download.
        # We control progress bar width (5 dots), size (1KB) and total time downloading (fake 1.137s).
        self.assertEqual('100% ..... 1 KB 1.137s\n', stream.getvalue().decode('utf-8').split('\r')[-1])