def _get_single_path(self, path):
    """
    Retrieve a single path within the upstream registry, and return its
    body after deserializing it as json.

    :param path: a full http path to retrieve that will be urljoin'd to the
                 upstream registry url.
    :type  path: basestring

    :return: whatever gets deserialized out of the response body's json
    """
    url = urlparse.urljoin(self.registry_url, path)
    request = DownloadRequest(url, StringIO())
    if path.endswith('/images'):
        # this is required by the docker index and indicates that it should
        # return an auth token
        if request.headers is None:
            request.headers = {}
        request.headers[self.DOCKER_TOKEN_HEADER] = 'true'
    report = self.downloader.download_one(request)

    if report.state == report.DOWNLOAD_FAILED:
        raise IOError(report.error_msg)

    self._parse_response_headers(report.headers)
    return json.loads(report.destination.getvalue())

def _get_path(self, path):
    """
    Retrieve a single path within the upstream registry, and return a
    2-tuple of the headers and the response body.

    :param path: a full http path to retrieve that will be urljoin'd to the
                 upstream registry url.
    :type  path: basestring

    :return: (headers, response body)
    :rtype:  tuple
    """
    url = urlparse.urljoin(self.registry_url, path)
    _logger.debug(_('Retrieving {0}'.format(url)))
    request = DownloadRequest(url, StringIO())
    if self.token:
        request.headers = token_util.update_auth_header(request.headers, self.token)
    report = self.downloader.download_one(request)

    # If the download was unauthorized, attempt to get a token and try again
    if report.state == report.DOWNLOAD_FAILED:
        if report.error_report.get('response_code') == httplib.UNAUTHORIZED:
            _logger.debug(_('Download unauthorized, attempting to retrieve a token.'))
            self.token = token_util.request_token(self.token_downloader, request,
                                                  report.headers)
            request.headers = token_util.update_auth_header(request.headers, self.token)
            report = self.downloader.download_one(request)

    if report.state == report.DOWNLOAD_FAILED:
        self._raise_path_error(report)

    return report.headers, report.destination.getvalue()

def _get_path(self, path, headers=None):
    """
    Retrieve a single path within the upstream registry, and return a
    2-tuple of the headers and the response body.

    :param path: a full http path to retrieve that will be urljoin'd to the
                 upstream registry url.
    :type  path: basestring
    :param headers: headers sent in the request
    :type  headers: dict

    :return: (headers, response body)
    :rtype:  tuple
    """
    url = urlparse.urljoin(self.registry_url, path)
    _logger.debug(_('Retrieving {0}'.format(url)))
    request = DownloadRequest(url, StringIO())
    request.headers = headers
    if self.token:
        request.headers = auth_util.update_token_auth_header(request.headers, self.token)
    report = self.downloader.download_one(request)

    # If the download was unauthorized, check the report headers. If basic auth
    # is expected, retry with basic auth; otherwise attempt to get a token and
    # try again.
    if report.state == report.DOWNLOAD_FAILED:
        if report.error_report.get('response_code') == httplib.UNAUTHORIZED:
            auth_header = report.headers.get('www-authenticate')
            if auth_header is None:
                raise IOError("401 responses are expected to "
                              "contain authentication information")
            elif "Basic" in auth_header:
                _logger.debug(_('Download unauthorized, retrying with basic authentication'))
                report = self.auth_downloader.download_one(request)
            else:
                _logger.debug(_('Download unauthorized, attempting to retrieve a token.'))
                self.token = auth_util.request_token(self.auth_downloader, request,
                                                     auth_header, self.name)
                request.headers = auth_util.update_token_auth_header(request.headers,
                                                                     self.token)
                report = self.downloader.download_one(request)

    if report.state == report.DOWNLOAD_FAILED:
        # This condition was added in case the registry does not allow access
        # to the /v2/ endpoint but the token is still valid for other
        # endpoints. See https://pulp.plan.io/issues/2643
        if path == '/v2/' and report.error_report.get('response_code') == httplib.UNAUTHORIZED:
            pass
        else:
            self._raise_path_error(report)

    return report.headers, report.destination.getvalue()

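# Hedged usage sketch (added; not from the original source). Given the
# (headers, body) tuple returned by _get_path above, a v2 tag listing
# deserializes like this; the JSON payload below is illustrative.
import json

body = '{"name": "library/busybox", "tags": ["latest", "1.26"]}'
tags = json.loads(body).get('tags', [])
assert tags == ['latest', '1.26']
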
def test_request_cancel(self, mock_from_request):
    url = 'http://fakeurl/robots.txt'
    req = DownloadRequest(url, mock.Mock())
    req.canceled = True

    self.downloader._fetch(req)

    mock_from_request.return_value.download_canceled.assert_called_once_with()

def test_copy_canceled_single_request(self, mock_canceled, mock_open):
    downloader = local.LocalFileDownloader(DownloaderConfig())
    request = DownloadRequest('file://' + __file__, '/bar')
    request.canceled = True

    downloader._copy(request)

    # make sure the cancel method was called on the report
    mock_canceled.assert_called_once_with()
    # make sure no writing was attempted
    self.assertEqual(mock_open.return_value.write.call_count, 0)

def _create_download_requests(content_units):
    """
    Make a list of Nectar DownloadRequests for the given content units using
    the lazy catalog.

    :param content_units: The content units to build a list of DownloadRequests for.
    :type  content_units: list of pulp.server.db.model.FileContentUnit

    :return: A list of DownloadRequests; each request includes a ``data``
             instance variable which is a dict containing the FileContentUnit,
             the list of files in the unit, and the downloaded file's storage
             path.
    :rtype:  list of nectar.request.DownloadRequest
    """
    requests = []
    working_dir = get_working_directory()
    signing_key = Key.load(pulp_conf.get('authentication', 'rsa_key'))

    for content_unit in content_units:
        # All files in the unit; every request for a unit has a reference to this dict.
        unit_files = {}
        unit_working_dir = os.path.join(working_dir, content_unit.id)
        for file_path in content_unit.list_files():
            qs = LazyCatalogEntry.objects.filter(
                unit_id=content_unit.id,
                unit_type_id=content_unit.type_id,
                path=file_path
            )
            catalog_entry = qs.order_by('revision').first()
            if catalog_entry is None:
                continue
            signed_url = _get_streamer_url(catalog_entry, signing_key)
            temporary_destination = os.path.join(
                unit_working_dir,
                os.path.basename(catalog_entry.path)
            )
            mkdir(unit_working_dir)
            unit_files[temporary_destination] = {
                CATALOG_ENTRY: catalog_entry,
                PATH_DOWNLOADED: None,
            }

            request = DownloadRequest(signed_url, temporary_destination)
            # For memory reasons, only hold onto the id and type_id so we can
            # reload the unit once it's successfully downloaded.
            request.data = {
                TYPE_ID: content_unit.type_id,
                UNIT_ID: content_unit.id,
                UNIT_FILES: unit_files,
            }
            requests.append(request)

    return requests

def _download(self, urls):
    """
    Download files by URL.

    Encapsulates nectar details and provides a simplified method of
    downloading files.

    :param urls: A list of tuples: (url, destination). The *url* and
        *destination* are both strings. The *destination* is the fully
        qualified path to where the file is to be downloaded.
    :type  urls: list

    :return: The nectar reports. Tuple of: (succeeded_reports, failed_reports)
    :rtype:  tuple
    """
    feed_url = self.feed_url()
    nectar_config = importer_config_to_nectar_config(self.config.flatten())
    nectar_class = URL_TO_DOWNLOADER[urlparse(feed_url).scheme]
    downloader = nectar_class(nectar_config)
    listener = DownloadListener(self, downloader)

    request_list = []
    for url, destination in urls:
        request_list.append(DownloadRequest(url, destination))
    downloader.download(request_list)
    nectar_config.finalize()

    for report in listener.succeeded_reports:
        _logger.info(FETCH_SUCCEEDED, dict(url=report.url, dst=report.destination))
    for report in listener.failed_reports:
        _logger.error(FETCH_FAILED, dict(url=report.url, msg=report.error_msg))

    return listener.succeeded_reports, listener.failed_reports

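# A minimal sketch (added for illustration) of the scheme-to-downloader
# mapping URL_TO_DOWNLOADER is assumed to contain; the exact contents of the
# original module may differ. The nectar downloader classes are real.
from nectar.downloaders.local import LocalFileDownloader
from nectar.downloaders.threaded import HTTPThreadedDownloader

URL_TO_DOWNLOADER = {
    'file': LocalFileDownloader,
    'http': HTTPThreadedDownloader,
    'https': HTTPThreadedDownloader,
}
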
def _parse_as_mirrorlist(self, feed):
    """
    Treats the provided feed as a mirrorlist. Parses its content and
    extracts the urls to sync.

    :param feed: feed that should be treated as a mirrorlist
    :type  feed: str

    :return: list of the URLs received from the mirrorlist
    :rtype:  list
    """
    url_file = StringIO()
    downloader = nectar_factory.create_downloader(feed, self.nectar_config)
    request = DownloadRequest(feed, url_file)
    downloader.download_one(request)
    url_file.seek(0)
    url_parse = url_file.read().split('\n')
    repo_url = []
    # Because the format of a mirrorlist can vary, this regex matches the
    # cases where the url is not commented out and does not have any
    # punctuation characters in front of it.
    pattern = re.compile("(^|^[\w\s=]+\s)((http(s)?)://.*)")
    for line in url_parse:
        for match in re.finditer(pattern, line):
            repo_url.append(match.group(2))
    random.shuffle(repo_url)
    return repo_url

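# Illustration (added) of how the mirrorlist regex behaves on typical input
# lines; the sample mirror URLs are hypothetical.
import re

pattern = re.compile("(^|^[\w\s=]+\s)((http(s)?)://.*)")
samples = [
    'http://mirror.example.com/fedora/',           # bare URL: matched
    'baseurl = https://mirror.example.org/repo/',  # key = value form: matched
    '# http://commented.example.com/',             # commented out: no match
]
for line in samples:
    for match in re.finditer(pattern, line):
        print(match.group(2))
# prints the first two URLs only
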
def get_distribution_file(feed, tmp_dir, nectar_config):
    """
    Download the pulp_distribution.xml and return its full path on disk, or
    None if not found.

    :param feed: URL to the repository
    :type  feed: str
    :param tmp_dir: full path to the temporary directory being used
    :type  tmp_dir: str
    :param nectar_config: download config to be used by nectar
    :type  nectar_config: nectar.config.DownloaderConfig

    :return: full path to distribution file on disk, or None if not found
    :rtype:  str or NoneType
    """
    filename = constants.DISTRIBUTION_XML

    path = os.path.join(tmp_dir, filename)
    url = os.path.join(feed, filename)
    request = DownloadRequest(url, path)
    listener = AggregatingEventListener()
    downloader = nectar_factory.create_downloader(feed, nectar_config, listener)
    downloader.download([request])

    if len(listener.succeeded_reports) == 1:
        return path

    return None

def get_treefile(feed, tmp_dir, nectar_config):
    """
    Download the treefile and return its full path on disk, or None if not
    found.

    :param feed: URL to the repository
    :type  feed: str
    :param tmp_dir: full path to the temporary directory being used
    :type  tmp_dir: str
    :param nectar_config: download config to be used by nectar
    :type  nectar_config: nectar.config.DownloaderConfig

    :return: full path to treefile on disk, or None if not found
    :rtype:  str or NoneType
    """
    for filename in constants.TREE_INFO_LIST:
        path = os.path.join(tmp_dir, filename)
        url = os.path.join(feed, filename)
        request = DownloadRequest(url, path)
        listener = AggregatingEventListener()
        downloader = nectar_factory.create_downloader(feed, nectar_config, listener)
        downloader.download([request])
        if len(listener.succeeded_reports) == 1:
            # bz 1095829
            strip_treeinfo_repomd(path)
            return path

def file_to_download_request(file_dict, feed, storage_path):
    """
    Takes information about a file described in a treeinfo file and turns that
    into a download request suitable for use with nectar.

    :param file_dict: dict containing keys 'relativepath', 'checksum', and
                      'checksumtype'.
    :type  file_dict: dict
    :param feed: URL to the base of a repository
    :type  feed: basestring
    :param storage_path: full filesystem path to where the downloaded files
                         should be saved.
    :type  storage_path: basestring

    :return: new download request
    :rtype:  nectar.request.DownloadRequest
    """
    savepath = os.path.join(storage_path, file_dict['relativepath'])
    # make directories such as "images"
    if not os.path.exists(os.path.dirname(savepath)):
        os.makedirs(os.path.dirname(savepath))

    return DownloadRequest(
        os.path.join(feed, file_dict['relativepath']),
        savepath,
        file_dict,
    )

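# Hedged usage sketch (added): building a request from an illustrative
# treeinfo entry. The checksum value and both paths are made up.
file_dict = {
    'relativepath': 'images/boot.iso',
    'checksum': 'abc123',
    'checksumtype': 'sha256',
}
request = file_to_download_request(file_dict, 'http://example.com/repo',
                                   '/tmp/distribution')
# request.url         -> 'http://example.com/repo/images/boot.iso'
# request.destination -> '/tmp/distribution/images/boot.iso'
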
def create_download_request(self, image_id, file_name, destination_dir):
    """
    Return a DownloadRequest instance for the given file name and image ID.
    It is desirable to download the actual layer files with a separate
    downloader (for progress tracking, etc), so we just create the download
    requests here and let them get processed elsewhere.

    This adds the Authorization header if a token is known for this
    repository.

    :param image_id: unique ID of a docker image
    :type  image_id: basestring
    :param file_name: name of the file, one of "ancestry", "json", or "layer"
    :type  file_name: basestring
    :param destination_dir: full path to the directory where the file should
                            be saved
    :type  destination_dir: basestring

    :return: a download request instance
    :rtype:  nectar.request.DownloadRequest
    """
    url = self.get_image_url()
    req = DownloadRequest(urlparse.urljoin(url, '/v1/images/%s/%s' % (image_id, file_name)),
                          os.path.join(destination_dir, file_name))
    self.add_auth_header(req)
    return req

def download_metadata_files(self):
    """
    Download the remaining metadata files.
    """
    if not self.metadata:
        raise RuntimeError('%s has not been parsed' % REPOMD_FILE_NAME)

    download_request_list = []

    for file_name, file_info in self.metadata.iteritems():
        # we don't care about the sqlite files
        if file_name.endswith('_db') and file_name in self.KNOWN_TYPES:
            continue

        url = self._url_modify(self.repo_url, path_append=file_info['relative_path'])
        dst = os.path.join(self.dst_dir, file_info['relative_path'].rsplit('/', 1)[-1])

        file_info['local_path'] = dst

        request = DownloadRequest(url, dst)
        download_request_list.append(request)

    self.downloader.download(download_request_list)

    if self.event_listener.failed_reports:
        error_report = self.event_listener.failed_reports[0]
        raise IOError(error_report.error_msg)

def process_main(self, item=None):
    wdir = os.path.join(self.get_working_dir())
    csums_to_download = dict((u.checksum, u)
                             for u in self.parent.step_local_units.units_to_download)
    repometa = self.parent.apt_repo_meta
    reqs = []
    # upstream_url points to the dist itself, dists/stable
    upstream_url = repometa.upstream_url.rstrip('/')
    upstream_url = os.path.dirname(os.path.dirname(upstream_url))
    step_download_units = self.parent.step_download_units
    step_download_units.path_to_unit = dict()
    for ca in repometa.iter_component_arch_binaries():
        dest_dir = os.path.join(wdir, "packages", ca.component)
        misc.mkdir(dest_dir)
        for pkg in ca.iter_packages():
            unit = csums_to_download.get(pkg['SHA256'])
            if not unit:
                continue
            url = os.path.join(upstream_url, pkg['Filename'])
            dest = os.path.join(dest_dir, os.path.basename(url))
            reqs.append(DownloadRequest(url, dest))
            step_download_units.path_to_unit[dest] = unit
    step_download_units._downloads = reqs

def test_fetch_with_timeout(self):
    """
    Test that the report state is failed and that the baseurl can be tried again.
    """
    # simulate requests.Timeout being raised by the session
    def timeout(*args, **kwargs):
        raise Timeout()

    with mock.patch('nectar.downloaders.threaded._logger') as mock_logger:
        URL = 'http://pulpproject.org/primary.xml'
        req = DownloadRequest(URL, StringIO())
        session = threaded.build_session(self.config)
        session.get = timeout
        report = self.downloader._fetch(req, session)

        self.assertEqual(report.state, report.DOWNLOAD_FAILED)
        self.assertNotIn('pulpproject.org', self.downloader.failed_netlocs)

        session2 = threaded.build_session(self.config)
        session2.get = mock.MagicMock()
        report2 = self.downloader._fetch(req, session2)
        self.assertEqual(report2.state, report2.DOWNLOAD_FAILED)
        self.assertEqual(session2.get.call_count, 1)

        expected_log_message = "Request Timeout - Connection with " \
                               "http://pulpproject.org/primary.xml timed out."
        log_calls = [mock_call[1][0] for mock_call in mock_logger.mock_calls]
        self.assertIn(expected_log_message, log_calls)

def get_ancestry(self, image_ids):
    """
    Retrieve the "ancestry" file for each provided image ID, and save each
    in a directory whose name is the image ID.

    :param image_ids: list of image IDs for which the ancestry file should
                      be retrieved
    :type  image_ids: list

    :raises IOError: if a download fails
    """
    requests = []
    for image_id in image_ids:
        path = self.ANCESTRY_PATH % image_id
        url = urlparse.urljoin(self.get_image_url(), path)
        destination = os.path.join(self.working_dir, image_id, 'ancestry')
        try:
            os.mkdir(os.path.split(destination)[0])
        except OSError as e:
            # it's ok if the directory already exists
            if e.errno != errno.EEXIST:
                raise
        request = DownloadRequest(url, destination)
        self.add_auth_header(request)
        requests.append(request)

def test_fetch_with_connection_error_badstatusline(self):
    """
    Test that the baseurl is tried again if a ConnectionError with reason
    BadStatusLine happened.
    """
    # requests.ConnectionError wrapping httplib.BadStatusLine
    def connection_error(*args, **kwargs):
        raise ConnectionError('Connection aborted.', httplib.BadStatusLine("''"))

    with mock.patch('nectar.downloaders.threaded._logger') as mock_logger:
        URL = 'http://fakeurl/primary.xml'
        req = DownloadRequest(URL, StringIO())
        self.session.get.side_effect = connection_error
        self.downloader._fetch(req)

        self.assertEqual(self.session.get.call_count, 2)

        expected_log_msg = [
            'Attempting to connect to http://fakeurl/primary.xml.',
            'Download of http://fakeurl/primary.xml failed. Re-trying.',
            'Re-trying http://fakeurl/primary.xml due to remote server '
            'connection failure.',
            'Download of http://fakeurl/primary.xml failed. Re-trying.',
            'Download of http://fakeurl/primary.xml failed and reached '
            'maximum retries'
        ]
        log_calls = [mock_call[1][0] for mock_call in mock_logger.mock_calls]
        self.assertEqual(expected_log_msg, log_calls)

def test_fetch_with_timeout(self):
    """
    Test that the report state is failed and that the baseurl can be tried again.
    """
    with mock.patch('nectar.downloaders.threaded._logger') as mock_logger:
        URL = 'http://fakeurl/primary.xml'
        req = DownloadRequest(URL, StringIO())
        self.session.get.side_effect = Timeout
        report = self.downloader._fetch(req)

        self.assertEqual(report.state, report.DOWNLOAD_FAILED)
        self.assertNotIn('fakeurl', self.downloader.failed_netlocs)

        session2 = threaded.build_session(self.config)
        session2.get = mock.MagicMock()
        report2 = self.downloader._fetch(req)
        self.assertEqual(report2.state, report2.DOWNLOAD_FAILED)
        self.assertEqual(self.session.get.call_count, 2)

        expected_log_message = "Request Timeout - Connection with " \
                               "http://fakeurl/primary.xml timed out."
        log_calls = [mock_call[1][0] for mock_call in mock_logger.mock_calls]
        self.assertIn(expected_log_message, log_calls)

def retrieve_metadata(self, progress_report):
    """
    Retrieves all metadata documents needed to fulfill the configuration set
    for the repository. The progress report will be updated as the downloads
    take place.

    :param progress_report: used to communicate the progress of this operation
    :type  progress_report: pulp_puppet.importer.sync_progress.ProgressReport

    :return: list of JSON documents describing all modules to import
    :rtype:  list
    """
    urls = self._create_metadata_download_urls()

    # Update the progress report to reflect the number of queries it will take
    progress_report.metadata_query_finished_count = 0
    progress_report.metadata_query_total_count = len(urls)

    listener = HTTPMetadataDownloadEventListener(progress_report)
    self.downloader = self._create_and_configure_downloader(listener)

    request_list = [DownloadRequest(url, StringIO()) for url in urls]

    # Let any exceptions from this bubble up; the caller will update
    # the progress report as necessary
    try:
        self.downloader.download(request_list)
    finally:
        self.downloader.config.finalize()
        self.downloader = None

    for report in listener.failed_reports:
        raise exceptions.FileRetrievalException(report.error_msg)

    return [r.destination.getvalue() for r in request_list]

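# Sketch (added) of the in-memory destination pattern used by several of
# these snippets: nectar accepts a file-like object as the destination, so
# the response body can be read back with getvalue(). The URL is illustrative.
from StringIO import StringIO
from nectar.request import DownloadRequest

destination = StringIO()
request = DownloadRequest('http://example.com/modules.json', destination)
# after downloader.download([request]) completes:
#     body = destination.getvalue()
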
def test_fetch_with_connection_error(self):
    """
    Test that the report state is failed and that the baseurl is not tried again.
    """
    # requests.ConnectionError
    def connection_error(*args, **kwargs):
        raise ConnectionError()

    with mock.patch('nectar.downloaders.threaded._logger') as mock_logger:
        URL = 'http://fakeurl/primary.xml'
        req = DownloadRequest(URL, StringIO())
        self.session.get = connection_error
        try:
            report = self.downloader._fetch(req)
        except ConnectionError:
            raise AssertionError("ConnectionError should not be raised")

        self.assertEqual(report.state, report.DOWNLOAD_FAILED)
        self.assertIn('fakeurl', self.downloader.failed_netlocs)

        report2 = self.downloader._fetch(req)
        self.assertEqual(report2.state, report2.DOWNLOAD_FAILED)

        expected_log_message = "Connection Error - http://fakeurl/primary.xml " \
                               "could not be reached."
        log_calls = [mock_call[1][0] for mock_call in mock_logger.mock_calls]
        self.assertIn(expected_log_message, log_calls)

def process_main(self, item=None):
    releases = self.parent.releases
    components = self.parent.components
    architectures = self.parent.architectures
    dl_reqs = []
    for release in releases:
        self.verify_release(release)
        # generate repo_metas for Releases
        self.parent.apt_repo_meta[release] = repometa = aptrepo.AptRepoMeta(
            release=open(self.parent.release_files[release], "rb"),
            upstream_url=self.parent.feed_urls[release])
        # get release unit
        codename = repometa.codename
        suite = repometa.release.get('suite')
        rel_unit = self.parent.release_units[release] = \
            models.DebRelease.get_or_create_and_associate(self.parent.repo,
                                                          codename, suite)
        # Prevent this unit from being cleaned up
        try:
            self.parent.deb_releases_to_check.remove(rel_unit)
        except ValueError:
            pass
        # get release component units
        for component in repometa.components:
            if components is None or component.split('/')[-1] in components:
                comp_unit = self.parent.component_units[release][component] = \
                    models.DebComponent.get_or_create_and_associate(self.parent.repo,
                                                                    rel_unit,
                                                                    component)
                self.parent.component_packages[release][component] = []
                # Prevent this unit from being cleaned up
                try:
                    self.parent.deb_comps_to_check.remove(comp_unit)
                except ValueError:
                    pass
        # generate download requests for all relevant Packages files
        rel_dl_reqs = repometa.create_Packages_download_requests(
            self.get_working_dir())
        # Filter the rel_dl_reqs by selected components and architectures
        if components:
            rel_dl_reqs = [dlr for dlr in rel_dl_reqs
                           if dlr.data['component'].split('/')[-1] in components]
        if architectures:
            rel_dl_reqs = [dlr for dlr in rel_dl_reqs
                           if dlr.data['architecture'] in architectures]
        self.parent.packages_urls[release] = set([dlr.url for dlr in rel_dl_reqs])
        dl_reqs.extend(rel_dl_reqs)
    self.parent.step_download_Packages._downloads = [
        DownloadRequest(dlr.url, dlr.destination, data=dlr.data)
        for dlr in dl_reqs]

def test_downloads_property(self):
    generator = (DownloadRequest(url, '/a/b/c') for url in ['http://pulpproject.org'])
    dlstep = publish_step.DownloadStep('fake-step', downloads=generator)
    downloads = dlstep.downloads

    self.assertTrue(isinstance(downloads, list))
    self.assertEqual(len(downloads), 1)
    self.assertTrue(isinstance(downloads[0], DownloadRequest))

def _make_requests(self, data_file_names=DATA_FILES):
    requests = []
    for d in data_file_names:
        src_url = 'file:/' + os.path.join(DATA_DIR, d)
        dest_path = os.path.join(self.dest_dir, d)
        requests.append(DownloadRequest(src_url, dest_path))
    return requests

def test_common_link_canceled(self, mock_canceled):
    downloader = local.LocalFileDownloader(DownloaderConfig())
    downloader.cancel()
    request = DownloadRequest('file://' + __file__, '/bar')

    downloader._common_link(mock.MagicMock(), request)

    # make sure the cancel method was called on the report
    mock_canceled.assert_called_once_with()

def __init__(self, repo, conduit, config):
    """
    :param repo: repository to sync
    :type  repo: pulp.plugins.model.Repository
    :param conduit: sync conduit to use
    :type  conduit: pulp.plugins.conduits.repo_sync.RepoSyncConduit
    :param config: config object for the sync
    :type  config: pulp.plugins.config.PluginCallConfiguration
    """
    super(RepoSync, self).__init__(step_type=constants.SYNC_STEP,
                                   repo=repo,
                                   conduit=conduit,
                                   config=config)
    self.description = _('Syncing Repository')

    self.apt_repo_meta = None
    # https://pulp.plan.io/issues/2765 should remove the need to hardcode
    # the dist/component here
    self.feed_url = self.get_config().get('feed').strip('/') + '/dists/stable/'
    self.release_file = os.path.join(self.get_working_dir(), "Release")
    self.available_units = None

    rel_url = urlparse.urljoin(self.feed_url, 'Release')
    _logger.info("Downloading %s", rel_url)
    self.add_child(publish_step.DownloadStep(
        constants.SYNC_STEP_RELEASE_DOWNLOAD,
        plugin_type=ids.TYPE_ID_IMPORTER,
        description=_('Retrieving metadata: release file'),
        downloads=[DownloadRequest(rel_url, self.release_file)]))
    self.add_child(ParseReleaseStep(constants.SYNC_STEP_RELEASE_PARSE))

    self.step_download_Packages = publish_step.DownloadStep(
        constants.SYNC_STEP_PACKAGES_DOWNLOAD,
        plugin_type=ids.TYPE_ID_IMPORTER,
        description=_('Retrieving metadata: Packages files'))
    self.add_child(self.step_download_Packages)
    self.add_child(ParsePackagesStep(constants.SYNC_STEP_PACKAGES_PARSE))

    self.step_local_units = publish_step.GetLocalUnitsStep(
        importer_type=ids.TYPE_ID_IMPORTER)
    self.add_child(self.step_local_units)

    self.add_child(CreateRequestsUnitsToDownload(
        constants.SYNC_STEP_UNITS_DOWNLOAD_REQUESTS))

    self.step_download_units = publish_step.DownloadStep(
        constants.SYNC_STEP_UNITS_DOWNLOAD,
        plugin_type=ids.TYPE_ID_IMPORTER,
        description=_('Retrieving units'))
    self.add_child(self.step_download_units)

    self.add_child(SaveDownloadedUnits(constants.SYNC_STEP_SAVE))

def download_repomd(self):
    """
    Download the main repomd.xml file.
    """
    repomd_dst_path = os.path.join(self.dst_dir, REPOMD_FILE_NAME)
    repomd_url = urljoin(self.repo_url, REPOMD_URL_RELATIVE_PATH)
    repomd_request = DownloadRequest(repomd_url, repomd_dst_path)
    self.downloader.download([repomd_request])
    if self.event_listener.failed_reports:
        error_report = self.event_listener.failed_reports[0]
        raise IOError(error_report.error_msg)

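# A minimal, self-contained sketch (added) of the download-then-check-failures
# pattern used above; the URL and destination path are illustrative.
from nectar.config import DownloaderConfig
from nectar.downloaders.threaded import HTTPThreadedDownloader
from nectar.listener import AggregatingEventListener
from nectar.request import DownloadRequest

listener = AggregatingEventListener()
downloader = HTTPThreadedDownloader(DownloaderConfig(), listener)
request = DownloadRequest('http://example.com/repodata/repomd.xml',
                          '/tmp/repomd.xml')
downloader.download([request])
if listener.failed_reports:
    raise IOError(listener.failed_reports[0].error_msg)
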
def _get_single_path(self, path):
    """
    Retrieve a single path within the upstream registry, and return its
    body after deserializing it as json.

    :param path: a full http path to retrieve that will be urljoin'd to the
                 upstream registry url.
    :type  path: basestring

    :return: whatever gets deserialized out of the response body's json
    """
    # if talking to docker hub, we'll get an endpoint specified, and then we'll
    # have to get tags from that endpoint instead of talking to the original
    # feed URL.
    if self.endpoint:
        # we assume the same scheme that the registry URL used
        registry_url_parts = urlparse.urlsplit(self.registry_url)
        parts = urlparse.SplitResult(scheme=registry_url_parts.scheme,
                                     netloc=self.endpoint,
                                     path=path, query=None, fragment=None)
        url = urlparse.urlunsplit(parts)
    else:
        url = urlparse.urljoin(self.registry_url, path)
    request = DownloadRequest(url, StringIO())
    if path.endswith('/images'):
        # this is required by the docker index and indicates that it should
        # return an auth token
        if request.headers is None:
            request.headers = {}
        request.headers[self.DOCKER_TOKEN_HEADER] = 'true'
    # endpoints require auth
    if self.endpoint:
        self.add_auth_header(request)
    report = self.downloader.download_one(request)

    if report.state == report.DOWNLOAD_FAILED:
        raise IOError(report.error_msg)

    self._parse_response_headers(report.headers)
    return json.loads(report.destination.getvalue())

def test_calls_fetch(self, mock_fetch):
    config = DownloaderConfig()
    request = DownloadRequest('http://foo', StringIO())
    report = DownloadReport.from_download_request(request)
    downloader = threaded.HTTPThreadedDownloader(config)
    mock_fetch.return_value = report

    ret = downloader._download_one(request)

    self.assertEqual(mock_fetch.call_count, 1)
    self.assertTrue(ret is report)
    self.assertTrue(mock_fetch.call_args[0][0] is request)

def test_unsupported_url_scheme(self):
    config = DownloaderConfig(use_sym_links=True)
    listener = AggregatingEventListener()
    downloader = local.LocalFileDownloader(config, listener)
    request = DownloadRequest('http://thiswontwork.com',
                              os.path.join(self.dest_dir, 'doesnt.even.matter'))

    downloader.download([request])

    self.assertEqual(len(listener.succeeded_reports), 0)
    self.assertEqual(len(listener.failed_reports), 1)

def test_source_bad_permissions(self):
    config = DownloaderConfig(use_sym_links=True)
    listener = AggregatingEventListener()
    downloader = local.LocalFileDownloader(config, listener)
    request = DownloadRequest('file://root/no',
                              os.path.join(self.dest_dir, 'doesnt.even.matter'))

    downloader.download([request])

    self.assertEqual(len(listener.succeeded_reports), 0)
    self.assertEqual(len(listener.failed_reports), 1)

def __iter__(self):
    """
    Performs a get() on the queue until reaching the end-of-queue marker.

    :return: An iterable of: DownloadRequest.
    :rtype:  iterable
    """
    while True:
        item = self.queue.get()
        if item is None:
            # end-of-queue marker
            return
        request = DownloadRequest(item.url, item.request.destination, data=item.request)
        yield request

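# Note (added): the ``data`` argument passed to DownloadRequest is opaque to
# nectar; it is carried onto the resulting DownloadReport, which is how the
# original request object above can be recovered in listener callbacks.
from nectar.request import DownloadRequest

request = DownloadRequest('http://example.com/a.iso', '/tmp/a.iso',
                          data={'unit_id': '1234'})  # illustrative payload
# in a DownloadEventListener callback:
#     def download_succeeded(self, report):
#         original = report.data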