def refresh_deb_repo(repo):
    """ Refresh the mirrors of a debian repo that are flagged for refresh.
        For each mirror the Packages* index is located and downloaded. Its
        packages are re-extracted unless the checksum of the downloaded
        index equals the checksum already stored on the mirror.
    """
    index_names = ['Packages.bz2', 'Packages.gz', 'Packages']

    for mirror in repo.mirror_set.filter(refresh=True):
        repo_url, res, _unused = find_mirror_url(mirror.url, index_names)
        mirror.last_access_ok = check_response(res)

        if not mirror.last_access_ok:
            mirror.fail()
            mirror.save()
            continue

        found_text = 'Found deb repo - %s\n' % repo_url
        info_message.send(sender=None, text=found_text)

        data = download_url(res, 'Downloading repo info:')
        if data is None:
            mirror.fail()
            # NOTE(review): this leaves the function, so any remaining
            # mirrors are not processed in this run - confirm intended.
            return

        digest = get_sha1(data)
        if mirror.file_checksum != digest:
            # The index changed: rebuild the package set for this mirror.
            packages = set()
            extract_deb_packages(data, packages)
            mirror.last_access_ok = True
            mirror.timestamp = datetime.now()
            update_mirror_packages(mirror, packages)
            mirror.file_checksum = digest
            packages.clear()
        else:
            unchanged_text = ('Mirror checksum has not changed, '
                              'not refreshing package metadata\n')
            info_message.send(sender=None, text=unchanged_text)

        mirror.save()
def refresh_yum_repo(mirror, data, mirror_url, ts):
    """ Refresh package metadata for a yum-style rpm mirror and add the
        packages to the mirror.

        mirror     -- the mirror object being refreshed
        data       -- repo metadata already downloaded from mirror_url; it
                      is handed to get_primary_url to locate the primary
                      package list
        mirror_url -- url the repo metadata was found at
        ts         -- timestamp of the current refresh run, used to count
                      the mirrors of the same repo that already carry this
                      checksum

        Returns None. Every failure path calls mirror.fail() and returns
        early.
    """
    primary_url, checksum, checksum_type = get_primary_url(mirror_url, data)
    if not primary_url:
        mirror.fail()
        return

    res = get_url(primary_url)
    mirror.last_access_ok = response_is_valid(res)
    if not mirror.last_access_ok:
        mirror.fail()
        return

    data = download_url(res, 'Downloading repo info (2/2):')
    if data is None:
        mirror.fail()
        return

    sha = get_sha(checksum_type, data)
    if sha is None:
        mirror.fail()
        return

    if not checksum_is_valid(sha, checksum, mirror):
        mirror.fail()
        return

    if mirror.file_checksum == checksum:
        text = 'Mirror checksum has not changed, '
        text += 'not refreshing package metadata'
        warning_message.send(sender=None, text=text)
        return

    mirror.file_checksum = checksum

    # MAX_MIRRORS is an optional cap. When it is missing or not an int
    # there is no cap and the packages are always extracted; previously the
    # extraction was only reachable when the setting was a valid int.
    max_mirrors = getattr(settings, 'MAX_MIRRORS', None)
    if isinstance(max_mirrors, int):
        # only refresh X mirrors, where X = max_mirrors
        checksum_q = Q(mirrorlist=False, refresh=True, timestamp=ts,
                       file_checksum=checksum)
        have_checksum = mirror.repo.mirror_set.filter(checksum_q).count()
        if have_checksum >= max_mirrors:
            text = '{0!s} mirrors already have this '.format(max_mirrors)
            text += 'checksum, ignoring refresh to save time'
            info_message.send(sender=None, text=text)
            return

    packages = extract_yum_packages(data, primary_url)
    if packages:
        update_mirror_packages(mirror, packages)
def refresh_rpm_repo(repo):
    """ Refresh an rpm repo. Mirrorlists and metalinks of the repo are
        checked first. Each refreshable, non-mirrorlist mirror is then
        probed for a number of common rpm repo metadata locations and
        handed to the yast or yum refresh routine, depending on which
        location answered.
    """
    yum_formats = [
        'repodata/repomd.xml.bz2',
        'repodata/repomd.xml.gz',
        'repodata/repomd.xml',
    ]
    suse_formats = [
        'suse/repodata/repomd.xml.bz2',
        'suse/repodata/repomd.xml.gz',
        'suse/repodata/repomd.xml',
    ]
    if lzma is not None:
        # xz variants are tried first within each group
        yum_formats = ['repodata/repomd.xml.xz'] + yum_formats
        suse_formats = ['suse/repodata/repomd.xml.xz'] + suse_formats
    formats = yum_formats + suse_formats + ['content']

    check_for_mirrorlists(repo)
    check_for_metalinks(repo)

    # One timestamp, truncated to whole seconds, shared by all mirrors
    # refreshed in this run.
    ts = datetime.now().replace(microsecond=0)

    mirrors = repo.mirror_set.filter(mirrorlist=False, refresh=True)
    for mirror in mirrors:
        res = find_mirror_url(mirror.url, formats)
        mirror.last_access_ok = response_is_valid(res)

        if not mirror.last_access_ok:
            mirror.fail()
            mirror.save()
            continue

        data = download_url(res, 'Downloading repo info (1/2):')
        if data is None:
            mirror.fail()
            # NOTE(review): this leaves the function, so any remaining
            # mirrors are not processed in this run - confirm intended.
            return

        mirror_url = res.url
        is_yast = res.url.endswith('content')
        if is_yast:
            info_message.send(
                sender=None,
                text='Found yast rpm repo - {0!s}'.format(mirror_url))
            refresh_yast_repo(mirror, data)
        else:
            info_message.send(
                sender=None,
                text='Found yum rpm repo - {0!s}'.format(mirror_url))
            refresh_yum_repo(mirror, data, mirror_url, ts)

        mirror.timestamp = ts
        mirror.save()
def get_mirrorlist_urls(url):
    """ Checks if a given url returns a mirrorlist by checking if it is of
        type text/plain and contains a list of urls. Returns a list of
        mirrors if it is a mirrorlist.

        The urls are returned as the bytes objects matched in the
        downloaded data. Lines starting with http://, https:// or ftp://
        are treated as mirror urls. Returns None if the response is not
        valid, is not text/plain, cannot be downloaded, or contains no
        such lines.
    """
    res = get_url(url)
    if not response_is_valid(res):
        return None

    if 'content-type' not in res.headers or \
            'text/plain' not in res.headers['content-type']:
        return None

    data = download_url(res, 'Downloading repo info:')
    if data is None:
        return None

    # `https?` also accepts https mirrors, which were previously dropped.
    mirror_urls = re.findall(b'^https?://.*$|^ftp://.*$', data, re.MULTILINE)
    if mirror_urls:
        return mirror_urls
    return None
def get_metalink_urls(url):
    """ Parses a metalink and returns a list of mirrors.

        Only responses whose content-type is exactly
        application/metalink+xml are parsed. The result is the text of
        every https url resource listed for the file named repomd.xml.
        Returns None if the response is not valid, has a different
        content-type, or cannot be downloaded.
    """
    res = get_url(url)
    if not response_is_valid(res):
        return None

    if 'content-type' not in res.headers or \
            res.headers['content-type'] != 'application/metalink+xml':
        return None

    data = download_url(res, 'Downloading repo info:')
    if data is None:
        # A failed download used to reach BytesIO(None) and raise
        # TypeError; bail out like the other download helpers do.
        return None

    ns = 'http://www.metalinker.org/'
    try:
        context = etree.parse(BytesIO(data), etree.XMLParser())
    except etree.XMLSyntaxError:
        # Not parseable as-is: retry on the gz-extracted payload.
        context = etree.parse(BytesIO(extract(data, 'gz')), etree.XMLParser())

    xpath = ("//ns:files/ns:file[@name='repomd.xml']"
             "/ns:resources/ns:url[@protocol='https']")
    metalink_urls = context.xpath(xpath, namespaces={'ns': ns})
    return [x.text for x in metalink_urls]
def mirrorlist_check(mirror_url):
    """ Checks if a given url returns a mirrorlist. Does this by checking
        if it is of type text/plain and contains a list of urls.

        Returns the list of lines starting with http://, https:// or
        ftp://, or None if get_url returned an int, the content-type does
        not start with text/plain, the download failed, or no such lines
        were found.
    """
    res = get_url(mirror_url)
    # An int result from get_url is treated as "no usable response".
    if isinstance(res, int):
        return None

    headers = dict(res.headers.items())
    if 'content-type' not in headers or \
            re.match('text/plain', headers['content-type']) is None:
        return None

    data = download_url(res, 'Downloading repo info:')
    if data is None:
        return None

    # `https?` also accepts https mirrors, which were previously dropped.
    mirror_urls = re.findall('^https?://.*$|^ftp://.*$', data, re.MULTILINE)
    if mirror_urls:
        return mirror_urls
    return None
def refresh_rpm_repo(repo):
    """ Refresh an rpm repo. The repo is checked for mirrorlists first.
        Each refreshable, non-mirrorlist mirror is then probed for a
        number of common rpm repo metadata locations and handed to the
        yast or yum refresh routine, depending on what find_mirror_url
        reports.
    """
    formats = [
        'repodata/repomd.xml.bz2',
        'repodata/repomd.xml.gz',
        'repodata/repomd.xml',
        'suse/repodata/repomd.xml.bz2',
        'suse/repodata/repomd.xml.gz',
        'suse/repodata/repomd.xml',
        'content',
    ]

    mirrorlists_check(repo)

    # One timestamp, truncated to whole seconds, shared by all mirrors
    # refreshed in this run.
    ts = datetime.now().replace(microsecond=0)

    mirrors = repo.mirror_set.filter(mirrorlist=False, refresh=True)
    for mirror in mirrors:
        repo_url, res, yast = find_mirror_url(mirror.url, formats)
        mirror.last_access_ok = check_response(res)

        if not mirror.last_access_ok:
            mirror.fail()
            mirror.save()
            continue

        data = download_url(res, 'Downloading repo info (1/2):')
        if data is None:
            mirror.fail()
            # NOTE(review): this leaves the function, so any remaining
            # mirrors are not processed in this run - confirm intended.
            return

        if yast:
            info_message.send(sender=None,
                              text='Found yast rpm repo - %s\n' % repo_url)
            refresh_yast_repo(mirror, data, repo_url)
        else:
            info_message.send(sender=None,
                              text='Found yum rpm repo - %s\n' % repo_url)
            refresh_yum_repo(mirror, data, repo_url, ts)

        mirror.timestamp = ts
        mirror.save()
def refresh_yast_repo(mirror, data):
    """ Refresh package metadata for a yast-style rpm mirror and add the
        packages to the mirror.

        mirror -- the mirror object being refreshed
        data   -- bytes of the repo's content file; its DESCRDIR entry
                  names the directory that holds packages.gz

        Returns None. The mirror is failed via mirror.fail() if the
        content file has no DESCRDIR entry, the package list cannot be
        reached, or its download fails.
    """
    descr_match = re.search(b'DESCRDIR *(.*)', data)
    if descr_match is None:
        # Without DESCRDIR the package list cannot be located. Indexing
        # the empty findall() result used to raise IndexError here.
        mirror.fail()
        return

    package_dir = descr_match.group(1).decode('ascii')
    package_url = '{0!s}/{1!s}/packages.gz'.format(mirror.url, package_dir)

    res = get_url(package_url)
    mirror.last_access_ok = response_is_valid(res)
    if not mirror.last_access_ok:
        mirror.fail()
        return

    data = download_url(res, 'Downloading repo info (2/2):')
    if data is None:
        mirror.fail()
        return

    # A fixed marker string is stored as the checksum for yast mirrors.
    mirror.file_checksum = 'yast'
    packages = extract_yast_packages(data)
    if packages:
        update_mirror_packages(mirror, packages)
def refresh_yast_repo(mirror, data, repo_url):
    """ Refresh package metadata for a yast-style rpm mirror and add the
        packages to the mirror.

        mirror   -- the mirror object being refreshed
        data     -- text of the repo's content file; its DESCRDIR entry
                    names the directory that holds packages.gz
        repo_url -- accepted for the caller's benefit, not used here

        Returns None. The mirror is failed via mirror.fail() if the
        content file has no DESCRDIR entry, the package list cannot be
        reached, or its download fails.
    """
    descr_match = re.search('DESCRDIR *(.*)', data)
    if descr_match is None:
        # Without DESCRDIR the package list cannot be located. Indexing
        # the empty findall() result used to raise IndexError here.
        mirror.fail()
        return

    package_dir = descr_match.group(1)
    package_url = '%s/%s/packages.gz' % (mirror.url, package_dir)

    res = get_url(package_url)
    mirror.last_access_ok = check_response(res)
    if not mirror.last_access_ok:
        mirror.fail()
        return

    data = download_url(res, 'Downloading repo info (2/2):')
    if data is None:
        mirror.fail()
        return

    # A fixed marker string is stored as the checksum for yast mirrors.
    mirror.file_checksum = 'yast'
    packages = extract_yast_packages(data)
    if packages:
        update_mirror_packages(mirror, packages)
def refresh_yum_repo(mirror, data, repo_url, ts):
    """ Refresh package metadata for a yum-style rpm mirror and add the
        packages to the mirror.

        mirror   -- the mirror object being refreshed
        data     -- repo metadata already downloaded from repo_url; it is
                    handed to get_primary_url to locate the primary
                    package list
        repo_url -- url the repo metadata was found at
        ts       -- timestamp of the current refresh run, used to count
                    the mirrors of the same repo that already carry this
                    checksum

        Returns None. Every failure path calls mirror.fail() and returns
        early.
    """
    primary_url, checksum, checksum_type = get_primary_url(repo_url, data)
    if not primary_url:
        mirror.fail()
        return

    res = get_url(primary_url)
    mirror.last_access_ok = response_is_valid(res)
    if not mirror.last_access_ok:
        mirror.fail()
        return

    data = download_url(res, 'Downloading repo info (2/2):')
    if data is None:
        mirror.fail()
        return

    if not checksum_is_valid(mirror, checksum, checksum_type, data):
        mirror.fail()
        return
    mirror.file_checksum = checksum

    # MAX_MIRRORS is an optional cap. When it is missing or not an int
    # there is no cap and the packages are always extracted; previously the
    # extraction was only reachable when the setting was a valid int.
    max_mirrors = getattr(settings, 'MAX_MIRRORS', None)
    if isinstance(max_mirrors, int):
        # only refresh X mirrors, where X = max_mirrors
        checksum_q = Q(mirrorlist=False, refresh=True, timestamp=ts,
                       file_checksum=checksum)
        have_checksum = mirror.repo.mirror_set.filter(checksum_q).count()
        if have_checksum >= max_mirrors:
            text = '%s mirrors already have this checksum, ' % max_mirrors
            text += 'ignoring refresh to save time\n'
            info_message.send(sender=None, text=text)
            return

    packages = extract_yum_packages(data)
    if packages:
        update_mirror_packages(mirror, packages)
def refresh_deb_repo(repo):
    """ Refresh the mirrors of a debian repo that are flagged for refresh.
        For each mirror the Packages* index is located and downloaded. Its
        packages are re-extracted unless the checksum of the downloaded
        index equals the checksum already stored on the mirror.
    """
    formats = ['Packages.bz2', 'Packages.gz', 'Packages']
    if lzma is not None:
        # the xz index is tried first when lzma is available
        formats = ['Packages.xz'] + formats

    for mirror in repo.mirror_set.filter(refresh=True):
        res = find_mirror_url(mirror.url, formats)
        mirror.last_access_ok = response_is_valid(res)

        if not mirror.last_access_ok:
            mirror.fail()
            mirror.save()
            continue

        mirror_url = res.url
        info_message.send(sender=None,
                          text='Found deb repo - {0!s}'.format(mirror_url))

        data = download_url(res, 'Downloading repo info:')
        if data is None:
            mirror.fail()
            # NOTE(review): this leaves the function, so any remaining
            # mirrors are not processed in this run - confirm intended.
            return

        digest = get_sha1(data)
        if mirror.file_checksum != digest:
            # The index changed: rebuild the package set for this mirror.
            packages = extract_deb_packages(data, mirror_url)
            mirror.last_access_ok = True
            mirror.timestamp = datetime.now()
            update_mirror_packages(mirror, packages)
            mirror.file_checksum = digest
            packages.clear()
        else:
            unchanged_text = ('Mirror checksum has not changed, '
                              'not refreshing package metadata')
            warning_message.send(sender=None, text=unchanged_text)

        mirror.save()
def download_errata():
    """ Fetch the latest CentOS errata file from
        https://cefs.steve-meier.de/ and return the result of
        download_url for it.
    """
    errata_url = 'https://cefs.steve-meier.de/errata.latest.xml'
    response = get_url(errata_url)
    return download_url(response, 'Downloading CentOS Errata:')