def main():
    """Download one archived mailing-list message and feed it to MyParser.

    Reads ``sys.argv``:

    - no arguments: write the usage text to stderr and exit with status 0;
    - anything other than four arguments: write an error to stderr and
      exit with status 1;
    - ``list year month msgno``: build the archive URL, print it, and
      stream the page to a ``MyParser`` instance in ``CHUNKSIZE`` pieces.

    The HTTP handle is always closed, even when reading or parsing fails.
    """
    argc = len(sys.argv)
    if argc == 1:
        sys.stderr.write(
            "usage: mlpatch.py dev|users year month msgno > foobar.patch\n"
            "example: mlpatch.py dev 2005 01 0001 > issue-XXXX.patch\n"
            " Very annoyingly, the http://svn.haxx.se/ subversion mailing"
            " list archives mangle inline patches, and provide no raw"
            " message download facility (other than for an entire month's"
            " email as an mbox). So, I wrote this script, to demangle them."
            " It's not perfect, as it has to guess about whitespace, but it"
            " does an acceptable job.\n")
        sys.exit(0)
    if argc != 5:
        sys.stderr.write("error: mlpatch.py: Bad parameters - run with no "
                         "parameters for usage\n")
        sys.exit(1)

    # Named ml_name rather than "list" so the builtin is not shadowed.
    ml_name, year, month, msgno = sys.argv[1:]
    url = "http://svn.haxx.se/%s/archive-%s-%s/%s.shtml" % (
        ml_name, year, month, msgno)
    print("MsgUrl: " + url)

    msgfile = urllib_request_urlopen(url)
    try:
        p = MyParser()
        buffer = msgfile.read(CHUNKSIZE)
        while buffer:
            p.feed(buffer)
            buffer = msgfile.read(CHUNKSIZE)
        p.close()
    finally:
        # Release the connection even if read()/feed() raised.
        msgfile.close()
def main(search_keyword):
    """Fetch the search page for ``search_keyword`` and parse it.

    The URL comes from ``get_url``; the request is sent with a desktop
    Chrome User-Agent header, the body is decoded as UTF-8 and passed,
    together with the keyword, to ``get_youku_result`` whose return value
    is returned unchanged.
    """
    target = get_url(search_keyword)
    browser_headers = {
        'User-Agent': (
            'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_5) '
            'AppleWebKit/537.36 (KHTML, like Gecko) '
            'Chrome/50.0.2661.102 Safari/537.36'
        )
    }
    request = urllib_request_Request(target, headers=browser_headers)
    raw_body = urllib_request_urlopen(request).read()
    html = raw_body.decode('utf8')
    return get_youku_result(search_keyword, html)
def main(search_keyword):
    """Fetch the search page for ``search_keyword`` and return inverted results.

    The page body (decoded as UTF-8) is handed to ``get_baidu_result_1``;
    the mapping it yields is returned with keys and values swapped.
    """
    target = get_url(search_keyword)
    browser_headers = {
        'User-Agent': (
            'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_5) '
            'AppleWebKit/537.36 (KHTML, like Gecko) '
            'Chrome/50.0.2661.102 Safari/537.36'
        )
    }
    request = urllib_request_Request(target, headers=browser_headers)
    html = urllib_request_urlopen(request).read().decode('utf8')

    found = {}
    # get_baidu_result_2(search_keyword, html) is intentionally not merged in.
    found.update(get_baidu_result_1(search_keyword, html))

    # Swap keys and values; later entries win on duplicate values.
    inverted = {}
    for key, value in found.items():
        inverted[value] = key
    return inverted
def read(self):
    """
    Here we build the filename from the config and the ID and pass
    it to urllib to fetch it from the filesystem or a remote server.
    The opened stream is handed to self.parse() and is always closed
    afterwards, even if parsing raises.

    @rtype: None
    @return: None
    """
    if "GLSA_DIR" in self.config:
        repository = "file://" + self.config["GLSA_DIR"] + "/"
    else:
        repository = "file://" + self.config["PORTDIR"] + "/metadata/glsa/"
    if self.type == "file":
        # self.nr is used as a filesystem path in this mode.
        myurl = "file://" + self.nr
    else:
        myurl = repository + "glsa-%s.xml" % str(self.nr)
    f = urllib_request_urlopen(myurl)
    try:
        self.parse(f)
    finally:
        # Do not leak the handle when parse() fails.
        f.close()
    return None
def read(self):
    """
    Here we build the filename from the config and the ID and pass
    it to urllib to fetch it from the filesystem or a remote server.
    The stream returned by urllib is passed to self.parse() and closed
    once parsing finishes or fails.

    @rtype: None
    @return: None
    """
    if "GLSA_DIR" in self.config:
        repository = "file://" + self.config["GLSA_DIR"] + "/"
    else:
        repository = "file://" + self.config["PORTDIR"] + "/metadata/glsa/"
    if self.type == "file":
        # In "file" mode self.nr already holds the path to open.
        myurl = "file://" + self.nr
    else:
        myurl = repository + "glsa-%s.xml" % str(self.nr)
    f = urllib_request_urlopen(myurl)
    try:
        self.parse(f)
    finally:
        # Close the handle regardless of the outcome of parse().
        f.close()
    return None
def read(self):
    """
    Work out where the advisory lives (from the config and the ID),
    open that location through urllib and feed the stream to
    self.parse(). The stream is closed afterwards in every case.

    @rtype: None
    @return: None
    """
    # The repository prefix is computed first, even in "file" mode, so a
    # missing PORTDIR key surfaces exactly as before.
    if "GLSA_DIR" in self.config:
        glsa_root = f"{self.config['GLSA_DIR']}/"
    else:
        glsa_root = f"{self.config['PORTDIR']}/metadata/glsa/"
    repository = f"file://{glsa_root}"

    location = (
        f"file://{self.nr}"
        if self.type == "file"
        else f"{repository}glsa-{self.nr}.xml"
    )

    stream = urllib_request_urlopen(location)
    try:
        self.parse(stream)
    finally:
        stream.close()
def _populate(self, getbinpkgs=0):
    """
    Scan self.pkgdir for local *.tbz2 binary packages and, when
    requested, load the package list offered by PORTAGE_BINHOST.

    Local phase: every non-symlink *.tbz2 below pkgdir is either matched
    against the cached Packages index (by MTIME and SIZE) or has its xpak
    metadata read. Valid packages are recorded in self._pkg_paths and
    injected into self.dbapi; the Packages index is rewritten when
    entries changed and pkgdir is writable.

    Remote phase (getbinpkgs true and PORTAGE_BINHOST set): the remote
    Packages index is fetched and stored in self._remotepkgs. If no
    usable index is available the code falls back to
    portage.getbinpkg.dir_get_metadata().

    @param getbinpkgs: when true, also query the remote binhost
    @rtype: int or None
    @return: 0 if pkgdir is not a directory and getbinpkgs is false;
        None otherwise
    """
    if (not os.path.isdir(self.pkgdir) and not getbinpkgs):
        return 0

    # Clear all caches in case populate is called multiple times
    # as may be the case when _global_updates calls populate()
    # prior to performing package moves since it only wants to
    # operate on local packages (getbinpkgs=0).
    self._remotepkgs = None
    self.dbapi._clear_cache()
    self.dbapi._aux_cache.clear()
    # Always-true guard: the local scan below runs unconditionally.
    if True:
        # cpv -> path relative to pkgdir, for every accepted local package.
        pkg_paths = {}
        self._pkg_paths = pkg_paths
        dirs = listdir(self.pkgdir, dirsonly=True, EmptyOnError=True)
        # Visit "All" first so that packages found there take precedence
        # over duplicates in the category directories.
        if "All" in dirs:
            dirs.remove("All")
        dirs.sort()
        dirs.insert(0, "All")
        pkgindex = self._load_pkgindex()
        # Lazily built map PF -> [index entries], only needed for "All".
        pf_index = None
        if not self._pkgindex_version_supported(pkgindex):
            pkgindex = self._new_pkgindex()
        header = pkgindex.header
        # cpv -> index entry (dict) as loaded from the Packages index.
        metadata = {}
        for d in pkgindex.packages:
            metadata[d["CPV"]] = d
        update_pkgindex = False
        for mydir in dirs:
            for myfile in listdir(os.path.join(self.pkgdir, mydir)):
                if not myfile.endswith(".tbz2"):
                    continue
                mypath = os.path.join(mydir, myfile)
                full_path = os.path.join(self.pkgdir, mypath)
                # lstat (not stat) so that symlinks can be detected and skipped.
                s = os.lstat(full_path)
                if stat.S_ISLNK(s.st_mode):
                    continue

                # Validate data from the package index and try to avoid
                # reading the xpak if possible.
                if mydir != "All":
                    # The directory name is the category, so the cpv is
                    # known: <dir>/<file name without ".tbz2">.
                    possibilities = None
                    d = metadata.get(mydir+"/"+myfile[:-5])
                    if d:
                        possibilities = [d]
                else:
                    # In "All" the category is unknown; look candidates
                    # up by PF instead.
                    if pf_index is None:
                        pf_index = {}
                        for mycpv in metadata:
                            mycat, mypf = catsplit(mycpv)
                            pf_index.setdefault(
                                mypf, []).append(metadata[mycpv])
                    possibilities = pf_index.get(myfile[:-5])
                if possibilities:
                    match = None
                    # Accept the first entry whose MTIME and SIZE equal
                    # the file's and which carries all _pkgindex_keys.
                    for d in possibilities:
                        try:
                            if long(d["MTIME"]) != s[stat.ST_MTIME]:
                                continue
                        except (KeyError, ValueError):
                            continue
                        try:
                            if long(d["SIZE"]) != long(s.st_size):
                                continue
                        except (KeyError, ValueError):
                            continue
                        if not self._pkgindex_keys.difference(d):
                            match = d
                            break
                    if match:
                        # Here d is the same object as match (the loop
                        # above exited via break).
                        mycpv = match["CPV"]
                        if mycpv in pkg_paths:
                            # discard duplicates (All/ is preferred)
                            continue
                        pkg_paths[mycpv] = mypath
                        # update the path if the package has been moved
                        oldpath = d.get("PATH")
                        if oldpath and oldpath != mypath:
                            update_pkgindex = True
                        # PATH is stored only for non-default locations.
                        if mypath != mycpv + ".tbz2":
                            d["PATH"] = mypath
                            if not oldpath:
                                update_pkgindex = True
                        else:
                            d.pop("PATH", None)
                            if oldpath:
                                update_pkgindex = True
                        self.dbapi.cpv_inject(mycpv)
                        # Seed the aux cache when the index entry has
                        # every key the cache requires.
                        if not self.dbapi._aux_cache_keys.difference(d):
                            aux_cache = self.dbapi._aux_cache_slot_dict()
                            for k in self.dbapi._aux_cache_keys:
                                aux_cache[k] = d[k]
                            self.dbapi._aux_cache[mycpv] = aux_cache
                        continue
                # No usable index entry: fall back to reading the xpak.
                if not os.access(full_path, os.R_OK):
                    writemsg(_("!!! Permission denied to read " \
                        "binary package: '%s'\n") % full_path,
                        noiselevel=-1)
                    self.invalids.append(myfile[:-5])
                    continue
                metadata_bytes = portage.xpak.tbz2(full_path).get_data()
                mycat = _unicode_decode(metadata_bytes.get(
                    _unicode_encode("CATEGORY",
                    encoding=_encodings['repo.content']), ""),
                    encoding=_encodings['repo.content'], errors='replace')
                mypf = _unicode_decode(metadata_bytes.get(
                    _unicode_encode("PF",
                    encoding=_encodings['repo.content']), ""),
                    encoding=_encodings['repo.content'], errors='replace')
                slot = _unicode_decode(metadata_bytes.get(
                    _unicode_encode("SLOT",
                    encoding=_encodings['repo.content']), ""),
                    encoding=_encodings['repo.content'], errors='replace')
                mypkg = myfile[:-5]
                if not mycat or not mypf or not slot:
                    #old-style or corrupt package
                    writemsg(_("\n!!! Invalid binary package: '%s'\n") % full_path,
                        noiselevel=-1)
                    missing_keys = []
                    if not mycat:
                        missing_keys.append("CATEGORY")
                    if not mypf:
                        missing_keys.append("PF")
                    if not slot:
                        missing_keys.append("SLOT")
                    msg = []
                    if missing_keys:
                        missing_keys.sort()
                        msg.append(_("Missing metadata key(s): %s.") % \
                            ", ".join(missing_keys))
                    msg.append(_(" This binary package is not " \
                        "recoverable and should be deleted."))
                    from textwrap import wrap
                    for line in wrap("".join(msg), 72):
                        writemsg("!!! %s\n" % line, noiselevel=-1)
                    self.invalids.append(mypkg)
                    continue
                mycat = mycat.strip()
                slot = slot.strip()
                # Silently skip files whose embedded CATEGORY/PF do not
                # agree with their directory or file name.
                if mycat != mydir and mydir != "All":
                    continue
                if mypkg != mypf.strip():
                    continue
                mycpv = mycat + "/" + mypkg
                if mycpv in pkg_paths:
                    # All is first, so it's preferred.
                    continue
                if not self.dbapi._category_re.match(mycat):
                    writemsg(_("!!! Binary package has an " \
                        "unrecognized category: '%s'\n") % full_path,
                        noiselevel=-1)
                    writemsg(_("!!! '%s' has a category that is not" \
                        " listed in %setc/portage/categories\n") % \
                        (mycpv, self.settings["PORTAGE_CONFIGROOT"]),
                        noiselevel=-1)
                    continue
                pkg_paths[mycpv] = mypath
                self.dbapi.cpv_inject(mycpv)
                # Metadata came from the xpak, so the index is stale.
                update_pkgindex = True
                d = metadata.get(mycpv, {})
                # Discard a cached entry whose MTIME or SIZE no longer
                # matches the file on disk.
                if d:
                    try:
                        if long(d["MTIME"]) != s[stat.ST_MTIME]:
                            d.clear()
                    except (KeyError, ValueError):
                        d.clear()
                if d:
                    try:
                        if long(d["SIZE"]) != long(s.st_size):
                            d.clear()
                    except (KeyError, ValueError):
                        d.clear()

                d["CPV"] = mycpv
                d["SLOT"] = slot
                d["MTIME"] = str(s[stat.ST_MTIME])
                d["SIZE"] = str(s.st_size)

                d.update(zip(self._pkgindex_aux_keys,
                    self.dbapi.aux_get(mycpv, self._pkgindex_aux_keys)))
                try:
                    self._eval_use_flags(mycpv, d)
                except portage.exception.InvalidDependString:
                    writemsg(_("!!! Invalid binary package: '%s'\n") % \
                        self.getname(mycpv), noiselevel=-1)
                    self.dbapi.cpv_remove(mycpv)
                    # NOTE(review): execution continues below and still
                    # stores d in metadata; the entry is dropped later
                    # by the "cpv not in pkg_paths" sweep.
                    del pkg_paths[mycpv]

                # record location if it's non-default
                if mypath != mycpv + ".tbz2":
                    d["PATH"] = mypath
                else:
                    d.pop("PATH", None)
                metadata[mycpv] = d
                if not self.dbapi._aux_cache_keys.difference(d):
                    aux_cache = self.dbapi._aux_cache_slot_dict()
                    for k in self.dbapi._aux_cache_keys:
                        aux_cache[k] = d[k]
                    self.dbapi._aux_cache[mycpv] = aux_cache

        # Drop index entries for packages that were not found on disk.
        for cpv in list(metadata):
            if cpv not in pkg_paths:
                del metadata[cpv]

        # Do not bother to write the Packages index if $PKGDIR/All/ exists
        # since it will provide no benefit due to the need to read CATEGORY
        # from xpak.
        if update_pkgindex and os.access(self.pkgdir, os.W_OK):
            del pkgindex.packages[:]
            pkgindex.packages.extend(iter(metadata.values()))
            self._update_pkgindex_header(pkgindex.header)
            from portage.util import atomic_ofstream
            f = atomic_ofstream(self._pkgindex_file)
            try:
                pkgindex.write(f)
            finally:
                f.close()

    # Raises KeyError if PORTAGE_BINHOST is absent from settings, unless
    # self.settings returns a default for missing keys -- TODO confirm.
    if getbinpkgs and not self.settings["PORTAGE_BINHOST"]:
        writemsg(_("!!! PORTAGE_BINHOST unset, but use is requested.\n"),
            noiselevel=-1)

    if getbinpkgs and 'PORTAGE_BINHOST' in self.settings:
        base_url = self.settings["PORTAGE_BINHOST"]
        from portage.const import CACHE_PATH
        try:
            from urllib.parse import urlparse
        except ImportError:
            from urlparse import urlparse
        urldata = urlparse(base_url)
        # Local cache location is derived from the binhost's netloc + path.
        pkgindex_file = os.path.join(self.settings["ROOT"], CACHE_PATH, "binhost",
            urldata[1] + urldata[2], "Packages")
        pkgindex = self._new_pkgindex()
        try:
            f = codecs.open(_unicode_encode(pkgindex_file,
                encoding=_encodings['fs'], errors='strict'),
                mode='r', encoding=_encodings['repo.content'],
                errors='replace')
            try:
                pkgindex.read(f)
            finally:
                f.close()
        except EnvironmentError as e:
            # A missing cache file is fine; anything else is re-raised.
            if e.errno != errno.ENOENT:
                raise
        local_timestamp = pkgindex.header.get("TIMESTAMP", None)
        try:
            from urllib.request import urlopen as urllib_request_urlopen
        except ImportError:
            from urllib import urlopen as urllib_request_urlopen
        rmt_idx = self._new_pkgindex()
        try:
            # urlparse.urljoin() only works correctly with recognized
            # protocols and requires the base url to have a trailing
            # slash, so join manually...
            f = urllib_request_urlopen(base_url.rstrip("/") + "/Packages")
            f_dec = codecs.iterdecode(f,
                _encodings['repo.content'], errors='replace')
            try:
                # Only the header is read first; the body is fetched
                # solely when the remote timestamp differs from the cache.
                rmt_idx.readHeader(f_dec)
                remote_timestamp = rmt_idx.header.get("TIMESTAMP", None)
                if not remote_timestamp:
                    # no timestamp in the header, something's wrong
                    pkgindex = None
                else:
                    if not self._pkgindex_version_supported(rmt_idx):
                        writemsg(_("\n\n!!! Binhost package index version" \
                            " is not supported: '%s'\n") % \
                            rmt_idx.header.get("VERSION"), noiselevel=-1)
                        pkgindex = None
                    elif local_timestamp != remote_timestamp:
                        rmt_idx.readBody(f_dec)
                        pkgindex = rmt_idx
            finally:
                f.close()
        except EnvironmentError as e:
            writemsg(_("\n\n!!! Error fetching binhost package" \
                " info from '%s'\n") % base_url)
            writemsg("!!! %s\n\n" % str(e))
            del e
            pkgindex = None
        # A freshly downloaded index is written to the local cache.
        if pkgindex is rmt_idx:
            pkgindex.modified = False # don't update the header
            from portage.util import atomic_ofstream, ensure_dirs
            try:
                ensure_dirs(os.path.dirname(pkgindex_file))
                f = atomic_ofstream(pkgindex_file)
                pkgindex.write(f)
                f.close()
            except PortageException:
                if os.access(os.path.join(
                    self.settings["ROOT"], CACHE_PATH), os.W_OK):
                    raise
                # The current user doesn't have permission to cache the
                # file, but that's alright.
        if pkgindex:
            # Index-based protocol: build cpv -> metadata from the index.
            self._remotepkgs = {}
            for d in pkgindex.packages:
                self._remotepkgs[d["CPV"]] = d
            self._remote_has_index = True
            self._remote_base_uri = pkgindex.header.get("URI", base_url)
            # NOTE(review): the double-underscore attribute is a separate
            # (name-mangled, if this is a class method) store from
            # self._remotepkgs.
            self.__remotepkgs = {}
            for cpv in self._remotepkgs:
                self.dbapi.cpv_inject(cpv)
            self.populated = 1
            # Always-true guard: the else branch below is never taken.
            if True:
                # Remote package instances override local package
                # if they are not identical.
                hash_names = ["SIZE"] + self._pkgindex_hashes
                for cpv, local_metadata in metadata.items():
                    remote_metadata = self._remotepkgs.get(cpv)
                    if remote_metadata is None:
                        continue
                    # Use digests to compare identity.
                    # Hashes missing on either side are ignored, so two
                    # entries with no hash in common count as identical.
                    identical = True
                    for hash_name in hash_names:
                        local_value = local_metadata.get(hash_name)
                        if local_value is None:
                            continue
                        remote_value = remote_metadata.get(hash_name)
                        if remote_value is None:
                            continue
                        if local_value != remote_value:
                            identical = False
                            break
                    if identical:
                        del self._remotepkgs[cpv]
                    else:
                        # Override the local package in the aux_get cache.
                        self.dbapi._aux_cache[cpv] = remote_metadata
            else:
                # Local package instances override remote instances.
                for cpv in metadata:
                    self._remotepkgs.pop(cpv, None)
            return
        # Fallback (no usable remote index): fetch per-package metadata
        # via portage.getbinpkg.dir_get_metadata().
        self._remotepkgs = {}
        try:
            chunk_size = long(self.settings["PORTAGE_BINHOST_CHUNKSIZE"])
            if chunk_size < 8:
                chunk_size = 8
        except (ValueError, KeyError):
            chunk_size = 3000
        writemsg_stdout("\n")
        # The regex masks a password embedded in the URL before printing.
        writemsg_stdout(
            colorize("GOOD", _("Fetching bininfo from ")) + \
            re.sub(r'//(.+):.+@(.+)/', r'//\1:*password*@\2/', base_url) + "\n")
        self.__remotepkgs = portage.getbinpkg.dir_get_metadata(
            self.settings["PORTAGE_BINHOST"], chunk_size=chunk_size)
        #writemsg(green(" -- DONE!\n\n"))
        # Iterate over a copy of the keys because entries are deleted.
        for mypkg in list(self.__remotepkgs):
            if "CATEGORY" not in self.__remotepkgs[mypkg]:
                #old-style or corrupt package
                writemsg(_("!!! Invalid remote binary package: %s\n") % mypkg,
                    noiselevel=-1)
                del self.__remotepkgs[mypkg]
                continue
            mycat = self.__remotepkgs[mypkg]["CATEGORY"].strip()
            fullpkg = mycat+"/"+mypkg[:-5]

            if fullpkg in metadata:
                # When using this old protocol, comparison with the remote
                # package isn't supported, so the local package is always
                # preferred even if getbinpkgsonly is enabled.
                continue

            if not self.dbapi._category_re.match(mycat):
                writemsg(_("!!! Remote binary package has an " \
                    "unrecognized category: '%s'\n") % fullpkg,
                    noiselevel=-1)
                writemsg(_("!!! '%s' has a category that is not" \
                    " listed in %setc/portage/categories\n") % \
                    (fullpkg, self.settings["PORTAGE_CONFIGROOT"]),
                    noiselevel=-1)
                continue
            # NOTE(review): mykey is not used afterwards in this function.
            mykey = portage.cpv_getkey(fullpkg)
            try:
                # invalid tbz2's can hurt things.
                self.dbapi.cpv_inject(fullpkg)
                remote_metadata = self.__remotepkgs[mypkg]
                for k, v in remote_metadata.items():
                    remote_metadata[k] = v.strip()

                # Eliminate metadata values with names that digestCheck
                # uses, since they are not valid when using the old
                # protocol. Typically this is needed for SIZE metadata
                # which corresponds to the size of the unpacked files
                # rather than the binpkg file size, triggering digest
                # verification failures as reported in bug #303211.
                remote_metadata.pop('SIZE', None)
                for k in portage.checksum.hashfunc_map:
                    remote_metadata.pop(k, None)

                self._remotepkgs[fullpkg] = remote_metadata
            except SystemExit as e:
                raise
            except:
                # Bare except: any other failure (including
                # KeyboardInterrupt) just drops this remote package.
                writemsg(_("!!! Failed to inject remote binary package: %s\n") % fullpkg, noiselevel=-1)
                del self.__remotepkgs[mypkg]
                continue
    self.populated=1
def _populate(self, getbinpkgs=0):
    """
    Scan self.pkgdir for local *.tbz2 binary packages and, when
    requested, load the package list offered by PORTAGE_BINHOST.

    Local phase: every non-symlink *.tbz2 below pkgdir is either matched
    against the cached Packages index (by MTIME and SIZE) or has its xpak
    metadata read. Valid packages are recorded in self._pkg_paths and
    injected into self.dbapi; the Packages index is rewritten when
    entries changed and pkgdir is writable.

    Remote phase (getbinpkgs true and PORTAGE_BINHOST set): the remote
    Packages index is fetched and turned into self._remotepkgs by
    _pkgindex_cpv_map_latest_build(). If no usable index is available
    the code falls back to portage.getbinpkg.dir_get_metadata().

    @param getbinpkgs: when true, also query the remote binhost
    @rtype: int or None
    @return: 0 if pkgdir is not a directory and getbinpkgs is false;
        None otherwise
    """
    if (not os.path.isdir(self.pkgdir) and not getbinpkgs):
        return 0

    # Clear all caches in case populate is called multiple times
    # as may be the case when _global_updates calls populate()
    # prior to performing package moves since it only wants to
    # operate on local packages (getbinpkgs=0).
    self._remotepkgs = None
    self.dbapi._clear_cache()
    self.dbapi._aux_cache.clear()
    # Always-true guard: the local scan below runs unconditionally.
    if True:
        # cpv -> path relative to pkgdir, for every accepted local package.
        pkg_paths = {}
        self._pkg_paths = pkg_paths
        dirs = listdir(self.pkgdir, dirsonly=True, EmptyOnError=True)
        # Visit "All" first so that packages found there take precedence
        # over duplicates in the category directories.
        if "All" in dirs:
            dirs.remove("All")
        dirs.sort()
        dirs.insert(0, "All")
        pkgindex = self._load_pkgindex()
        # Lazily built map PF -> [index entries], only needed for "All".
        pf_index = None
        if not self._pkgindex_version_supported(pkgindex):
            pkgindex = self._new_pkgindex()
        header = pkgindex.header
        # cpv -> index entry (dict) as loaded from the Packages index.
        metadata = {}
        for d in pkgindex.packages:
            metadata[d["CPV"]] = d
        update_pkgindex = False
        for mydir in dirs:
            for myfile in listdir(os.path.join(self.pkgdir, mydir)):
                if not myfile.endswith(".tbz2"):
                    continue
                mypath = os.path.join(mydir, myfile)
                full_path = os.path.join(self.pkgdir, mypath)
                # lstat (not stat) so that symlinks can be detected and skipped.
                s = os.lstat(full_path)
                if stat.S_ISLNK(s.st_mode):
                    continue

                # Validate data from the package index and try to avoid
                # reading the xpak if possible.
                if mydir != "All":
                    # The directory name is the category, so the cpv is
                    # known: <dir>/<file name without ".tbz2">.
                    possibilities = None
                    d = metadata.get(mydir+"/"+myfile[:-5])
                    if d:
                        possibilities = [d]
                else:
                    # In "All" the category is unknown; look candidates
                    # up by PF instead.
                    if pf_index is None:
                        pf_index = {}
                        for mycpv in metadata:
                            mycat, mypf = catsplit(mycpv)
                            pf_index.setdefault(
                                mypf, []).append(metadata[mycpv])
                    possibilities = pf_index.get(myfile[:-5])
                if possibilities:
                    match = None
                    # Accept the first entry whose MTIME and SIZE equal
                    # the file's and which carries all _pkgindex_keys.
                    for d in possibilities:
                        try:
                            if long(d["MTIME"]) != s[stat.ST_MTIME]:
                                continue
                        except (KeyError, ValueError):
                            continue
                        try:
                            if long(d["SIZE"]) != long(s.st_size):
                                continue
                        except (KeyError, ValueError):
                            continue
                        if not self._pkgindex_keys.difference(d):
                            match = d
                            break
                    if match:
                        # Here d is the same object as match (the loop
                        # above exited via break).
                        mycpv = match["CPV"]
                        if mycpv in pkg_paths:
                            # discard duplicates (All/ is preferred)
                            continue
                        pkg_paths[mycpv] = mypath
                        # update the path if the package has been moved
                        oldpath = d.get("PATH")
                        if oldpath and oldpath != mypath:
                            update_pkgindex = True
                        # PATH is stored only for non-default locations.
                        if mypath != mycpv + ".tbz2":
                            d["PATH"] = mypath
                            if not oldpath:
                                update_pkgindex = True
                        else:
                            d.pop("PATH", None)
                            if oldpath:
                                update_pkgindex = True
                        self.dbapi.cpv_inject(mycpv)
                        # Seed the aux cache when the index entry has
                        # every key the cache requires.
                        if not self.dbapi._aux_cache_keys.difference(d):
                            aux_cache = self.dbapi._aux_cache_slot_dict()
                            for k in self.dbapi._aux_cache_keys:
                                aux_cache[k] = d[k]
                            self.dbapi._aux_cache[mycpv] = aux_cache
                        continue
                # No usable index entry: fall back to reading the xpak.
                if not os.access(full_path, os.R_OK):
                    writemsg(_("!!! Permission denied to read " \
                        "binary package: '%s'\n") % full_path,
                        noiselevel=-1)
                    self.invalids.append(myfile[:-5])
                    continue
                metadata_bytes = portage.xpak.tbz2(full_path).get_data()
                mycat = _unicode_decode(metadata_bytes.get(
                    _unicode_encode("CATEGORY",
                    encoding=_encodings['repo.content']), ""),
                    encoding=_encodings['repo.content'], errors='replace')
                mypf = _unicode_decode(metadata_bytes.get(
                    _unicode_encode("PF",
                    encoding=_encodings['repo.content']), ""),
                    encoding=_encodings['repo.content'], errors='replace')
                slot = _unicode_decode(metadata_bytes.get(
                    _unicode_encode("SLOT",
                    encoding=_encodings['repo.content']), ""),
                    encoding=_encodings['repo.content'], errors='replace')
                mypkg = myfile[:-5]
                if not mycat or not mypf or not slot:
                    #old-style or corrupt package
                    writemsg(_("\n!!! Invalid binary package: '%s'\n") % full_path,
                        noiselevel=-1)
                    missing_keys = []
                    if not mycat:
                        missing_keys.append("CATEGORY")
                    if not mypf:
                        missing_keys.append("PF")
                    if not slot:
                        missing_keys.append("SLOT")
                    msg = []
                    if missing_keys:
                        missing_keys.sort()
                        msg.append(_("Missing metadata key(s): %s.") % \
                            ", ".join(missing_keys))
                    msg.append(_(" This binary package is not " \
                        "recoverable and should be deleted."))
                    from textwrap import wrap
                    for line in wrap("".join(msg), 72):
                        writemsg("!!! %s\n" % line, noiselevel=-1)
                    self.invalids.append(mypkg)
                    continue
                mycat = mycat.strip()
                slot = slot.strip()
                # Silently skip files whose embedded CATEGORY/PF do not
                # agree with their directory or file name.
                if mycat != mydir and mydir != "All":
                    continue
                if mypkg != mypf.strip():
                    continue
                mycpv = mycat + "/" + mypkg
                if mycpv in pkg_paths:
                    # All is first, so it's preferred.
                    continue
                if not self.dbapi._category_re.match(mycat):
                    writemsg(_("!!! Binary package has an " \
                        "unrecognized category: '%s'\n") % full_path,
                        noiselevel=-1)
                    writemsg(_("!!! '%s' has a category that is not" \
                        " listed in %setc/portage/categories\n") % \
                        (mycpv, self.settings["PORTAGE_CONFIGROOT"]),
                        noiselevel=-1)
                    continue
                pkg_paths[mycpv] = mypath
                self.dbapi.cpv_inject(mycpv)
                # Metadata came from the xpak, so the index is stale.
                update_pkgindex = True
                d = metadata.get(mycpv, {})
                # Discard a cached entry whose MTIME or SIZE no longer
                # matches the file on disk.
                if d:
                    try:
                        if long(d["MTIME"]) != s[stat.ST_MTIME]:
                            d.clear()
                    except (KeyError, ValueError):
                        d.clear()
                if d:
                    try:
                        if long(d["SIZE"]) != long(s.st_size):
                            d.clear()
                    except (KeyError, ValueError):
                        d.clear()

                d["CPV"] = mycpv
                d["SLOT"] = slot
                d["MTIME"] = str(s[stat.ST_MTIME])
                d["SIZE"] = str(s.st_size)

                d.update(zip(self._pkgindex_aux_keys,
                    self.dbapi.aux_get(mycpv, self._pkgindex_aux_keys)))
                try:
                    self._eval_use_flags(mycpv, d)
                except portage.exception.InvalidDependString:
                    writemsg(_("!!! Invalid binary package: '%s'\n") % \
                        self.getname(mycpv), noiselevel=-1)
                    self.dbapi.cpv_remove(mycpv)
                    # NOTE(review): execution continues below and still
                    # stores d in metadata; the entry is dropped later
                    # by the "cpv not in pkg_paths" sweep.
                    del pkg_paths[mycpv]

                # record location if it's non-default
                if mypath != mycpv + ".tbz2":
                    d["PATH"] = mypath
                else:
                    d.pop("PATH", None)
                metadata[mycpv] = d
                if not self.dbapi._aux_cache_keys.difference(d):
                    aux_cache = self.dbapi._aux_cache_slot_dict()
                    for k in self.dbapi._aux_cache_keys:
                        aux_cache[k] = d[k]
                    self.dbapi._aux_cache[mycpv] = aux_cache

        # Drop index entries for packages that were not found on disk.
        for cpv in list(metadata):
            if cpv not in pkg_paths:
                del metadata[cpv]

        # Do not bother to write the Packages index if $PKGDIR/All/ exists
        # since it will provide no benefit due to the need to read CATEGORY
        # from xpak.
        if update_pkgindex and os.access(self.pkgdir, os.W_OK):
            del pkgindex.packages[:]
            pkgindex.packages.extend(iter(metadata.values()))
            self._update_pkgindex_header(pkgindex.header)
            # NOTE(review): f.close() is skipped if write() raises;
            # whether that is intended depends on atomic_ofstream's
            # close/abort semantics, which are not visible here.
            f = atomic_ofstream(self._pkgindex_file)
            pkgindex.write(f)
            f.close()

    # Raises KeyError if PORTAGE_BINHOST is absent from settings, unless
    # self.settings returns a default for missing keys -- TODO confirm.
    if getbinpkgs and not self.settings["PORTAGE_BINHOST"]:
        writemsg(_("!!! PORTAGE_BINHOST unset, but use is requested.\n"),
            noiselevel=-1)

    if getbinpkgs and 'PORTAGE_BINHOST' in self.settings:
        base_url = self.settings["PORTAGE_BINHOST"]
        from portage.const import CACHE_PATH
        try:
            from urllib.parse import urlparse
        except ImportError:
            from urlparse import urlparse
        urldata = urlparse(base_url)
        # Local cache location is derived from the binhost's netloc + path.
        pkgindex_file = os.path.join(self.settings["ROOT"], CACHE_PATH, "binhost",
            urldata[1] + urldata[2], "Packages")
        pkgindex = self._new_pkgindex()
        try:
            f = codecs.open(_unicode_encode(pkgindex_file,
                encoding=_encodings['fs'], errors='strict'),
                mode='r', encoding=_encodings['repo.content'],
                errors='replace')
            try:
                pkgindex.read(f)
            finally:
                f.close()
        except EnvironmentError as e:
            # A missing cache file is fine; anything else is re-raised.
            if e.errno != errno.ENOENT:
                raise
        local_timestamp = pkgindex.header.get("TIMESTAMP", None)
        try:
            from urllib.request import urlopen as urllib_request_urlopen
        except ImportError:
            from urllib import urlopen as urllib_request_urlopen
        rmt_idx = self._new_pkgindex()
        try:
            # urlparse.urljoin() only works correctly with recognized
            # protocols and requires the base url to have a trailing
            # slash, so join manually...
            f = urllib_request_urlopen(base_url.rstrip("/") + "/Packages")
            f_dec = codecs.iterdecode(f,
                _encodings['repo.content'], errors='replace')
            try:
                # Only the header is read first; the body is fetched
                # solely when the remote timestamp differs from the cache.
                rmt_idx.readHeader(f_dec)
                remote_timestamp = rmt_idx.header.get("TIMESTAMP", None)
                if not remote_timestamp:
                    # no timestamp in the header, something's wrong
                    pkgindex = None
                else:
                    if not self._pkgindex_version_supported(rmt_idx):
                        writemsg(_("\n\n!!! Binhost package index version" \
                            " is not supported: '%s'\n") % \
                            rmt_idx.header.get("VERSION"), noiselevel=-1)
                        pkgindex = None
                    elif local_timestamp != remote_timestamp:
                        rmt_idx.readBody(f_dec)
                        pkgindex = rmt_idx
            finally:
                f.close()
        except EnvironmentError as e:
            writemsg(_("\n\n!!! Error fetching binhost package" \
                " info from '%s'\n") % base_url)
            writemsg("!!! %s\n\n" % str(e))
            del e
            pkgindex = None
        # A freshly downloaded index is written to the local cache.
        if pkgindex is rmt_idx:
            pkgindex.modified = False # don't update the header
            try:
                ensure_dirs(os.path.dirname(pkgindex_file))
                f = atomic_ofstream(pkgindex_file)
                pkgindex.write(f)
                f.close()
            except (IOError, PortageException):
                # Failure is tolerated only when the cache directory is
                # not writable by the current user.
                if os.access(os.path.dirname(pkgindex_file), os.W_OK):
                    raise
                # The current user doesn't have permission to cache the
                # file, but that's alright.
        if pkgindex:
            # Organize remote package list as a cpv -> metadata map.
            self._remotepkgs = _pkgindex_cpv_map_latest_build(pkgindex)
            self._remote_has_index = True
            self._remote_base_uri = pkgindex.header.get("URI", base_url)
            # NOTE(review): the double-underscore attribute is a separate
            # (name-mangled, if this is a class method) store from
            # self._remotepkgs.
            self.__remotepkgs = {}
            for cpv in self._remotepkgs:
                self.dbapi.cpv_inject(cpv)
            self.populated = 1
            # Always-true guard: the else branch below is never taken.
            if True:
                # Remote package instances override local package
                # if they are not identical.
                hash_names = ["SIZE"] + self._pkgindex_hashes
                for cpv, local_metadata in metadata.items():
                    remote_metadata = self._remotepkgs.get(cpv)
                    if remote_metadata is None:
                        continue
                    # Use digests to compare identity.
                    # Hashes missing on either side are ignored, so two
                    # entries with no hash in common count as identical.
                    identical = True
                    for hash_name in hash_names:
                        local_value = local_metadata.get(hash_name)
                        if local_value is None:
                            continue
                        remote_value = remote_metadata.get(hash_name)
                        if remote_value is None:
                            continue
                        if local_value != remote_value:
                            identical = False
                            break
                    if identical:
                        del self._remotepkgs[cpv]
                    else:
                        # Override the local package in the aux_get cache.
                        self.dbapi._aux_cache[cpv] = remote_metadata
            else:
                # Local package instances override remote instances.
                for cpv in metadata:
                    self._remotepkgs.pop(cpv, None)
            return
        # Fallback (no usable remote index): fetch per-package metadata
        # via portage.getbinpkg.dir_get_metadata().
        self._remotepkgs = {}
        try:
            chunk_size = long(self.settings["PORTAGE_BINHOST_CHUNKSIZE"])
            if chunk_size < 8:
                chunk_size = 8
        except (ValueError, KeyError):
            chunk_size = 3000
        writemsg_stdout("\n")
        # The regex masks a password embedded in the URL before printing.
        writemsg_stdout(
            colorize("GOOD", _("Fetching bininfo from ")) + \
            re.sub(r'//(.+):.+@(.+)/', r'//\1:*password*@\2/', base_url) + "\n")
        self.__remotepkgs = portage.getbinpkg.dir_get_metadata(
            self.settings["PORTAGE_BINHOST"], chunk_size=chunk_size)
        #writemsg(green(" -- DONE!\n\n"))
        # Iterate over a copy of the keys because entries are deleted.
        for mypkg in list(self.__remotepkgs):
            if "CATEGORY" not in self.__remotepkgs[mypkg]:
                #old-style or corrupt package
                writemsg(_("!!! Invalid remote binary package: %s\n") % mypkg,
                    noiselevel=-1)
                del self.__remotepkgs[mypkg]
                continue
            mycat = self.__remotepkgs[mypkg]["CATEGORY"].strip()
            fullpkg = mycat+"/"+mypkg[:-5]

            if fullpkg in metadata:
                # When using this old protocol, comparison with the remote
                # package isn't supported, so the local package is always
                # preferred even if getbinpkgsonly is enabled.
                continue

            if not self.dbapi._category_re.match(mycat):
                writemsg(_("!!! Remote binary package has an " \
                    "unrecognized category: '%s'\n") % fullpkg,
                    noiselevel=-1)
                writemsg(_("!!! '%s' has a category that is not" \
                    " listed in %setc/portage/categories\n") % \
                    (fullpkg, self.settings["PORTAGE_CONFIGROOT"]),
                    noiselevel=-1)
                continue
            # NOTE(review): mykey is not used afterwards in this function.
            mykey = portage.cpv_getkey(fullpkg)
            try:
                # invalid tbz2's can hurt things.
                self.dbapi.cpv_inject(fullpkg)
                remote_metadata = self.__remotepkgs[mypkg]
                for k, v in remote_metadata.items():
                    remote_metadata[k] = v.strip()

                # Eliminate metadata values with names that digestCheck
                # uses, since they are not valid when using the old
                # protocol. Typically this is needed for SIZE metadata
                # which corresponds to the size of the unpacked files
                # rather than the binpkg file size, triggering digest
                # verification failures as reported in bug #303211.
                remote_metadata.pop('SIZE', None)
                for k in portage.checksum.hashfunc_map:
                    remote_metadata.pop(k, None)

                self._remotepkgs[fullpkg] = remote_metadata
            except SystemExit as e:
                raise
            except:
                # Bare except: any other failure (including
                # KeyboardInterrupt) just drops this remote package.
                writemsg(_("!!! Failed to inject remote binary package: %s\n") % fullpkg, noiselevel=-1)
                del self.__remotepkgs[mypkg]
                continue
    self.populated=1
def _populate(self, getbinpkgs=0):
    """
    Scan the local package directory and, optionally, the configured
    binhosts, registering every usable binary package with self.dbapi.

    Local phase: every ".tbz2" file found in the sub-directories of
    self.pkgdir ("All" is scanned first) is either matched against an
    entry of the local package index (same MTIME and SIZE, all required
    keys present) or has its xpak metadata read. Accepted packages are
    injected into self.dbapi and recorded in self._pkg_paths. The local
    index is rewritten when something changed and self.pkgdir is writable.

    Remote phase (only when getbinpkgs is true and PORTAGE_BINHOST is
    present in self.settings): for every whitespace separated URL a
    "Packages" index is fetched (urlopen first, then sftp, ssh or a
    FETCHCOMMAND_<SCHEME> setting as fallbacks) and merged into
    self._remotepkgs. When no usable index is obtained, the metadata is
    fetched with portage.getbinpkg.dir_get_metadata() instead.

    @param getbinpkgs: when true, remote packages are populated as well
    @rtype: int or None
    @return: 0 when self.pkgdir is not a directory and getbinpkgs is
        false, None in every other case (self.populated is then set to 1)
    """
    if (not os.path.isdir(self.pkgdir) and not getbinpkgs):
        return 0

    # Clear all caches in case populate is called multiple times
    # as may be the case when _global_updates calls populate()
    # prior to performing package moves since it only wants to
    # operate on local packages (getbinpkgs=0).
    self._remotepkgs = None
    self.dbapi._clear_cache()
    self.dbapi._aux_cache.clear()
    if True:
        # cpv -> path of the package file relative to self.pkgdir
        pkg_paths = {}
        self._pkg_paths = pkg_paths
        dirs = listdir(self.pkgdir, dirsonly=True, EmptyOnError=True)
        # Make sure "All" is the first directory to be scanned (it is
        # inserted even when listdir() did not report it).
        if "All" in dirs:
            dirs.remove("All")
        dirs.sort()
        dirs.insert(0, "All")
        pkgindex = self._load_pkgindex()
        # Lazily built map: package file name without ".tbz2" -> list of
        # index entries, used for lookups inside "All".
        pf_index = None
        if not self._pkgindex_version_supported(pkgindex):
            pkgindex = self._new_pkgindex()
        # NOTE(review): "header" appears to be unused in this function.
        header = pkgindex.header
        # cpv -> index entry (dict) for every package in the local index
        metadata = {}
        for d in pkgindex.packages:
            metadata[d["CPV"]] = d
        update_pkgindex = False
        for mydir in dirs:
            for myfile in listdir(os.path.join(self.pkgdir, mydir)):
                if not myfile.endswith(".tbz2"):
                    continue
                mypath = os.path.join(mydir, myfile)
                full_path = os.path.join(self.pkgdir, mypath)
                # lstat() so that symlinks are detected and skipped
                s = os.lstat(full_path)
                if stat.S_ISLNK(s.st_mode):
                    continue

                # Validate data from the package index and try to avoid
                # reading the xpak if possible.
                # myfile[:-5] strips the ".tbz2" suffix checked above.
                if mydir != "All":
                    possibilities = None
                    d = metadata.get(mydir+"/"+myfile[:-5])
                    if d:
                        possibilities = [d]
                else:
                    # The category is unknown inside "All", so look the
                    # file up by its name alone.
                    if pf_index is None:
                        pf_index = {}
                        for mycpv in metadata:
                            mycat, mypf = catsplit(mycpv)
                            pf_index.setdefault(
                                mypf, []).append(metadata[mycpv])
                    possibilities = pf_index.get(myfile[:-5])
                if possibilities:
                    match = None
                    for d in possibilities:
                        # An index entry only matches when its MTIME and
                        # SIZE agree with the file on disk and none of
                        # self._pkgindex_keys is missing from it.
                        # NOTE(review): "long" is presumably aliased
                        # elsewhere in the file for Python 3 - confirm.
                        try:
                            if long(d["MTIME"]) != s[stat.ST_MTIME]:
                                continue
                        except (KeyError, ValueError):
                            continue
                        try:
                            if long(d["SIZE"]) != long(s.st_size):
                                continue
                        except (KeyError, ValueError):
                            continue
                        if not self._pkgindex_keys.difference(d):
                            match = d
                            break
                    if match:
                        # "d" is the same object as "match" here because
                        # the loop above was left with break.
                        mycpv = match["CPV"]
                        if mycpv in pkg_paths:
                            # discard duplicates (All/ is preferred)
                            continue
                        pkg_paths[mycpv] = mypath
                        # update the path if the package has been moved
                        oldpath = d.get("PATH")
                        if oldpath and oldpath != mypath:
                            update_pkgindex = True
                        # PATH is only stored for non-default locations
                        if mypath != mycpv + ".tbz2":
                            d["PATH"] = mypath
                            if not oldpath:
                                update_pkgindex = True
                        else:
                            d.pop("PATH", None)
                            if oldpath:
                                update_pkgindex = True
                        self.dbapi.cpv_inject(mycpv)
                        # Seed the aux cache from the index entry when it
                        # provides every cached key.
                        if not self.dbapi._aux_cache_keys.difference(d):
                            aux_cache = self.dbapi._aux_cache_slot_dict()
                            for k in self.dbapi._aux_cache_keys:
                                aux_cache[k] = d[k]
                            self.dbapi._aux_cache[mycpv] = aux_cache
                        continue
                # No usable index entry: fall back to reading the xpak
                # metadata from the package file itself.
                if not os.access(full_path, os.R_OK):
                    writemsg(_("!!! Permission denied to read " \
                        "binary package: '%s'\n") % full_path,
                        noiselevel=-1)
                    self.invalids.append(myfile[:-5])
                    continue
                metadata_bytes = portage.xpak.tbz2(full_path).get_data()
                mycat = _unicode_decode(metadata_bytes.get(b"CATEGORY", ""),
                    encoding=_encodings['repo.content'], errors='replace')
                mypf = _unicode_decode(metadata_bytes.get(b"PF", ""),
                    encoding=_encodings['repo.content'], errors='replace')
                slot = _unicode_decode(metadata_bytes.get(b"SLOT", ""),
                    encoding=_encodings['repo.content'], errors='replace')
                mypkg = myfile[:-5]
                if not mycat or not mypf or not slot:
                    # old-style or corrupt package
                    writemsg(_("\n!!! Invalid binary package: '%s'\n") % full_path,
                        noiselevel=-1)
                    missing_keys = []
                    if not mycat:
                        missing_keys.append("CATEGORY")
                    if not mypf:
                        missing_keys.append("PF")
                    if not slot:
                        missing_keys.append("SLOT")
                    msg = []
                    if missing_keys:
                        missing_keys.sort()
                        msg.append(_("Missing metadata key(s): %s.") % \
                            ", ".join(missing_keys))
                    msg.append(_(" This binary package is not " \
                        "recoverable and should be deleted."))
                    for line in textwrap.wrap("".join(msg), 72):
                        writemsg("!!! %s\n" % line, noiselevel=-1)
                    self.invalids.append(mypkg)
                    continue
                mycat = mycat.strip()
                slot = slot.strip()
                # Silently skip files whose directory or file name
                # disagrees with their own CATEGORY / PF metadata.
                if mycat != mydir and mydir != "All":
                    continue
                if mypkg != mypf.strip():
                    continue
                mycpv = mycat + "/" + mypkg
                if mycpv in pkg_paths:
                    # All is first, so it's preferred.
                    continue
                if not self.dbapi._category_re.match(mycat):
                    writemsg(_("!!! Binary package has an " \
                        "unrecognized category: '%s'\n") % full_path,
                        noiselevel=-1)
                    writemsg(_("!!! '%s' has a category that is not" \
                        " listed in %setc/portage/categories\n") % \
                        (mycpv, self.settings["PORTAGE_CONFIGROOT"]),
                        noiselevel=-1)
                    continue
                pkg_paths[mycpv] = mypath
                self.dbapi.cpv_inject(mycpv)
                update_pkgindex = True
                # Reuse the existing index entry only if its MTIME and
                # SIZE still describe the file; otherwise start from an
                # empty dict (clear() keeps the same object).
                d = metadata.get(mycpv, {})
                if d:
                    try:
                        if long(d["MTIME"]) != s[stat.ST_MTIME]:
                            d.clear()
                    except (KeyError, ValueError):
                        d.clear()
                if d:
                    try:
                        if long(d["SIZE"]) != long(s.st_size):
                            d.clear()
                    except (KeyError, ValueError):
                        d.clear()

                d["CPV"] = mycpv
                d["SLOT"] = slot
                d["MTIME"] = str(s[stat.ST_MTIME])
                d["SIZE"] = str(s.st_size)

                d.update(zip(self._pkgindex_aux_keys,
                    self.dbapi.aux_get(mycpv, self._pkgindex_aux_keys)))
                try:
                    self._eval_use_flags(mycpv, d)
                except portage.exception.InvalidDependString:
                    writemsg(_("!!! Invalid binary package: '%s'\n") % \
                        self.getname(mycpv), noiselevel=-1)
                    self.dbapi.cpv_remove(mycpv)
                    del pkg_paths[mycpv]
                    # NOTE(review): execution falls through to the code
                    # below (there is no continue); the entry stored in
                    # "metadata" is dropped again by the cleanup loop
                    # after the scan because the cpv is no longer in
                    # pkg_paths.

                # record location if it's non-default
                if mypath != mycpv + ".tbz2":
                    d["PATH"] = mypath
                else:
                    d.pop("PATH", None)
                metadata[mycpv] = d
                if not self.dbapi._aux_cache_keys.difference(d):
                    aux_cache = self.dbapi._aux_cache_slot_dict()
                    for k in self.dbapi._aux_cache_keys:
                        aux_cache[k] = d[k]
                    self.dbapi._aux_cache[mycpv] = aux_cache

        # Drop index entries for which no package file was accepted.
        for cpv in list(metadata):
            if cpv not in pkg_paths:
                del metadata[cpv]

        # Do not bother to write the Packages index if $PKGDIR/All/ exists
        # since it will provide no benefit due to the need to read CATEGORY
        # from xpak.
        # NOTE(review): the condition below does not check for
        # $PKGDIR/All/; the comment above may be outdated - confirm.
        if update_pkgindex and os.access(self.pkgdir, os.W_OK):
            del pkgindex.packages[:]
            pkgindex.packages.extend(iter(metadata.values()))
            self._update_pkgindex_header(pkgindex.header)
            f = atomic_ofstream(self._pkgindex_file)
            pkgindex.write(f)
            f.close()

    if getbinpkgs and not self.settings["PORTAGE_BINHOST"]:
        writemsg(_("!!! PORTAGE_BINHOST unset, but use is requested.\n"),
            noiselevel=-1)

    # Local-only population ends here.
    if not getbinpkgs or 'PORTAGE_BINHOST' not in self.settings:
        self.populated=1
        return
    self._remotepkgs = {}
    for base_url in self.settings["PORTAGE_BINHOST"].split():
        parsed_url = urlparse(base_url)
        host = parsed_url.netloc
        port = parsed_url.port
        user = None
        # NOTE(review): "passwd" is assigned but never read below.
        passwd = None
        user_passwd = ""
        # Split "user[:password]@" off the network location; the
        # "user[:password]@" prefix is kept for the sftp/ssh commands.
        if "@" in host:
            user, host = host.split("@", 1)
            user_passwd = user + "@"
            if ":" in user:
                user, passwd = user.split(":", 1)
        port_args = []
        # Strip a trailing ":port" so that "host" is the bare host name.
        if port is not None:
            port_str = ":%s" % (port,)
            if host.endswith(port_str):
                host = host[:-len(port_str)]
        # Location of the locally cached copy of the remote index
        pkgindex_file = os.path.join(self.settings["EROOT"], CACHE_PATH, "binhost",
            host, parsed_url.path.lstrip("/"), "Packages")
        pkgindex = self._new_pkgindex()
        try:
            f = io.open(_unicode_encode(pkgindex_file,
                encoding=_encodings['fs'], errors='strict'),
                mode='r', encoding=_encodings['repo.content'],
                errors='replace')
            try:
                pkgindex.read(f)
            finally:
                f.close()
        except EnvironmentError as e:
            # A missing cache file is expected; anything else is fatal.
            if e.errno != errno.ENOENT:
                raise
        local_timestamp = pkgindex.header.get("TIMESTAMP", None)
        rmt_idx = self._new_pkgindex()
        # Helper process and temporary file used by the fallback
        # fetchers; both are cleaned up after the try block below.
        proc = None
        tmp_filename = None
        try:
            # urlparse.urljoin() only works correctly with recognized
            # protocols and requires the base url to have a trailing
            # slash, so join manually...
            url = base_url.rstrip("/") + "/Packages"
            try:
                f = urllib_request_urlopen(url)
            except IOError:
                path = parsed_url.path.rstrip("/") + "/Packages"

                if parsed_url.scheme == 'sftp':
                    # The sftp command complains about 'Illegal seek' if
                    # we try to make it write to /dev/stdout, so use a
                    # temp file instead.
                    fd, tmp_filename = tempfile.mkstemp()
                    os.close(fd)
                    if port is not None:
                        port_args = ['-P', "%s" % (port,)]
                    proc = subprocess.Popen(['sftp'] + port_args + \
                        [user_passwd + host + ":" + path, tmp_filename])
                    if proc.wait() != os.EX_OK:
                        # re-raise the IOError from urlopen() that is
                        # currently being handled
                        raise
                    f = open(tmp_filename, 'rb')
                elif parsed_url.scheme == 'ssh':
                    if port is not None:
                        port_args = ['-p', "%s" % (port,)]
                    proc = subprocess.Popen(['ssh'] + port_args + \
                        [user_passwd + host, '--', 'cat', path],
                        stdout=subprocess.PIPE)
                    f = proc.stdout
                else:
                    # Any other scheme needs a FETCHCOMMAND_<SCHEME>
                    # setting; without one the urlopen() error is
                    # re-raised.
                    setting = 'FETCHCOMMAND_' + parsed_url.scheme.upper()
                    fcmd = self.settings.get(setting)
                    if not fcmd:
                        raise
                    fd, tmp_filename = tempfile.mkstemp()
                    tmp_dirname, tmp_basename = os.path.split(tmp_filename)
                    os.close(fd)
                    success = portage.getbinpkg.file_get(url,
                        tmp_dirname, fcmd=fcmd, filename=tmp_basename)
                    if not success:
                        raise EnvironmentError("%s failed" % (setting,))
                    f = open(tmp_filename, 'rb')

            f_dec = codecs.iterdecode(f,
                _encodings['repo.content'], errors='replace')
            try:
                # Only the header is read at first; the body is read
                # only when the remote TIMESTAMP differs from the cached
                # one.
                rmt_idx.readHeader(f_dec)
                remote_timestamp = rmt_idx.header.get("TIMESTAMP", None)
                if not remote_timestamp:
                    # no timestamp in the header, something's wrong
                    pkgindex = None
                    writemsg(_("\n\n!!! Binhost package index " \
                        " has no TIMESTAMP field.\n"), noiselevel=-1)
                else:
                    if not self._pkgindex_version_supported(rmt_idx):
                        writemsg(_("\n\n!!! Binhost package index version" \
                            " is not supported: '%s'\n") % \
                            rmt_idx.header.get("VERSION"), noiselevel=-1)
                        pkgindex = None
                    elif local_timestamp != remote_timestamp:
                        rmt_idx.readBody(f_dec)
                        pkgindex = rmt_idx
            finally:
                # Timeout after 5 seconds, in case close() blocks
                # indefinitely (see bug #350139).
                try:
                    try:
                        AlarmSignal.register(5)
                        f.close()
                    finally:
                        AlarmSignal.unregister()
                except AlarmSignal:
                    writemsg("\n\n!!! %s\n" % \
                        _("Timed out while closing connection to binhost"),
                        noiselevel=-1)
        except EnvironmentError as e:
            # Report the failure and discard the index so that the
            # dir_get_metadata() fallback below is used for this binhost.
            writemsg(_("\n\n!!! Error fetching binhost package" \
                " info from '%s'\n") % base_url)
            writemsg("!!! %s\n\n" % str(e))
            del e
            pkgindex = None
        # Reap the helper process (killing it if it is still running)
        # and remove the temporary download file.
        if proc is not None:
            if proc.poll() is None:
                proc.kill()
                proc.wait()
            proc = None
        if tmp_filename is not None:
            try:
                os.unlink(tmp_filename)
            except OSError:
                pass
        # A newer remote index was downloaded: cache it locally.
        if pkgindex is rmt_idx:
            pkgindex.modified = False # don't update the header
            try:
                ensure_dirs(os.path.dirname(pkgindex_file))
                f = atomic_ofstream(pkgindex_file)
                pkgindex.write(f)
                f.close()
            except (IOError, PortageException):
                if os.access(os.path.dirname(pkgindex_file), os.W_OK):
                    raise
                # The current user doesn't have permission to cache the
                # file, but that's alright.
        if pkgindex:
            # Organize remote package list as a cpv -> metadata map.
            remotepkgs = _pkgindex_cpv_map_latest_build(pkgindex)
            remote_base_uri = pkgindex.header.get("URI", base_url)
            for cpv, remote_metadata in remotepkgs.items():
                remote_metadata["BASE_URI"] = remote_base_uri
                self._pkgindex_uri[cpv] = url
            self._remotepkgs.update(remotepkgs)
            self._remote_has_index = True
            for cpv in remotepkgs:
                self.dbapi.cpv_inject(cpv)
            if True:
                # Remote package instances override local package
                # if they are not identical.
                hash_names = ["SIZE"] + self._pkgindex_hashes
                for cpv, local_metadata in metadata.items():
                    remote_metadata = self._remotepkgs.get(cpv)
                    if remote_metadata is None:
                        continue
                    # Use digests to compare identity.
                    # Values missing on either side are ignored, so the
                    # packages count as identical unless a value present
                    # on both sides differs.
                    identical = True
                    for hash_name in hash_names:
                        local_value = local_metadata.get(hash_name)
                        if local_value is None:
                            continue
                        remote_value = remote_metadata.get(hash_name)
                        if remote_value is None:
                            continue
                        if local_value != remote_value:
                            identical = False
                            break
                    if identical:
                        del self._remotepkgs[cpv]
                    else:
                        # Override the local package in the aux_get cache.
                        self.dbapi._aux_cache[cpv] = remote_metadata
            else:
                # Unreachable while the condition above is "if True".
                # Local package instances override remote instances.
                for cpv in metadata:
                    self._remotepkgs.pop(cpv, None)
            # This binhost is done; the code below is the fallback used
            # when no usable package index was obtained.
            continue
        try:
            chunk_size = long(self.settings["PORTAGE_BINHOST_CHUNKSIZE"])
            if chunk_size < 8:
                chunk_size = 8
        except (ValueError, KeyError):
            chunk_size = 3000
        writemsg_stdout("\n")
        # The password part of the URL is masked before printing.
        writemsg_stdout(
            colorize("GOOD", _("Fetching bininfo from ")) + \
            re.sub(r'//(.+):.+@(.+)/', r'//\1:*password*@\2/', base_url) + "\n")
        remotepkgs = portage.getbinpkg.dir_get_metadata(
            base_url, chunk_size=chunk_size)

        for mypkg, remote_metadata in remotepkgs.items():
            mycat = remote_metadata.get("CATEGORY")
            if mycat is None:
                # old-style or corrupt package
                writemsg(_("!!! Invalid remote binary package: %s\n") % mypkg,
                    noiselevel=-1)
                continue
            mycat = mycat.strip()
            # presumably mypkg is a "<pf>.tbz2" file name, so [:-5]
            # strips the suffix - verify against dir_get_metadata()
            fullpkg = mycat+"/"+mypkg[:-5]

            if fullpkg in metadata:
                # When using this old protocol, comparison with the remote
                # package isn't supported, so the local package is always
                # preferred even if getbinpkgsonly is enabled.
                continue

            if not self.dbapi._category_re.match(mycat):
                writemsg(_("!!! Remote binary package has an " \
                    "unrecognized category: '%s'\n") % fullpkg,
                    noiselevel=-1)
                writemsg(_("!!! '%s' has a category that is not" \
                    " listed in %setc/portage/categories\n") % \
                    (fullpkg, self.settings["PORTAGE_CONFIGROOT"]),
                    noiselevel=-1)
                continue
            # NOTE(review): "mykey" is assigned but never read below.
            mykey = portage.cpv_getkey(fullpkg)
            try:
                # invalid tbz2's can hurt things.
                self.dbapi.cpv_inject(fullpkg)
                for k, v in remote_metadata.items():
                    remote_metadata[k] = v.strip()
                remote_metadata["BASE_URI"] = base_url

                # Eliminate metadata values with names that digestCheck
                # uses, since they are not valid when using the old
                # protocol. Typically this is needed for SIZE metadata
                # which corresponds to the size of the unpacked files
                # rather than the binpkg file size, triggering digest
                # verification failures as reported in bug #303211.
                remote_metadata.pop('SIZE', None)
                for k in portage.checksum.hashfunc_map:
                    remote_metadata.pop(k, None)
                self._remotepkgs[fullpkg] = remote_metadata
            except SystemExit as e:
                raise
            except:
                # Best effort: every failure other than SystemExit is
                # reported and the package is skipped.
                writemsg(_("!!! Failed to inject remote binary package: %s\n") % fullpkg,
                    noiselevel=-1)
                continue
    self.populated=1