def test_xml_parser_primary_warnings(self):
    userdata = {
        "pkgs": [],
        "warnings": []
    }

    def newpkgcb(pkgId, name, arch):
        pkg = cr.Package()
        userdata["pkgs"].append(pkg)
        return pkg

    def warningcb(warn_type, msg):
        userdata["warnings"].append((warn_type, msg))

    cr.xml_parse_primary(PRIMARY_MULTI_WARN_00_PATH, newpkgcb, None, warningcb, 1)

    self.assertEqual([pkg.name for pkg in userdata["pkgs"]],
                     ['fake_bash', 'super_kernel'])
    self.assertEqual(userdata["warnings"],
                     [(0, 'Unknown element "fooelement"'),
                      (1, 'Missing attribute "type" of a package element'),
                      (0, 'Unknown element "foo"'),
                      (3, 'Conversion of "foobar" to integer failed'),
                      (2, 'Unknown file type "xxx"'),
                      (0, 'Unknown element "bar"')])
def calculate_contenthash(path):
    if not os.path.isdir(path) or not os.path.isdir(os.path.join(path, "repodata/")):
        raise AttributeError("Not a repo: {0}".format(path))

    repomd_path = os.path.join(path, "repodata/repomd.xml")
    repomd = cr.Repomd(repomd_path)

    primary_path = None
    for rec in repomd.records:
        if rec.type == "primary":
            primary_path = rec.location_href
            break

    if not primary_path:
        raise CalculationException("primary metadata are missing")

    pkgids = []

    def pkgcb(pkg):
        pkgids.append("{0}{1}{2}".format(pkg.pkgId,
                                         pkg.location_href,
                                         pkg.location_base or ""))

    cr.xml_parse_primary(os.path.join(path, primary_path), pkgcb=pkgcb)

    contenthash = hashlib.new("sha256")
    for pkgid in sorted(pkgids):
        # hashlib requires bytes, not str (cf. the variant of this
        # function below, which already encodes)
        contenthash.update(pkgid.encode("utf-8"))
    return contenthash.hexdigest()
def test_xml_parser_primary_repo02(self):
    userdata = {
        "pkgs": [],
        "pkgcb_calls": 0,
        "warnings": []
    }

    def newpkgcb(pkgId, name, arch):
        pkg = cr.Package()
        userdata["pkgs"].append(pkg)
        return pkg

    def pkgcb(pkg):
        userdata["pkgcb_calls"] += 1

    def warningcb(warn_type, msg):
        userdata["warnings"].append((warn_type, msg))

    cr.xml_parse_primary(REPO_02_PRIXML, newpkgcb, pkgcb, warningcb, 1)

    self.assertEqual([pkg.name for pkg in userdata["pkgs"]],
                     ['fake_bash', 'super_kernel'])
    self.assertEqual(userdata["pkgcb_calls"], 2)
    self.assertEqual(userdata["warnings"], [])
def calculate_contenthash(path):
    if not os.path.isdir(path) or \
       not os.path.isdir(os.path.join(path, "repodata/")):
        raise AttributeError("Not a repo: {0}".format(path))

    repomd_path = os.path.join(path, "repodata/repomd.xml")
    repomd = cr.Repomd(repomd_path)

    primary_path = None
    for rec in repomd.records:
        if rec.type == "primary":
            primary_path = rec.location_href
            break

    if not primary_path:
        raise CalculationException("primary metadata are missing")

    pkgids = []

    def pkgcb(pkg):
        pkgids.append("{0}{1}{2}".format(pkg.pkgId,
                                         pkg.location_href,
                                         pkg.location_base or ''))

    cr.xml_parse_primary(os.path.join(path, primary_path), pkgcb=pkgcb)

    contenthash = hashlib.new("sha256")
    for pkgid in sorted(pkgids):
        contenthash.update(pkgid.encode('utf-8'))
    return contenthash.hexdigest()
def for_each_pkg_primary(self, pkgcb):
    """Execute a callback for each package, parsed from
    only primary package metadata.

    Only primary metadata means no files or changelogs.
    """
    cr.xml_parse_primary(self.primary_xml_path, pkgcb=pkgcb, do_files=False)
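# Hedged usage sketch for for_each_pkg_primary() above; `repo` stands for an
# instance of the (unnamed) owning class, which is an assumption here, not
# part of the original source.
def collect_primary_names(repo):
    names = []
    # pkg.files stays empty because the method passes do_files=False
    repo.for_each_pkg_primary(lambda pkg: names.append(pkg.name))
    return names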
async def parse_repodata(primary_xml_path, filelists_xml_path, other_xml_path):
    """
    Parse repodata to extract package info.

    Args:
        primary_xml_path(str): a path to a downloaded primary.xml
        filelists_xml_path(str): a path to a downloaded filelists.xml
        other_xml_path(str): a path to a downloaded other.xml

    Returns:
        dict: createrepo_c package objects with the pkgId as a key
    """
    def pkgcb(pkg):
        """
        A callback which is used when a whole package entry in xml is parsed.

        Args:
            pkg(createrepo_c.Package): parsed metadata for a package
        """
        packages[pkg.pkgId] = pkg

    def newpkgcb(pkgId, name, arch):
        """
        A callback which is used when a new package entry is encountered.

        Only the opening <package> element is parsed at that moment.
        This function has to return a package object to which the parsed data
        will be added, or None if the package should be skipped.

        pkgId, name and arch of a package can be used to skip further parsing.
        Available only for filelists.xml and other.xml.

        Args:
            pkgId(str): pkgId of a package
            name(str): name of a package
            arch(str): arch of a package

        Returns:
            createrepo_c.Package: a package to which the parsed data should be
                added. If None is returned, further parsing of the package
                will be skipped.
        """
        return packages.get(pkgId, None)

    packages = {}

    # TODO: handle parsing errors/warnings, warningcb callback can be used below
    cr.xml_parse_primary(primary_xml_path, pkgcb=pkgcb, do_files=False)
    cr.xml_parse_filelists(filelists_xml_path, newpkgcb=newpkgcb)
    cr.xml_parse_other(other_xml_path, newpkgcb=newpkgcb)

    return packages
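# Hedged usage sketch for the coroutine above; the three metadata paths are
# illustrative assumptions, not from the original source.
import asyncio

async def _show_packages():
    packages = await parse_repodata("primary.xml.gz",
                                    "filelists.xml.gz",
                                    "other.xml.gz")
    for pkgId, pkg in packages.items():
        print(pkgId, pkg.name)

# asyncio.run(_show_packages())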
def test_xml_parser_primary_repo02_only_pkgcb(self):
    pkgs = []

    def pkgcb(pkg):
        pkgs.append(pkg)

    cr.xml_parse_primary(REPO_02_PRIXML, None, pkgcb, None, 1)

    self.assertEqual([pkg.name for pkg in pkgs],
                     ['fake_bash', 'super_kernel'])
def calculate_content_hash(path_to_primary_xml, checksum_type="sha256", logger=None):
    pkg_id_strs = []

    if checksum_type == "sha":
        # Classical createrepo says "sha" but means "sha1" - keep things
        # compatible across the packaging stack
        checksum_type = "sha1"

    def pkgcb(pkg):
        pkg_id_strs.append(pkg_id_str(pkg, logger))

    cr.xml_parse_primary(path_to_primary_xml, pkgcb=pkgcb, do_files=False)

    h = hashlib.new(checksum_type)
    for i in sorted(pkg_id_strs):
        # hashlib requires bytes, not str
        h.update(i.encode("utf-8"))
    return h.hexdigest()
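# pkg_id_str() is defined elsewhere in the project this snippet comes from.
# A minimal sketch, assuming it concatenates the same fields used by
# calculate_contenthash() above (pkgId, location_href, location_base);
# the logger behaviour is a guess:
def pkg_id_str(pkg, logger=None):
    if logger and not pkg.pkgId:
        logger.warning("Missing pkgId for %s", pkg.location_href)
    return "{0}{1}{2}".format(pkg.pkgId or "",
                              pkg.location_href or "",
                              pkg.location_base or "")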
def __index_repo_packages(self, cur, repo_id, repo_path):
    """
    Extracts the package list from repository metadata and saves it
    to the database.

    Parameters
    ----------
    cur : sqlite3.Cursor
        Database cursor.
    repo_id : int
        Repository id.
    repo_path : str
        Repository path.
    """
    primary_xml_path = self.get_repomd_record_xml_path(repo_path, 'primary')
    save_cb = functools.partial(self.__save_repo_package, cur, repo_id)
    createrepo_c.xml_parse_primary(primary_xml_path, pkgcb=save_cb,
                                   do_files=False)
def parse_repodata(path):
    """Return a list of packages included in this repository"""
    try:
        repomd = cr.Repomd(os.path.join(path, "repodata/repomd.xml"))
    except OSError as e:
        logging.error(e)
        exit(2)

    for record in repomd.records:
        if record.type == "primary":
            primary_xml_path = record.location_href

    def warningcb(warning_type, message):
        """Optional callback for warnings about weird stuff and formatting in XML.

        :param warning_type: Integer value. One from the XML_WARNING_* constants.
        :param message: String message.
        """
        logging.warning("PARSER WARNING: %s" % message)
        return True

    packages = []

    def pkgcb(pkg):
        # Called when a whole package entry in the xml has been parsed
        packages.append(pkg)

    cr.xml_parse_primary(os.path.join(path, primary_xml_path),
                         pkgcb=pkgcb,
                         do_files=False,
                         warningcb=warningcb)

    return packages
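# Hedged usage sketch for parse_repodata() above; "/srv/myrepo" is an
# illustrative path to a directory containing repodata/repomd.xml.
for pkg in parse_repodata("/srv/myrepo"):
    print(pkg.name, pkg.location_href)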
def apply(self, metadata):
    # Check input arguments
    if "primary" not in metadata:
        self._error("primary.xml metadata file is missing")
        raise DeltaRepoPluginError("Primary metadata missing")

    gen_repomd_recs = []
    removed_packages = {}

    pri_md = metadata.get("primary")
    fil_md = metadata.get("filelists")
    oth_md = metadata.get("other")

    def try_simple_delta(md, dbclass):
        if not md:
            return
        notes = self._metadata_notes_from_plugin_bundle(md.metadata_type)
        if not notes:
            self._warning("Metadata \"{0}\" doesn't have a record in "
                          "deltametadata.xml - Ignoring".format(md.metadata_type))
            return True
        rc, rec = self._apply_basic_delta(md, notes)
        if not rc:
            return False
        if rec:
            gen_repomd_recs.append(rec)
        if not md.new_fn_exists:
            return True

        # Gen DB here
        if self.globalbundle.force_database or notes.get("database") == "1":
            rec = self._gen_db_from_xml(md)
            gen_repomd_recs.append(rec)

        return True

    # At first, try the simple delta
    simple_pri_delta = try_simple_delta(pri_md, cr.PrimarySqlite)
    simple_fil_delta = try_simple_delta(fil_md, cr.FilelistsSqlite)
    simple_oth_delta = try_simple_delta(oth_md, cr.OtherSqlite)

    if simple_pri_delta:
        assert simple_fil_delta
        assert simple_oth_delta
        return gen_repomd_recs

    # Ignore already processed metadata
    if simple_fil_delta:
        fil_md = None
    if simple_oth_delta:
        oth_md = None

    # Make a dict of removed packages: key is location_href,
    # value is location_base
    for record in self.pluginbundle.get_list("removedpackage", []):
        location_href = record.get("location_href")
        if not location_href:
            continue
        location_base = record.get("location_base")
        removed_packages[location_href] = location_base

    # Prepare output xml files and check if dbs should be generated.
    # Note: This information is stored directly to the Metadata
    # object, which someone could see as a little hacky.
    def prepare_paths_in_metadata(md, xmlclass, dbclass):
        if md is None:
            return

        notes = self._metadata_notes_from_plugin_bundle(md.metadata_type)
        if not notes:
            # TODO: Add flag to ignore this kind of warnings (?)
            self._warning("Metadata \"{0}\" doesn't have a record in "
                          "deltametadata.xml - Ignoring".format(md.metadata_type))
            return

        suffix = cr.compression_suffix(md.compression_type) or ""
        md.new_fn = os.path.join(md.out_dir,
                                 "{0}.xml{1}".format(md.metadata_type, suffix))
        md.new_f_stat = cr.ContentStat(md.checksum_type)
        md.new_f = xmlclass(md.new_fn,
                            md.compression_type,
                            md.new_f_stat)

        if self.globalbundle.force_database or notes.get("database") == "1":
            md.db_fn = os.path.join(md.out_dir,
                                    "{0}.sqlite".format(md.metadata_type))
            md.db = dbclass(md.db_fn)
        else:
            md.db_fn = None
            md.db = None

    # Primary
    prepare_paths_in_metadata(pri_md, cr.PrimaryXmlFile, cr.PrimarySqlite)
    # Filelists
    prepare_paths_in_metadata(fil_md, cr.FilelistsXmlFile, cr.FilelistsSqlite)
    # Other
    prepare_paths_in_metadata(oth_md, cr.OtherXmlFile, cr.OtherSqlite)

    # Apply delta
    all_packages = {}  # dict { 'pkgId': pkg }

    old_contenthash_strings = []
    new_contenthash_strings = []

    def old_pkgcb(pkg):
        old_contenthash_strings.append(self._pkg_id_str(pkg))
        if pkg.location_href in removed_packages:
            if removed_packages[pkg.location_href] == pkg.location_base:
                # This package won't be in new metadata
                return
        new_contenthash_strings.append(self._pkg_id_str(pkg))
        all_packages[pkg.pkgId] = pkg

    def delta_pkgcb(pkg):
        new_contenthash_strings.append(self._pkg_id_str(pkg))
        all_packages[pkg.pkgId] = pkg

    filelists_from_primary = True
    if fil_md:
        filelists_from_primary = False

    # Parse both old and delta primary.xml files
    cr.xml_parse_primary(pri_md.old_fn,
                         pkgcb=old_pkgcb,
                         do_files=filelists_from_primary)
    cr.xml_parse_primary(pri_md.delta_fn,
                         pkgcb=delta_pkgcb,
                         do_files=filelists_from_primary)

    # Calculate content hashes
    h = hashlib.new(self.globalbundle.contenthash_type_str)
    old_contenthash_strings.sort()
    for i in old_contenthash_strings:
        h.update(i.encode("utf-8"))  # hashlib requires bytes
    self.globalbundle.calculated_old_contenthash = h.hexdigest()

    h = hashlib.new(self.globalbundle.contenthash_type_str)
    new_contenthash_strings.sort()
    for i in new_contenthash_strings:
        h.update(i.encode("utf-8"))
    self.globalbundle.calculated_new_contenthash = h.hexdigest()

    # Sort packages: first by filename, then by full location_href path
    # (Python 3: sorted() has no cmp= argument, so use a key tuple)
    def pkg_sort_key(pkg):
        return (os.path.basename(pkg.location_href), pkg.location_href)

    all_packages_sorted = sorted(all_packages.values(), key=pkg_sort_key)

    def newpkgcb(pkgId, name, arch):
        return all_packages.get(pkgId, None)

    # Parse filelists
    if fil_md:
        self._debug("Parsing filelists xmls")
        cr.xml_parse_filelists(fil_md.old_fn, newpkgcb=newpkgcb)
        cr.xml_parse_filelists(fil_md.delta_fn, newpkgcb=newpkgcb)

    if oth_md:
        self._debug("Parsing other xmls")
        cr.xml_parse_other(oth_md.old_fn, newpkgcb=newpkgcb)
        cr.xml_parse_other(oth_md.delta_fn, newpkgcb=newpkgcb)

    num_of_packages = len(all_packages_sorted)

    # Write out primary
    self._debug("Writing primary xml: {0}".format(pri_md.new_fn))
    pri_md.new_f.set_num_of_pkgs(num_of_packages)
    for pkg in all_packages_sorted:
        pri_md.new_f.add_pkg(pkg)
        if pri_md.db:
            pri_md.db.add_pkg(pkg)

    # Write out filelists
    if fil_md:
        self._debug("Writing filelists xml: {0}".format(fil_md.new_fn))
        fil_md.new_f.set_num_of_pkgs(num_of_packages)
        for pkg in all_packages_sorted:
            fil_md.new_f.add_pkg(pkg)
            if fil_md.db:
                fil_md.db.add_pkg(pkg)

    # Write out other
    if oth_md:
        self._debug("Writing other xml: {0}".format(oth_md.new_fn))
        oth_md.new_f.set_num_of_pkgs(num_of_packages)
        for pkg in all_packages_sorted:
            oth_md.new_f.add_pkg(pkg)
            if oth_md.db:
                oth_md.db.add_pkg(pkg)

    # Finish metadata
    def finish_metadata(md):
        if md is None:
            return

        # Close XML file
        md.new_f.close()

        # Prepare repomd record of xml file
        rec = cr.RepomdRecord(md.metadata_type, md.new_fn)
        rec.load_contentstat(md.new_f_stat)
        rec.fill(md.checksum_type)
        if self.globalbundle.unique_md_filenames:
            rec.rename_file()

        md.new_rec = rec
        md.new_fn_exists = True

        gen_repomd_recs.append(rec)

        # Prepare database
        if hasattr(md, "db") and md.db:
            self._debug("Generating database: {0}".format(md.db_fn))
            md.db.dbinfo_update(rec.checksum)
            md.db.close()
            db_stat = cr.ContentStat(md.checksum_type)
            db_compressed = md.db_fn + ".bz2"
            cr.compress_file(md.db_fn, None, cr.BZ2, db_stat)
            os.remove(md.db_fn)

            # Prepare repomd record of database file
            db_rec = cr.RepomdRecord("{0}_db".format(md.metadata_type),
                                     db_compressed)
            db_rec.load_contentstat(db_stat)
            db_rec.fill(md.checksum_type)
            if self.globalbundle.unique_md_filenames:
                db_rec.rename_file()

            gen_repomd_recs.append(db_rec)

    # Add records to the bundle
    finish_metadata(pri_md)
    finish_metadata(fil_md)
    finish_metadata(oth_md)

    return gen_repomd_recs
def gen(self, metadata):
    # Check input arguments
    if "primary" not in metadata:
        self._error("primary.xml metadata file is missing")
        raise DeltaRepoPluginError("Primary metadata missing")

    gen_repomd_recs = []

    # Metadata info that will be persistently stored
    metadata_notes = {}

    pri_md = metadata.get("primary")
    fil_md = metadata.get("filelists")
    oth_md = metadata.get("other")

    def try_simple_delta(md, force_gen=False):
        """Try to do a simple delta. If successful, return True."""
        rc, rec, notes = self._gen_basic_delta(md, force_gen=force_gen)
        if not rc:
            return False
        if rec:
            gen_repomd_recs.append(rec)
        if not notes:
            notes = {}
        if metadata.get(md.metadata_type + "_db").new_fn_exists:
            notes["database"] = "1"
        else:
            notes["database"] = "0"
        self._metadata_notes_to_plugin_bundle(md.metadata_type, notes)
        return True

    # At first, try to do a simple delta for primary.
    # If successful, force simple deltas for filelists and other too.
    simple_pri_delta = try_simple_delta(pri_md)
    simple_fil_delta = try_simple_delta(fil_md, force_gen=simple_pri_delta)
    simple_oth_delta = try_simple_delta(oth_md, force_gen=simple_pri_delta)

    if simple_pri_delta:
        # Simple delta for primary means that simple deltas were done
        # for all the other metadata too
        return gen_repomd_recs

    # At this point we know that the simple delta for primary wasn't done.
    # This means that, at least for primary, both metadata files (the new
    # one and the old one) exist, and we have to do a more sophisticated
    # delta.

    # Ignore files for which the simple delta was successful
    if simple_fil_delta:
        fil_md = None
    if simple_oth_delta:
        oth_md = None

    # Prepare output xml files and check if dbs should be generated.
    # Note: This information is stored directly to the Metadata
    # object, which someone could see as a little hacky.
    def prepare_paths_in_metadata(md, xmlclass):
        if md is None:
            return None

        # Make a note about whether the database should be generated
        db_available = metadata.get(md.metadata_type + "_db").new_fn_exists
        if db_available or self.globalbundle.force_database:
            metadata_notes.setdefault(md.metadata_type, {})["database"] = "1"
        else:
            metadata_notes.setdefault(md.metadata_type, {})["database"] = "0"

        suffix = cr.compression_suffix(md.compression_type) or ""
        md.delta_fn = os.path.join(md.out_dir,
                                   "{0}.xml{1}".format(md.metadata_type, suffix))
        md.delta_f_stat = cr.ContentStat(md.checksum_type)
        md.delta_f = xmlclass(md.delta_fn,
                              md.compression_type,
                              md.delta_f_stat)
        return md

    # Primary
    pri_md = prepare_paths_in_metadata(pri_md, cr.PrimaryXmlFile)
    # Filelists
    fil_md = prepare_paths_in_metadata(fil_md, cr.FilelistsXmlFile)
    # Other
    oth_md = prepare_paths_in_metadata(oth_md, cr.OtherXmlFile)

    # Gen delta
    old_packages = set()
    added_packages = {}         # dict { 'pkgId': pkg }
    added_packages_ids = []     # list of package ids

    old_contenthash_strings = []
    new_contenthash_strings = []

    def old_pkgcb(pkg):
        old_packages.add(self._pkg_id_tuple(pkg))
        old_contenthash_strings.append(self._pkg_id_str(pkg))

    def new_pkgcb(pkg):
        new_contenthash_strings.append(self._pkg_id_str(pkg))
        pkg_id_tuple = self._pkg_id_tuple(pkg)
        if pkg_id_tuple not in old_packages:
            # This package is only in the new repodata
            added_packages[pkg.pkgId] = pkg
            added_packages_ids.append(pkg.pkgId)
        else:
            # This package is also in the old repodata
            old_packages.remove(pkg_id_tuple)

    filelists_from_primary = True
    if fil_md:
        # Filelists will be parsed from filelists.xml
        filelists_from_primary = False

    cr.xml_parse_primary(pri_md.old_fn, pkgcb=old_pkgcb, do_files=False)
    cr.xml_parse_primary(pri_md.new_fn, pkgcb=new_pkgcb,
                         do_files=filelists_from_primary)

    # Calculate content hashes
    h = hashlib.new(self.globalbundle.contenthash_type_str)
    old_contenthash_strings.sort()
    for i in old_contenthash_strings:
        h.update(i.encode("utf-8"))  # hashlib requires bytes
    src_contenthash = h.hexdigest()
    self.globalbundle.calculated_old_contenthash = src_contenthash

    h = hashlib.new(self.globalbundle.contenthash_type_str)
    new_contenthash_strings.sort()
    for i in new_contenthash_strings:
        h.update(i.encode("utf-8"))
    dst_contenthash = h.hexdigest()
    self.globalbundle.calculated_new_contenthash = dst_contenthash

    # Set the content hashes to the plugin bundle
    self.pluginbundle.set("contenthash_type",
                          self.globalbundle.contenthash_type_str)
    self.pluginbundle.set("src_contenthash", src_contenthash)
    self.pluginbundle.set("dst_contenthash", dst_contenthash)

    # Prepare the list of removed packages
    removed_pkgs = sorted(old_packages)
    for _, location_href, location_base in removed_pkgs:
        dictionary = {"location_href": location_href}
        if location_base:
            dictionary["location_base"] = location_base
        self.pluginbundle.append("removedpackage", dictionary)

    num_of_packages = len(added_packages)

    # Filelists and Other cb
    def newpkgcb(pkgId, name, arch):
        return added_packages.get(pkgId, None)

    # Parse filelists.xml and write out its delta
    if fil_md:
        cr.xml_parse_filelists(fil_md.new_fn, newpkgcb=newpkgcb)
        fil_md.delta_f.set_num_of_pkgs(num_of_packages)
        for pkgid in added_packages_ids:
            fil_md.delta_f.add_pkg(added_packages[pkgid])
        fil_md.delta_f.close()

    # Parse other.xml and write out its delta
    if oth_md:
        cr.xml_parse_other(oth_md.new_fn, newpkgcb=newpkgcb)
        oth_md.delta_f.set_num_of_pkgs(num_of_packages)
        for pkgid in added_packages_ids:
            oth_md.delta_f.add_pkg(added_packages[pkgid])
        oth_md.delta_f.close()

    # Write out the primary delta.
    # Note: Writing of the primary delta has to happen after parsing of
    # filelists, otherwise files would be missing if filelists_from_primary
    # was False.
    pri_md.delta_f.set_num_of_pkgs(num_of_packages)
    for pkgid in added_packages_ids:
        pri_md.delta_f.add_pkg(added_packages[pkgid])
    pri_md.delta_f.close()

    # Finish metadata
    def finish_metadata(md):
        if md is None:
            return

        # Close XML file
        md.delta_f.close()

        # Prepare repomd record of xml file
        rec = cr.RepomdRecord(md.metadata_type, md.delta_fn)
        rec.load_contentstat(md.delta_f_stat)
        rec.fill(md.checksum_type)
        if self.globalbundle.unique_md_filenames:
            rec.rename_file()

        md.delta_rec = rec
        md.delta_fn_exists = True

        gen_repomd_recs.append(rec)

        # Prepare database
        if hasattr(md, "db") and md.db:
            md.db.dbinfo_update(rec.checksum)
            md.db.close()
            db_stat = cr.ContentStat(md.checksum_type)
            db_compressed = md.db_fn + ".bz2"
            cr.compress_file(md.db_fn, None, cr.BZ2, db_stat)
            os.remove(md.db_fn)

            # Prepare repomd record of database file
            db_rec = cr.RepomdRecord("{0}_db".format(md.metadata_type),
                                     db_compressed)
            db_rec.load_contentstat(db_stat)
            db_rec.fill(md.checksum_type)
            if self.globalbundle.unique_md_filenames:
                db_rec.rename_file()

            gen_repomd_recs.append(db_rec)

    # Add records to metadata objects
    finish_metadata(pri_md)
    finish_metadata(fil_md)
    finish_metadata(oth_md)

    # Store data persistently
    for metadata_type, notes in metadata_notes.items():
        self._metadata_notes_to_plugin_bundle(metadata_type, notes)

    return gen_repomd_recs
def test_xml_parser_primary_repo01(self):
    userdata = {
        "pkgs": [],
        "pkgcb_calls": 0,
        "warnings": []
    }

    def newpkgcb(pkgId, name, arch):
        pkg = cr.Package()
        userdata["pkgs"].append(pkg)
        return pkg

    def pkgcb(pkg):
        userdata["pkgcb_calls"] += 1

    def warningcb(warn_type, msg):
        userdata["warnings"].append((warn_type, msg))

    cr.xml_parse_primary(REPO_01_PRIXML, newpkgcb, pkgcb, warningcb, 1)

    self.assertEqual([pkg.name for pkg in userdata["pkgs"]], ['super_kernel'])
    self.assertEqual(userdata["pkgcb_calls"], 1)
    self.assertEqual(userdata["warnings"], [])

    pkg = userdata["pkgs"][0]
    self.assertEqual(pkg.pkgId,
                     "152824bff2aa6d54f429d43e87a3ff3a0286505c6d93ec87692b5e3a9e3b97bf")
    self.assertEqual(pkg.name, "super_kernel")
    self.assertEqual(pkg.arch, "x86_64")
    self.assertEqual(pkg.version, "6.0.1")
    self.assertEqual(pkg.epoch, "0")
    self.assertEqual(pkg.release, "2")
    self.assertEqual(pkg.summary, "Test package")
    self.assertEqual(pkg.description,
                     "This package has provides, requires, obsoletes, conflicts options.")
    self.assertEqual(pkg.url,
                     "http://so_super_kernel.com/it_is_awesome/yep_it_really_is")
    self.assertEqual(pkg.time_file, 1334667003)
    self.assertEqual(pkg.time_build, 1334667003)
    self.assertEqual(pkg.rpm_license, "LGPLv2")
    self.assertEqual(pkg.rpm_vendor, None)
    self.assertEqual(pkg.rpm_group, "Applications/System")
    self.assertEqual(pkg.rpm_buildhost, "localhost.localdomain")
    self.assertEqual(pkg.rpm_sourcerpm, "super_kernel-6.0.1-2.src.rpm")
    self.assertEqual(pkg.rpm_header_start, 280)
    self.assertEqual(pkg.rpm_header_end, 2637)
    self.assertEqual(pkg.rpm_packager, None)
    self.assertEqual(pkg.size_package, 2845)
    self.assertEqual(pkg.size_installed, 0)
    self.assertEqual(pkg.size_archive, 404)
    self.assertEqual(pkg.location_href, "super_kernel-6.0.1-2.x86_64.rpm")
    self.assertEqual(pkg.location_base, None)
    self.assertEqual(pkg.checksum_type, "sha256")
    self.assertEqual(pkg.requires,
                     [('bzip2', 'GE', '0', '1.0.0', None, True),
                      ('expat', None, None, None, None, True),
                      ('glib', 'GE', '0', '2.26.0', None, False),
                      ('zlib', None, None, None, None, False)])
    self.assertEqual(pkg.provides,
                     [('not_so_super_kernel', 'LT', '0', '5.8.0', None, False),
                      ('super_kernel', 'EQ', '0', '6.0.0', None, False),
                      ('super_kernel', 'EQ', '0', '6.0.1', '2', False),
                      ('super_kernel(x86-64)', 'EQ', '0', '6.0.1', '2', False)])
    self.assertEqual(pkg.conflicts,
                     [('kernel', None, None, None, None, False),
                      ('super_kernel', 'EQ', '0', '5.0.0', None, False),
                      ('super_kernel', 'LT', '0', '4.0.0', None, False)])
    self.assertEqual(pkg.obsoletes,
                     [('kernel', None, None, None, None, False),
                      ('super_kernel', 'EQ', '0', '5.9.0', None, False)])
    self.assertEqual(pkg.files, [(None, '/usr/bin/', 'super_kernel')])
    self.assertEqual(pkg.changelogs, [])
def second_method():
    """Preferred method for repodata parsing.

    Important callbacks for repodata parsing:

    newpkgcb
    --------
    Via newpkgcb (package callback) you can directly affect whether the
    current package element should be parsed or not. This decision can
    be based on three values that are available as attributes
    of the <package> element. These values are:
     - pkgId (package checksum)
     - name (package name)
     - arch (package architecture)
    (Note: This is applicable only for filelists.xml and other.xml,
     primary.xml doesn't contain this information in the <package> element)

    If newpkgcb returns a package object, the parsed data will be loaded
    into this package object. If it returns None, the package element is
    skipped. This can reduce memory requirements, because unwanted
    packages can be skipped without being stored in memory.

    If no newpkgcb is specified, a default callback returning a new
    package object is used.

    pkgcb
    -----
    Callback called when parsing of a <package> element is done.
    Its argument is a package object that has been previously returned
    by the newpkgcb.
    This function should return True if parsing should continue or
    False if parsing should be interrupted.

    Note: Both callbacks are optional, BUT at least one MUST be used
    (newpkgcb or pkgcb)!

    warningcb
    ---------
    The warning callback is called when a non-fatal oddity of the parsed
    XML is detected.
    If True is returned, parsing continues. If the return value is
    False, parsing is terminated.
    This callback is optional.
    """

    primary_xml_path = None
    filelists_xml_path = None
    other_xml_path = None

    #
    # repomd.xml parsing
    #

    # Parse repomd.xml to get paths (1. Method - Repomd object based)
    #   Pros: Easy to use
    repomd = cr.Repomd(os.path.join(REPO_PATH, "repodata/repomd.xml"))

    # Parse repomd.xml (2. Method - Parser based)
    #   Pros: Warning callback can be specified
    def warningcb(warning_type, message):
        """Optional callback for warnings about weird stuff and formatting in XML.

        :param warning_type: Integer value. One from the XML_WARNING_* constants.
        :param message: String message.
        """
        print("PARSER WARNING: %s" % message)
        return True

    repomd2 = cr.Repomd()
    cr.xml_parse_repomd(os.path.join(REPO_PATH, "repodata/repomd.xml"),
                        repomd2, warningcb)

    # Get stuff we need
    # (repomd or repomd2 could be used, both have the same values)
    for record in repomd.records:
        if record.type == "primary":
            primary_xml_path = record.location_href
        elif record.type == "filelists":
            filelists_xml_path = record.location_href
        elif record.type == "other":
            other_xml_path = record.location_href

    #
    # Main XML metadata parsing (primary, filelists, other)
    #

    packages = {}

    def pkgcb(pkg):
        # Called when the whole package entry in xml is parsed
        packages[pkg.pkgId] = pkg

    def newpkgcb(pkgId, name, arch):
        # Called when a new package entry is encountered and only the
        # opening <package> element is parsed. This function has to
        # return a package to which the parsed data will be added,
        # or None if this package should be skipped.
        return packages.get(pkgId, None)

    # Option do_files tells the primary parser to skip the <file> elements
    # of packages. If you plan to parse filelists.xml after primary.xml,
    # always set do_files to False.
    cr.xml_parse_primary(os.path.join(REPO_PATH, primary_xml_path),
                         pkgcb=pkgcb,
                         do_files=False,
                         warningcb=warningcb)

    cr.xml_parse_filelists(os.path.join(REPO_PATH, filelists_xml_path),
                           newpkgcb=newpkgcb,
                           warningcb=warningcb)

    cr.xml_parse_other(os.path.join(REPO_PATH, other_xml_path),
                       newpkgcb=newpkgcb,
                       warningcb=warningcb)

    for pkg in packages.values():
        print_package_info(pkg)
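# A minimal sketch of the warningcb contract described above: collect
# warnings instead of printing them, and return True so parsing continues
# ("primary.xml.gz" is an assumed path, not from the original source).
import createrepo_c as cr

collected_warnings = []

def collecting_warningcb(warning_type, message):
    collected_warnings.append((warning_type, message))
    return True  # returning False would terminate parsing

cr.xml_parse_primary("primary.xml.gz",
                     pkgcb=lambda pkg: None,
                     warningcb=collecting_warningcb)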
def oneshot_callback():
    """Parse one file at a time into a set of packages.

    Use of this method is discouraged.

    newpkgcb
    --------
    Via newpkgcb (package callback) you can directly affect whether the
    current package element should be parsed or not. This decision can
    be based on three values that are available as attributes
    of the <package> element. These values are:
     - pkgId (package checksum)
     - name (package name)
     - arch (package architecture)
    (Note: This is applicable only for filelists.xml and other.xml,
     primary.xml doesn't contain this information in the <package> element)

    If newpkgcb returns a package object, the parsed data will be loaded
    into this package object. If it returns None, the package element is
    skipped. This can reduce memory requirements, because unwanted
    packages can be skipped without being stored in memory.

    If no newpkgcb is specified, a default callback returning a new
    package object is used.

    pkgcb
    -----
    Callback called when parsing of a <package> element is done.
    Its argument is a package object that has been previously returned
    by the newpkgcb.
    This function should return True if parsing should continue or
    False if parsing should be interrupted.

    Note: Both callbacks are optional, BUT at least one MUST be used
    (newpkgcb or pkgcb)!

    warningcb
    ---------
    The warning callback is called when a non-fatal oddity of the parsed
    XML is detected.
    If True is returned, parsing continues. If the return value is
    False, parsing is terminated.
    This callback is optional.
    """

    primary_xml_path = None
    filelists_xml_path = None
    other_xml_path = None

    #
    # repomd.xml parsing
    #

    # Parse repomd.xml to get paths (1. Method - Repomd object based)
    #   Pros: Easy to use
    repomd = cr.Repomd(os.path.join(REPO_PATH, "repodata/repomd.xml"))

    # Parse repomd.xml (2. Method - Parser based)
    #   Pros: Warning callback can be specified
    def warningcb(warning_type, message):
        """Optional callback for warnings about weird stuff and formatting in XML.

        :param warning_type: Integer value. One from the XML_WARNING_* constants.
        :param message: String message.
        """
        print("PARSER WARNING: %s" % message)
        return True

    repomd2 = cr.Repomd()
    cr.xml_parse_repomd(os.path.join(REPO_PATH, "repodata/repomd.xml"),
                        repomd2, warningcb)

    # Get stuff we need
    # (repomd or repomd2 could be used, both have the same values)
    for record in repomd.records:
        if record.type == "primary":
            primary_xml_path = record.location_href
        elif record.type == "filelists":
            filelists_xml_path = record.location_href
        elif record.type == "other":
            other_xml_path = record.location_href

    #
    # Main XML metadata parsing (primary, filelists, other)
    #

    packages = {}

    def pkgcb(pkg):
        # Called when the whole package entry in xml is parsed
        packages[pkg.pkgId] = pkg

    def newpkgcb(pkgId, name, arch):
        # Called when a new package entry is encountered and only the
        # opening <package> element is parsed. This function has to
        # return a package to which the parsed data will be added,
        # or None if this package should be skipped.
        return packages.get(pkgId, None)

    # Option do_files tells the primary parser to skip the <file> elements
    # of packages. If you plan to parse filelists.xml after primary.xml,
    # always set do_files to False.
    cr.xml_parse_primary(os.path.join(REPO_PATH, primary_xml_path),
                         pkgcb=pkgcb,
                         do_files=False,
                         warningcb=warningcb)

    cr.xml_parse_filelists(os.path.join(REPO_PATH, filelists_xml_path),
                           newpkgcb=newpkgcb,
                           warningcb=warningcb)

    cr.xml_parse_other(os.path.join(REPO_PATH, other_xml_path),
                       newpkgcb=newpkgcb,
                       warningcb=warningcb)

    for pkg in packages.values():
        print_package_info(pkg)
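# A minimal sketch of the newpkgcb-based filtering described in the
# docstring above: return None to skip a package, or a cr.Package to
# collect its data. The "filelists.xml.gz" path and the package name
# are assumptions for illustration.
import createrepo_c as cr

wanted_names = {"super_kernel"}
kept = []

def filtering_newpkgcb(pkgId, name, arch):
    if name not in wanted_names:
        return None  # skip further parsing of this package
    pkg = cr.Package()
    kept.append(pkg)
    return pkg

cr.xml_parse_filelists("filelists.xml.gz", newpkgcb=filtering_newpkgcb)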
def test_xml_parser_primary_repo01(self): userdata = {"pkgs": [], "pkgcb_calls": 0, "warnings": []} def newpkgcb(pkgId, name, arch): pkg = cr.Package() userdata["pkgs"].append(pkg) return pkg def pkgcb(pkg): userdata["pkgcb_calls"] += 1 def warningcb(warn_type, msg): userdata["warnings"].append((warn_type, msg)) cr.xml_parse_primary(REPO_01_PRIXML, newpkgcb, pkgcb, warningcb, 1) self.assertEqual([pkg.name for pkg in userdata["pkgs"]], ['super_kernel']) self.assertEqual(userdata["pkgcb_calls"], 1) self.assertEqual(userdata["warnings"], []) pkg = userdata["pkgs"][0] self.assertEqual( pkg.pkgId, "152824bff2aa6d54f429d43e87a3ff3a0286505c6d93ec87692b5e3a9e3b97bf") self.assertEqual(pkg.name, "super_kernel") self.assertEqual(pkg.arch, "x86_64") self.assertEqual(pkg.version, "6.0.1") self.assertEqual(pkg.epoch, "0") self.assertEqual(pkg.release, "2") self.assertEqual(pkg.summary, "Test package") self.assertEqual( pkg.description, "This package has provides, requires, obsoletes, conflicts options." ) self.assertEqual( pkg.url, "http://so_super_kernel.com/it_is_awesome/yep_it_really_is") self.assertEqual(pkg.time_file, 1334667003) self.assertEqual(pkg.time_build, 1334667003) self.assertEqual(pkg.rpm_license, "LGPLv2") self.assertEqual(pkg.rpm_vendor, None) self.assertEqual(pkg.rpm_group, "Applications/System") self.assertEqual(pkg.rpm_buildhost, "localhost.localdomain") self.assertEqual(pkg.rpm_sourcerpm, "super_kernel-6.0.1-2.src.rpm") self.assertEqual(pkg.rpm_header_start, 280) self.assertEqual(pkg.rpm_header_end, 2637) self.assertEqual(pkg.rpm_packager, None) self.assertEqual(pkg.size_package, 2845) self.assertEqual(pkg.size_installed, 0) self.assertEqual(pkg.size_archive, 404) self.assertEqual(pkg.location_href, "super_kernel-6.0.1-2.x86_64.rpm") self.assertEqual(pkg.location_base, None) self.assertEqual(pkg.checksum_type, "sha256") self.assertEqual(pkg.requires, [('bzip2', 'GE', '0', '1.0.0', None, True), ('expat', None, None, None, None, True), ('glib', 'GE', '0', '2.26.0', None, False), ('zlib', None, None, None, None, False)]) self.assertEqual( pkg.provides, [('not_so_super_kernel', 'LT', '0', '5.8.0', None, False), ('super_kernel', 'EQ', '0', '6.0.0', None, False), ('super_kernel', 'EQ', '0', '6.0.1', '2', False), ('super_kernel(x86-64)', 'EQ', '0', '6.0.1', '2', False)]) self.assertEqual(pkg.conflicts, [('kernel', None, None, None, None, False), ('super_kernel', 'EQ', '0', '5.0.0', None, False), ('super_kernel', 'LT', '0', '4.0.0', None, False)]) self.assertEqual(pkg.obsoletes, [('kernel', None, None, None, None, False), ('super_kernel', 'EQ', '0', '5.9.0', None, False)]) self.assertEqual(pkg.files, [(None, '/usr/bin/', 'super_kernel')]) self.assertEqual(pkg.changelogs, [])
def apply(self, pri_old_fn, pri_delta_fn, pri_f, pri_db,
          fil_old_fn, fil_delta_fn, fil_f, fil_db,
          oth_old_fn, oth_delta_fn, oth_f, oth_db, removed):

    removed_packages = set()  # set of pkgIds (hashes)
    all_packages = {}         # dict { 'pkgId': pkg }

    old_repoid_strings = []
    new_repoid_strings = []

    def old_pkgcb(pkg):
        old_repoid_strings.append(self._pkg_id_str(pkg))
        if pkg.location_href in removed.packages:
            if removed.packages[pkg.location_href] == pkg.location_base:
                # This package won't be in new metadata
                return
        new_repoid_strings.append(self._pkg_id_str(pkg))
        all_packages[pkg.pkgId] = pkg

    def delta_pkgcb(pkg):
        new_repoid_strings.append(self._pkg_id_str(pkg))
        all_packages[pkg.pkgId] = pkg

    do_primary_files = 1
    if fil_f and fil_delta_fn and fil_old_fn:
        do_primary_files = 0

    cr.xml_parse_primary(pri_old_fn, pkgcb=old_pkgcb,
                         do_files=do_primary_files)
    cr.xml_parse_primary(pri_delta_fn, pkgcb=delta_pkgcb,
                         do_files=do_primary_files)

    # Calculate RepoIds
    old_repo_id = ""
    new_repo_id = ""

    h = hashlib.new(self.id_type)
    old_repoid_strings.sort()
    for i in old_repoid_strings:
        h.update(i.encode("utf-8"))  # hashlib requires bytes
    old_repo_id = h.hexdigest()

    h = hashlib.new(self.id_type)
    new_repoid_strings.sort()
    for i in new_repoid_strings:
        h.update(i.encode("utf-8"))
    new_repo_id = h.hexdigest()

    # Sort packages: first by filename, then by full location_href path
    # (Python 3: sorted() has no cmp= argument, so use a key tuple)
    def pkg_sort_key(pkg):
        return (os.path.basename(pkg.location_href), pkg.location_href)

    all_packages_sorted = sorted(all_packages.values(), key=pkg_sort_key)

    def newpkgcb(pkgId, name, arch):
        return all_packages.get(pkgId, None)

    # Parse filelists
    if fil_f and fil_delta_fn and fil_old_fn:
        cr.xml_parse_filelists(fil_old_fn, newpkgcb=newpkgcb)
        cr.xml_parse_filelists(fil_delta_fn, newpkgcb=newpkgcb)

    # Parse other
    if oth_f and oth_delta_fn and oth_old_fn:
        cr.xml_parse_other(oth_old_fn, newpkgcb=newpkgcb)
        cr.xml_parse_other(oth_delta_fn, newpkgcb=newpkgcb)

    num_of_packages = len(all_packages_sorted)

    # Write out primary
    pri_f.set_num_of_pkgs(num_of_packages)
    for pkg in all_packages_sorted:
        pri_f.add_pkg(pkg)
        if pri_db:
            pri_db.add_pkg(pkg)

    # Write out filelists
    if fil_f:
        fil_f.set_num_of_pkgs(num_of_packages)
        for pkg in all_packages_sorted:
            fil_f.add_pkg(pkg)
            if fil_db:
                fil_db.add_pkg(pkg)

    # Write out other
    if oth_f:
        oth_f.set_num_of_pkgs(num_of_packages)
        for pkg in all_packages_sorted:
            oth_f.add_pkg(pkg)
            if oth_db:
                oth_db.add_pkg(pkg)

    return (old_repo_id, new_repo_id)
def do(self, pri_old_fn, pri_new_fn, pri_f,
       fil_new_fn, fil_f, oth_new_fn, oth_f, removed):

    old_packages = set()
    added_packages = {}         # dict { 'pkgId': pkg }
    added_packages_ids = []     # list of package ids

    old_repoid_strings = []
    new_repoid_strings = []

    def old_pkgcb(pkg):
        old_packages.add(self._pkg_id_tuple(pkg))
        old_repoid_strings.append(self._pkg_id_str(pkg))

    def new_pkgcb(pkg):
        new_repoid_strings.append(self._pkg_id_str(pkg))
        pkg_id_tuple = self._pkg_id_tuple(pkg)
        if pkg_id_tuple not in old_packages:
            # This package is only in the new repodata
            added_packages[pkg.pkgId] = pkg
            added_packages_ids.append(pkg.pkgId)
        else:
            # This package is also in the old repodata
            old_packages.remove(pkg_id_tuple)

    do_new_primary_files = 1
    if fil_f and fil_new_fn:
        # All files will be parsed from filelists
        do_new_primary_files = 0

    cr.xml_parse_primary(pri_old_fn, pkgcb=old_pkgcb, do_files=0)
    cr.xml_parse_primary(pri_new_fn, pkgcb=new_pkgcb,
                         do_files=do_new_primary_files)

    # Calculate RepoIds
    old_repo_id = ""
    new_repo_id = ""

    h = hashlib.new(self.id_type)
    old_repoid_strings.sort()
    for i in old_repoid_strings:
        h.update(i.encode("utf-8"))  # hashlib requires bytes
    old_repo_id = h.hexdigest()

    h = hashlib.new(self.id_type)
    new_repoid_strings.sort()
    for i in new_repoid_strings:
        h.update(i.encode("utf-8"))
    new_repo_id = h.hexdigest()

    removed_pkgs = sorted(old_packages)
    for _, location_href, location_base in removed_pkgs:
        removed.add_pkg_locations(location_href, location_base)

    num_of_packages = len(added_packages)

    # Filelists and Other cb
    def newpkgcb(pkgId, name, arch):
        return added_packages.get(pkgId, None)

    # Write out the filelists delta
    if fil_f and fil_new_fn:
        cr.xml_parse_filelists(fil_new_fn, newpkgcb=newpkgcb)
        fil_f.set_num_of_pkgs(num_of_packages)
        for pkgid in added_packages_ids:
            fil_f.add_pkg(added_packages[pkgid])
        fil_f.close()

    # Write out the other delta
    if oth_f and oth_new_fn:
        cr.xml_parse_other(oth_new_fn, newpkgcb=newpkgcb)
        oth_f.set_num_of_pkgs(num_of_packages)
        for pkgid in added_packages_ids:
            oth_f.add_pkg(added_packages[pkgid])
        oth_f.close()

    # Write out the primary delta.
    # Note: Writing of the primary delta has to happen after parsing of
    # filelists, otherwise files would be missing if do_new_primary_files
    # was 0.
    pri_f.set_num_of_pkgs(num_of_packages)
    for pkgid in added_packages_ids:
        pri_f.add_pkg(added_packages[pkgid])
    pri_f.close()

    return (old_repo_id, new_repo_id)
def parse_repodata(primary_xml_path, filelists_xml_path, other_xml_path,
                   only_primary=False, mirror=False):
    """
    Parse repodata to extract package info.

    Args:
        primary_xml_path (str): a path to a downloaded primary.xml
        filelists_xml_path (str): a path to a downloaded filelists.xml
        other_xml_path (str): a path to a downloaded other.xml

    Kwargs:
        only_primary (bool): If true, only the metadata in primary.xml
            will be parsed.
        mirror (bool): If true, ambiguous metadata (duplicate pkgIds or
            NEVRAs) raises an error instead of logging a warning.

    Returns:
        dict: createrepo_c package objects with the pkgId as a key
    """
    packages = collections.OrderedDict()

    nevras = set()
    pkgid_warning_triggered = False
    nevra_warning_triggered = False

    def pkgcb(pkg):
        """
        A callback which is used when a whole package entry in xml is parsed.

        Args:
            pkg(createrepo_c.Package): parsed metadata for a package
        """
        nonlocal pkgid_warning_triggered
        nonlocal nevra_warning_triggered

        ERR_MSG = _(
            "The repository metadata being synced into Pulp is erroneous in a way that "
            "makes it ambiguous (duplicate {}), and therefore we do not allow it to be synced in "
            "'mirror_complete' mode. Please choose a sync policy which does not mirror "
            "repository metadata.\n\n"
            "Please read https://github.com/pulp/pulp_rpm/issues/2402 for more details."
        )
        WARN_MSG = _(
            "The repository metadata being synced into Pulp is erroneous in a way that "
            "makes it ambiguous (duplicate {}). Yum, DNF and Pulp try to handle these problems, "
            "but unexpected things may happen.\n\n"
            "Please read https://github.com/pulp/pulp_rpm/issues/2402 for more details."
        )

        if not pkgid_warning_triggered and pkg.pkgId in packages:
            pkgid_warning_triggered = True
            if mirror:
                raise Exception(ERR_MSG.format("PKGIDs"))
            else:
                log.warn(WARN_MSG.format("PKGIDs"))
        if not nevra_warning_triggered and pkg.nevra() in nevras:
            nevra_warning_triggered = True
            if mirror:
                raise Exception(ERR_MSG.format("NEVRAs"))
            else:
                log.warn(WARN_MSG.format("NEVRAs"))
        packages[pkg.pkgId] = pkg
        nevras.add(pkg.nevra())

    def newpkgcb(pkgId, name, arch):
        """
        A callback which is used when a new package entry is encountered.

        Only the opening <package> element is parsed at that moment.
        This function has to return a package object to which the parsed data
        will be added, or None if the package should be skipped.

        pkgId, name and arch of a package can be used to skip further parsing.
        Available only for filelists.xml and other.xml.

        Args:
            pkgId(str): pkgId of a package
            name(str): name of a package
            arch(str): arch of a package

        Returns:
            createrepo_c.Package: a package to which the parsed data should be
                added. If None is returned, further parsing of the package
                will be skipped.
        """
        return packages.get(pkgId, None)

    # Note: warningcb is defined at module scope in the originating project
    cr.xml_parse_primary(primary_xml_path, pkgcb=pkgcb,
                         warningcb=warningcb, do_files=False)
    if not only_primary:
        cr.xml_parse_filelists(filelists_xml_path, newpkgcb=newpkgcb,
                               warningcb=warningcb)
        cr.xml_parse_other(other_xml_path, newpkgcb=newpkgcb,
                           warningcb=warningcb)
    return packages
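# parse_repodata() above references a warningcb that is defined at module
# scope in the originating project; a minimal sketch, assuming it follows
# the same log-and-continue pattern as the warning callbacks shown earlier
# in this section:
def warningcb(warning_type, message):
    log.warning("PARSER WARNING: %s", message)
    return True  # continue parsing despite the warning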