def test_rpm(self):
    """
    Serialize the primary.xml package element with the "rpm" namespace and check
    that the raw XML is a standalone <package> fragment that can be re-parsed.
    """
    rpm_namespace = utils.Namespace('rpm', primary.RPM_SPEC_URL)
    xml_text = utils.element_to_raw_xml(self.rpm_element, [rpm_namespace],
                                        primary.COMMON_SPEC_URL)

    # the fragment must open directly with the package tag: no namespace
    # declarations and no root element in front of it
    opening_tag = re.match(r'^<package +type="rpm">', xml_text)
    self.assertTrue(opening_tag)

    # ... and must close with it, without stray closing tags such as </metadata>
    self.assertTrue(xml_text.rstrip().endswith('</package>'))

    # the "rpm" prefix has to survive serialization
    license_tag = re.search(r'<rpm:license *>GPLv2</rpm:license>', xml_text)
    self.assertTrue(license_tag)

    # names expected from the requires and provides entries
    for entry_name in ('dolphin', 'penguin'):
        self.assertTrue(entry_name in xml_text)

    # every namespace declaration should have been stripped
    self.assertTrue('xmlns' not in xml_text)

    # python 2.6 treated the default namespace as a namespace with prefix '',
    # which shows up as tags that start with a colon
    self.assertTrue('<:' not in xml_text)

    # re-parse to prove the output is valid XML; the wrapping fake tag exists
    # only to declare the "rpm" prefix
    fake_xml = '<fake xmlns:rpm="http://pulpproject.org">%s</fake>' % xml_text
    ET.fromstring(fake_xml)
def generate_dbs(self):
    """
    Build one gdbm database per repo metadata file (filelists and other) so the
    raw XML of each unit in the repo can be read back quickly later on.

    The path of every database is recorded in self.dbs under the metadata file
    name, and self.rpm_count is set to the number of entries in the filelists
    database.

    :raises PulpCodedException: if the filelists and other databases do not end
                                up with the same number of entries
    """
    counts = {}
    metadata_sources = (
        (filelists.METADATA_FILE_NAME, filelists.PACKAGE_TAG,
         filelists.process_package_element),
        (other.METADATA_FILE_NAME, other.PACKAGE_TAG, other.process_package_element),
    )

    for filename, tag, process_func in metadata_sources:
        with contextlib.closing(self.get_metadata_file_handle(filename)) as xml_handle:
            elements = package_list_generator(xml_handle, tag)
            db_path = os.path.join(self.dst_dir, '%s.db' % filename)
            # 'n': always a New file, 'f': open with Fast writing mode
            with contextlib.closing(gdbm.open(db_path, 'nf')) as db:
                for element in elements:
                    utils.strip_ns(element)
                    element.attrib['pkgid'] = models.RpmBase.PKGID_TEMPLATE
                    # the XML is captured before process_func is handed the element
                    serialized = utils.element_to_raw_xml(element)
                    unit_key, _ = process_func(element)
                    db[self.generate_db_key(unit_key)] = serialized
                db.sync()
                counts[filename] = len(db)
        self.dbs[filename] = db_path

    filelists_count = counts[filelists.METADATA_FILE_NAME]
    if filelists_count != counts[other.METADATA_FILE_NAME]:
        reason = ('metadata is specified for different set of packages in filelists.xml'
                  ' and in other.xml')
        raise PulpCodedException(error_code=error_codes.RPM1015, reason=reason)
    self.rpm_count = filelists_count
def generate_dbs(self):
    """
    Build one gdbm database per repo metadata file (filelists and other) so the
    raw XML of each unit in the repo can be read back quickly later on.

    The path of every database is recorded in self.dbs under the metadata file
    name.
    """
    metadata_sources = (
        (filelists.METADATA_FILE_NAME, filelists.PACKAGE_TAG,
         filelists.process_package_element),
        (other.METADATA_FILE_NAME, other.PACKAGE_TAG, other.process_package_element),
    )

    for filename, tag, process_func in metadata_sources:
        xml_handle = self.get_metadata_file_handle(filename)
        try:
            elements = package_list_generator(xml_handle, tag)
            db_path = os.path.join(self.dst_dir, '%s.db' % filename)
            # 'n': always a New file, 'f': open with Fast writing mode
            db = gdbm.open(db_path, 'nf')
            try:
                for element in elements:
                    utils.strip_ns(element)
                    # the XML is captured before process_func is handed the element
                    serialized = utils.element_to_raw_xml(element)
                    unit_key, _ = process_func(element)
                    db[self.generate_db_key(unit_key)] = serialized
                db.sync()
            finally:
                # the database is closed even when an element fails to process
                db.close()
        finally:
            xml_handle.close()
        self.dbs[filename] = db_path
def test_rpm(self):
    """
    Check the raw XML produced for the primary.xml package element: it must be a
    bare <package> fragment that keeps the "rpm" prefix and parses cleanly.
    """
    namespaces = [utils.Namespace('rpm', primary.RPM_SPEC_URL)]
    serialized = utils.element_to_raw_xml(self.rpm_element, namespaces,
                                          primary.COMMON_SPEC_URL)

    # no namespace declarations or root elements ahead of the package tag
    starts_with_package = re.match(r'^<package +type="rpm">', serialized)
    self.assertTrue(starts_with_package)

    # nothing may trail the closing package tag, e.g. a stray </metadata>
    trimmed = serialized.rstrip()
    self.assertTrue(trimmed.endswith('</package>'))

    # the "rpm" prefix must be preserved on namespaced tags
    has_license = re.search(r'<rpm:license *>GPLv2</rpm:license>', serialized)
    self.assertTrue(has_license)

    # names expected from the requires and provides entries
    self.assertTrue('dolphin' in serialized)
    self.assertTrue('penguin' in serialized)

    # namespace declarations should all be stripped out
    self.assertTrue('xmlns' not in serialized)

    # on python 2.6 the default namespace was treated as a namespace with
    # prefix '', which would leave tags beginning with a colon
    self.assertTrue('<:' not in serialized)

    # wrap the fragment in a fake tag that declares the "rpm" prefix, then parse
    # it purely to confirm the XML is valid
    fake_xml = '<fake xmlns:rpm="http://pulpproject.org">%s</fake>' % serialized
    ET.fromstring(fake_xml)
def generate_dbs(self):
    """
    Write a local gdbm database for each repo metadata file (filelists and
    other), giving quick read access to the raw XML of every unit in the repo.

    Each database path is stored in self.dbs, keyed by the metadata file name.
    """
    sources = (
        (filelists.METADATA_FILE_NAME, filelists.PACKAGE_TAG,
         filelists.process_package_element),
        (other.METADATA_FILE_NAME, other.PACKAGE_TAG, other.process_package_element),
    )

    for filename, tag, process_func in sources:
        metadata_handle = self.get_metadata_file_handle(filename)
        try:
            package_elements = package_list_generator(metadata_handle, tag)
            db_filename = os.path.join(self.dst_dir, "%s.db" % filename)
            # flags: "n" always starts a New file, "f" selects Fast writing mode
            db_handle = gdbm.open(db_filename, "nf")
            try:
                for package_element in package_elements:
                    utils.strip_ns(package_element)
                    # serialize first, then let process_func work on the element
                    xml_text = utils.element_to_raw_xml(package_element)
                    unit_key, _ = process_func(package_element)
                    key = self.generate_db_key(unit_key)
                    db_handle[key] = xml_text
                db_handle.sync()
            finally:
                # release the database even if processing an element raised
                db_handle.close()
        finally:
            metadata_handle.close()
        self.dbs[filename] = db_filename
def generate_dbs(self):
    """
    Write a local gdbm database for each repo metadata file (filelists and
    other), giving quick read access to the raw XML of every unit in the repo.

    Every element gets its "pkgid" attribute replaced by the package id template
    before it is serialized. Each database path is stored in self.dbs, keyed by
    the metadata file name.
    """
    sources = (
        (filelists.METADATA_FILE_NAME, filelists.PACKAGE_TAG,
         filelists.process_package_element),
        (other.METADATA_FILE_NAME, other.PACKAGE_TAG, other.process_package_element),
    )

    for filename, tag, process_func in sources:
        with contextlib.closing(self.get_metadata_file_handle(filename)) as metadata_handle:
            package_elements = package_list_generator(metadata_handle, tag)
            db_filename = os.path.join(self.dst_dir, '%s.db' % filename)
            # flags: 'n' always starts a New file, 'f' selects Fast writing mode
            with contextlib.closing(gdbm.open(db_filename, 'nf')) as db_handle:
                for package_element in package_elements:
                    utils.strip_ns(package_element)
                    package_element.attrib['pkgid'] = models.RpmBase.PKGID_TEMPLATE
                    # serialize first, then let process_func work on the element
                    xml_text = utils.element_to_raw_xml(package_element)
                    unit_key, _ = process_func(package_element)
                    key = self.generate_db_key(unit_key)
                    db_handle[key] = xml_text
                db_handle.sync()
        self.dbs[filename] = db_filename
def test_other(self):
    """
    Strip the namespace from the other.xml package element, serialize it, and
    check that the raw XML is a parseable <package> fragment with its changelogs.
    """
    utils.strip_ns(self.other_element)
    xml_text = utils.element_to_raw_xml(self.other_element)

    self.assertTrue(xml_text.startswith('<package '))
    self.assertTrue('<version ' in xml_text)

    # occurrence counts expected for the fixture element
    changelog_count = xml_text.count('<changelog ')
    self.assertEqual(changelog_count, 10)
    ledford_count = xml_text.count('author="Doug Ledford')
    self.assertEqual(ledford_count, 7)

    # parsing is the validity check; the result itself is not needed
    ET.fromstring(xml_text)
def test_other(self):
    """
    Serialize the namespace-stripped other.xml package element and verify the
    shape of the resulting raw XML.
    """
    utils.strip_ns(self.other_element)
    serialized = utils.element_to_raw_xml(self.other_element)

    # the fragment opens with the package tag and carries a version tag
    opens_with_package = serialized.startswith('<package ')
    self.assertTrue(opens_with_package)
    has_version = '<version ' in serialized
    self.assertTrue(has_version)

    # occurrence counts expected for the fixture element
    self.assertEqual(serialized.count('<changelog '), 10)
    self.assertEqual(serialized.count('author="Doug Ledford'), 7)

    # re-parse only to confirm the XML is valid
    ET.fromstring(serialized)
def test_filelists(self):
    """
    Strip the namespace from the filelists package element, serialize it, and
    check that the raw XML is a parseable <package> fragment with its file entries.
    """
    utils.strip_ns(self.filelists_element)
    xml_text = utils.element_to_raw_xml(self.filelists_element)

    self.assertTrue(xml_text.startswith('<package '))

    # each of these fragments must show up somewhere in the output
    expected_fragments = (
        '<version ',
        'name="opensm-libs"',
        '<file>/usr/lib64/libosmcomp.so.3</file>',
    )
    for fragment in expected_fragments:
        self.assertTrue(fragment in xml_text)

    self.assertEqual(xml_text.count('<file>'), 6)

    # parsing is the validity check; the result itself is not needed
    ET.fromstring(xml_text)
def test_filelists(self):
    """
    Serialize the namespace-stripped filelists package element and verify the
    shape of the resulting raw XML.
    """
    utils.strip_ns(self.filelists_element)
    serialized = utils.element_to_raw_xml(self.filelists_element)

    # the fragment opens with the package tag and carries a version tag
    opens_with_package = serialized.startswith('<package ')
    self.assertTrue(opens_with_package)
    self.assertTrue('<version ' in serialized)

    # package name and one specific file entry from the fixture
    self.assertTrue('name="opensm-libs"' in serialized)
    self.assertTrue('<file>/usr/lib64/libosmcomp.so.3</file>' in serialized)

    # total number of plain <file> tags expected for the fixture
    file_tag_count = serialized.count('<file>')
    self.assertEqual(file_tag_count, 6)

    # re-parse only to confirm the XML is valid
    ET.fromstring(serialized)
def generate_dbs(self):
    """
    Write a local gdbm database for each repo metadata file (filelists and
    other), giving quick read access to the raw XML of every unit in the repo.

    Each database path is stored in self.dbs keyed by the metadata file name.
    Once both databases are written, self.rpm_count is set to the number of
    entries in the filelists database.

    :raises PulpCodedException: if the two databases hold a different number of
                                entries
    """
    entries_per_file = {}
    sources = (
        (filelists.METADATA_FILE_NAME, filelists.PACKAGE_TAG,
         filelists.process_package_element),
        (other.METADATA_FILE_NAME, other.PACKAGE_TAG, other.process_package_element),
    )

    for filename, tag, process_func in sources:
        with contextlib.closing(self.get_metadata_file_handle(filename)) as metadata_handle:
            package_elements = package_list_generator(metadata_handle, tag)
            db_filename = os.path.join(self.dst_dir, '%s.db' % filename)
            # flags: 'n' always starts a New file, 'f' selects Fast writing mode
            with contextlib.closing(gdbm.open(db_filename, 'nf')) as db_handle:
                for package_element in package_elements:
                    utils.strip_ns(package_element)
                    package_element.attrib['pkgid'] = models.RpmBase.PKGID_TEMPLATE
                    # serialize first, then let process_func work on the element
                    xml_text = utils.element_to_raw_xml(package_element)
                    unit_key, _ = process_func(package_element)
                    key = self.generate_db_key(unit_key)
                    db_handle[key] = xml_text
                db_handle.sync()
                entries_per_file[filename] = len(db_handle)
        self.dbs[filename] = db_filename

    filelists_entries = entries_per_file[filelists.METADATA_FILE_NAME]
    other_entries = entries_per_file[other.METADATA_FILE_NAME]
    if filelists_entries != other_entries:
        reason = (
            'metadata is specified for different set of packages in filelists.xml'
            ' and in other.xml')
        raise PulpCodedException(error_code=error_codes.RPM1015, reason=reason)
    self.rpm_count = filelists_entries
def process_package_element(package_element):
    """
    Build a model instance from a parsed primary.xml package element.

    The element is modified along the way: the checksum text and type are
    replaced with template targets and the location href is reduced to the bare
    filename. The modified element is then serialized and attached to the model
    as raw_xml.

    :param package_element: parsed primary.xml package element
    :return: an SRPM model when the package arch is "src", otherwise an RPM
             model; raw_xml is set on it in both cases
    :rtype: models.RPM or models.SRPM
    """
    package_info = {}

    def copy_text(key, tag):
        # store the text of the first child matching tag, when there is one
        child = package_element.find(tag)
        if child is not None:
            package_info[key] = child.text

    copy_text('name', NAME_TAG)
    copy_text('arch', ARCH_TAG)

    version_element = package_element.find(VERSION_TAG)
    if version_element is not None:
        version_attrs = version_element.attrib
        package_info['version'] = version_attrs['ver']
        package_info['release'] = version_attrs.get('rel', None)
        package_info['epoch'] = version_attrs.get('epoch', None)

    checksum_element = package_element.find(CHECKSUM_TAG)
    if checksum_element is not None:
        package_info['checksumtype'] = util.sanitize_checksum_type(
            checksum_element.attrib['type'])
        package_info['checksum'] = checksum_element.text
        # swap in template targets that will be rendered at publish time
        checksum_element.text = models.RpmBase.CHECKSUM_TEMPLATE
        checksum_element.attrib['type'] = models.RpmBase.CHECKSUMTYPE_TEMPLATE

    copy_text('summary', SUMMARY_TAG)
    copy_text('description', DESCRIPTION_TAG)
    copy_text('url', URL_TAG)

    time_element = package_element.find(TIME_TAG)
    if time_element is not None:
        time_attrs = time_element.attrib
        package_info['time'] = int(time_attrs['file'])
        package_info['build_time'] = int(time_attrs['build'])

    size_element = package_element.find(SIZE_TAG)
    if size_element is not None:
        package_info['size'] = int(size_element.attrib['package'])

    location_element = package_element.find(LOCATION_TAG)
    if location_element is not None:
        href = location_element.attrib['href']

        # accept both a plain "base" attribute and a namespace-qualified one;
        # when several match, the last one iterated wins
        base_values = [value for attribute, value in location_element.items()
                       if attribute == 'base' or attribute.endswith('}base')]
        package_info['base_url'] = base_values[-1] if base_values else None

        filename = os.path.basename(href)
        package_info['relativepath'] = href
        package_info['filename'] = filename
        # The original directory structure is deliberately not preserved. The
        # element is converted back to XML below, so this is the place to
        # rewrite the href.
        location_element.attrib['href'] = filename

    format_element = package_element.find(FORMAT_TAG)
    package_info.update(_process_format_element(format_element))

    is_source = package_info['arch'].lower() == 'src'
    model_class = models.SRPM if is_source else models.RPM
    model = model_class(**package_info)

    # attach the raw XML so it can be saved in the database later
    model.raw_xml = utils.element_to_raw_xml(
        package_element, [utils.Namespace('rpm', RPM_SPEC_URL)], COMMON_SPEC_URL)
    return model
def process_package_element(package_element):
    """
    Build a model instance from a parsed primary.xml package element.

    The location href on the element is reduced to the bare filename before the
    element is serialized and attached to the model as raw_xml.

    :param package_element: parsed primary.xml package element
    :return: an SRPM model when the package arch is "src", otherwise an RPM
             model; raw_xml is set on it in both cases
    :rtype: models.RPM or models.SRPM
    """
    # work on a private copy so the shared PACKAGE_INFO_SKEL is never modified
    info = deepcopy(PACKAGE_INFO_SKEL)
    info['type'] = package_element.attrib['type']

    name = package_element.find(NAME_TAG)
    if name is not None:
        info['name'] = name.text

    arch = package_element.find(ARCH_TAG)
    if arch is not None:
        info['arch'] = arch.text

    version = package_element.find(VERSION_TAG)
    if version is not None:
        version_attrs = version.attrib
        info['version'] = version_attrs['ver']
        info['release'] = version_attrs.get('rel', None)
        info['epoch'] = version_attrs.get('epoch', None)

    checksum = package_element.find(CHECKSUM_TAG)
    if checksum is not None:
        info['checksumtype'] = checksum.attrib['type']
        info['checksum'] = checksum.text

    summary = package_element.find(SUMMARY_TAG)
    if summary is not None:
        info['summary'] = summary.text

    description = package_element.find(DESCRIPTION_TAG)
    if description is not None:
        info['description'] = description.text

    url = package_element.find(URL_TAG)
    if url is not None:
        info['url'] = url.text

    time_element = package_element.find(TIME_TAG)
    if time_element is not None:
        time_attrs = time_element.attrib
        info['time'] = int(time_attrs['file'])
        info['build_time'] = int(time_attrs['build'])

    size = package_element.find(SIZE_TAG)
    if size is not None:
        info['size'] = int(size.attrib['package'])

    location = package_element.find(LOCATION_TAG)
    if location is not None:
        href = location.attrib['href']
        basename = os.path.basename(href)
        info['relativepath'] = href
        info['filename'] = basename
        # The original directory structure is deliberately not preserved. The
        # element is converted back to XML below, so this is the place to
        # rewrite the href.
        location.attrib['href'] = basename

    info.update(_process_format_element(package_element.find(FORMAT_TAG)))

    is_source = info['arch'].lower() == 'src'
    model_class = models.SRPM if is_source else models.RPM
    model = model_class.from_package_info(info)

    # attach the raw XML so it can be saved in the database later
    namespaces = [utils.Namespace('rpm', RPM_SPEC_URL)]
    model.raw_xml = utils.element_to_raw_xml(package_element, namespaces, COMMON_SPEC_URL)
    return model
def process_package_element(package_element):
    """
    Turn a parsed primary.xml package element into a model instance.

    Besides reading the data, this templatizes the element: the checksum text
    and type are replaced with template targets, and the location href is cut
    down to the bare filename. The resulting element is serialized and stored on
    the model as raw_xml.

    :param package_element: parsed primary.xml package element
    :return: an SRPM model when the package arch is "src", otherwise an RPM
             model; raw_xml is set on it in both cases
    :rtype: models.RPM or models.SRPM
    """
    info = {}

    def record_text(key, tag):
        # copy the text of the first child matching tag into info, if present
        found = package_element.find(tag)
        if found is not None:
            info[key] = found.text

    def record_ints(tag, key_to_attribute):
        # copy integer attributes of the first child matching tag, if present
        found = package_element.find(tag)
        if found is not None:
            for key, attribute in key_to_attribute:
                info[key] = int(found.attrib[attribute])

    record_text('name', NAME_TAG)
    record_text('arch', ARCH_TAG)

    version = package_element.find(VERSION_TAG)
    if version is not None:
        info['version'] = version.attrib['ver']
        info['release'] = version.attrib.get('rel', None)
        info['epoch'] = version.attrib.get('epoch', None)

    checksum = package_element.find(CHECKSUM_TAG)
    if checksum is not None:
        sanitized_type = util.sanitize_checksum_type(checksum.attrib['type'])
        info['checksumtype'] = sanitized_type
        info['checksum'] = checksum.text
        # from here on the element carries template targets that will be
        # rendered at publish time
        checksum.text = models.RpmBase.CHECKSUM_TEMPLATE
        checksum.attrib['type'] = models.RpmBase.CHECKSUMTYPE_TEMPLATE

    record_text('summary', SUMMARY_TAG)
    record_text('description', DESCRIPTION_TAG)
    record_text('url', URL_TAG)

    record_ints(TIME_TAG, (('time', 'file'), ('build_time', 'build')))
    record_ints(SIZE_TAG, (('size', 'package'),))

    location = package_element.find(LOCATION_TAG)
    if location is not None:
        href = location.attrib['href']

        # look for a "base" attribute, plain or namespace-qualified; the last
        # match found while iterating is the one kept
        base_url = None
        for attribute, value in location.items():
            is_base = attribute == 'base' or attribute.endswith('}base')
            if is_base:
                base_url = value
        info['base_url'] = base_url

        basename = os.path.basename(href)
        info['relativepath'] = href
        info['filename'] = basename
        # The original directory structure is deliberately not preserved. The
        # element is converted back to XML below, so this is the place to
        # rewrite the href.
        location.attrib['href'] = basename

    info.update(_process_format_element(package_element.find(FORMAT_TAG)))

    if info['arch'].lower() != 'src':
        model = models.RPM(**info)
    else:
        model = models.SRPM(**info)

    # attach the raw XML so it can be saved in the database later
    namespaces = [utils.Namespace('rpm', RPM_SPEC_URL)]
    model.raw_xml = utils.element_to_raw_xml(package_element, namespaces, COMMON_SPEC_URL)
    return model