def _process_meta(self, elem):
    """
    Process the <meta> element of an LGR XML file.

    Iterate over the direct children of ``elem``, fill a new Metadata
    object and a new ReferenceManager from them, and finally store a new
    LGR built from both in ``self._lgr``.

    Children with an unknown tag are logged with a warning and reported to
    ``self.rfc7940_checks`` as a 'parse_xml' error. Whether a Unicode
    version element was seen is recorded as the 'explicit_unicode_version'
    test result.

    Every child is cleared once it has been handled.

    :param elem: The <meta> element to process.
    """
    metadata = Metadata(self.rfc7940_checks)
    reference_manager = ReferenceManager()

    # Tags whose text content is handed directly to a Metadata setter.
    MAPPER = {
        DATE_TAG: lambda d: metadata.set_date(d, force=self.force_mode),
        VALIDITY_START_TAG: lambda d: metadata.set_validity_start(d, force=self.force_mode),
        VALIDITY_END_TAG: lambda d: metadata.set_validity_end(d, force=self.force_mode),
        UNICODE_VERSION_TAG: lambda d: metadata.set_unicode_version(d, force=self.force_mode),
    }

    unicode_version_tag_found = False
    for child in elem:
        tag = child.tag
        logger.debug("Got '%s' element", tag)
        if tag in MAPPER:
            MAPPER[tag](child.text)
            if tag == UNICODE_VERSION_TAG:
                unicode_version_tag_found = True
        elif tag == VERSION_TAG:
            metadata.version = Version(child.text,
                                       child.get('comment', None))
        elif tag == LANGUAGE_TAG:
            metadata.add_language(child.text, force=self.force_mode)
        elif tag == SCOPE_TAG:
            metadata.scopes.append(Scope(child.text,
                                         child.get('type', None)))
        elif tag == DESCRIPTION_TAG:
            # An empty <description/> element has no text at all
            # (child.text is None): treat it as an empty description
            # instead of failing on None.replace().
            description_text = child.text or ''
            # Seems to be an issue with CDATA/iterparse: https://bugs.launchpad.net/lxml/+bug/1788449
            # For now, manually replace CRLF with LF
            metadata.description = Description(description_text.replace('\r\n', '\n'),
                                               child.get('type', None))
        elif tag == REFERENCES_TAG:
            for reference in child:
                value = reference.text
                # Don't convert it to an int since ref_id may be a string
                ref_id = reference.get('id')
                comment = reference.get('comment', None)
                reference_manager.add_reference(value,
                                                comment=comment,
                                                ref_id=ref_id)
            # Since we have processed <reference> elements here, let's clean-up
            child.clear()
        else:
            logger.warning("Unhandled '%s' element in <meta> section", tag)
            self.rfc7940_checks.error('parse_xml')
        # The child has been fully handled: release its content.
        child.clear()

    self.rfc7940_checks.add_test_result('explicit_unicode_version',
                                        unicode_version_tag_found)
    self._lgr = LGR(name=self.filename,
                    metadata=metadata,
                    reference_manager=reference_manager,
                    unicode_database=self._unicode_database)
class TestMetadata(unittest.TestCase):
    """Unit tests for the Metadata setters, with and without ``force``."""

    # Date string accepted by the date setters in these tests.
    GOOD_DATE = '2015-06-25'
    # Date string the date setters reject unless ``force`` is set.
    BAD_DATE = '2012-13-14'

    def setUp(self):
        """Give every test its own, freshly created Metadata object."""
        self.metadata = Metadata()

    def _expect_format_error(self, expected_reason, setter, value):
        """
        Check that ``setter(value)`` raises an LGRFormatException.

        :param expected_reason: Reason the raised exception must carry.
        :param setter: Callable under test, taking a single argument.
        :param value: Argument handed to ``setter``.
        """
        with self.assertRaises(LGRFormatException) as caught:
            setter(value)
        self.assertEqual(caught.exception.reason, expected_reason)

    def test_get_scripts(self):
        """get_scripts() is checked against the expected script list."""
        self.metadata.languages = ['und-Cyrl', 'und-Zyyy', 'fr']
        expected_scripts = ['Cyrl', 'Zyyy']
        self.assertEqual(self.metadata.get_scripts(), expected_scripts)

    def test_add_language(self):
        """Accepted tags are stored in order; a rejected tag raises."""
        accepted_tags = [
            'fr',
            'sr-Cyrl',
            'zh-yue-HK',
            'sr-Latn-RS',
            'sl-nedis',
            'de-CH-1901',
            'hy-Latn-IT-arevela',
            'es-419',
        ]
        for language_tag in accepted_tags:
            self.metadata.add_language(language_tag)
        self.assertEqual(self.metadata.languages, accepted_tags)

        self._expect_format_error(
            LGRFormatException.LGRFormatReason.INVALID_LANGUAGE_TAG,
            self.metadata.add_language,
            'de-419-DE')

    def test_add_language_force(self):
        """With force=True, the otherwise rejected tag is stored too."""
        for language_tag in ('fr', 'de-419-DE'):
            self.metadata.add_language(language_tag, force=True)
        self.assertEqual(self.metadata.languages, ['fr', 'de-419-DE'])

    def test_set_date(self):
        """set_date() stores GOOD_DATE and raises on BAD_DATE."""
        self.metadata.set_date(self.GOOD_DATE)
        self.assertEqual(self.metadata.date, '2015-06-25')

        self._expect_format_error(
            LGRFormatException.LGRFormatReason.INVALID_DATE_TAG,
            self.metadata.set_date,
            self.BAD_DATE)

    def test_set_date_force(self):
        """set_date() stores BAD_DATE when forced."""
        self.metadata.set_date(self.BAD_DATE, force=True)
        self.assertEqual(self.metadata.date, '2012-13-14')

    def test_set_validity_start(self):
        """set_validity_start() stores GOOD_DATE and raises on BAD_DATE."""
        self.metadata.set_validity_start(self.GOOD_DATE)
        self.assertEqual(self.metadata.validity_start, '2015-06-25')

        self._expect_format_error(
            LGRFormatException.LGRFormatReason.INVALID_DATE_TAG,
            self.metadata.set_validity_start,
            self.BAD_DATE)

    def test_set_validity_start_force(self):
        """set_validity_start() stores BAD_DATE when forced."""
        self.metadata.set_validity_start(self.BAD_DATE, force=True)
        self.assertEqual(self.metadata.validity_start, '2012-13-14')

    def test_set_validity_end(self):
        """set_validity_end() stores GOOD_DATE and raises on BAD_DATE."""
        self.metadata.set_validity_end(self.GOOD_DATE)
        self.assertEqual(self.metadata.validity_end, '2015-06-25')

        self._expect_format_error(
            LGRFormatException.LGRFormatReason.INVALID_DATE_TAG,
            self.metadata.set_validity_end,
            self.BAD_DATE)

    def test_set_validity_end_force(self):
        """set_validity_end() stores BAD_DATE when forced."""
        self.metadata.set_validity_end(self.BAD_DATE, force=True)
        self.assertEqual(self.metadata.validity_end, '2012-13-14')

    def test_set_unicode_version(self):
        """set_unicode_version() stores '6.3.0' and raises on 'a.b.c'."""
        self.metadata.set_unicode_version('6.3.0')
        self.assertEqual(self.metadata.unicode_version, '6.3.0')

        self._expect_format_error(
            LGRFormatException.LGRFormatReason.INVALID_UNICODE_VERSION_TAG,
            self.metadata.set_unicode_version,
            'a.b.c')

    def test_set_unicode_version_force(self):
        """set_unicode_version() stores 'a.b.c' when forced."""
        self.metadata.set_unicode_version('a.b.c', force=True)
        self.assertEqual(self.metadata.unicode_version, 'a.b.c')