def _report_issue(report, char, category, issue):
    """
    Record an issue for a repertoire element in the rebuild report.

    :param dict report: The per-char report (the 'repertoire' entry
                        of the rebuild result).
    :param char: The repertoire element the issue relates to.
    :param str category: The list to append to ('errors' or 'warnings').
    :param issue: The object describing the issue (usually an exception).
    """
    # XXX: Cannot use defaultdict because of django...
    report.setdefault(char, {}).setdefault(category, []).append(issue)


def rebuild_lgr(lgr, options):
    """
    Rebuild an LGR with given parameters.

    options argument can contain:
        * unicode_version: The target Unicode version to be used
          when rebuilding the LGR. If absent or None, use the one
          from the metadata of the LGR.
        * validating_repertoire: The validating repertoire used
          for checking code points.
        * unidb: Munidata's Unicode database. If absent or None, no Unicode
          database is given to the rebuilt LGR and the version check
          is skipped.

    The rebuilt LGR is only used to collect the issues encountered while
    re-inserting every element of the source repertoire; it is not returned.

    :param LGR lgr: The LGR to rebuild.
    :param dict options: Dictionary of options to the validation function.
    :return: A tuple (True, result), result being a dictionary with:
                 * 'description': Summary of the rebuild parameters.
                 * 'repertoire': For each char with issues, a dictionary
                   with optional 'errors' and 'warnings' lists, and an
                   optional 'variants' dictionary mapping a variant to
                   the list of errors raised when adding it.
                 * 'generic': Only present if the target Unicode version
                   differs from the version of the Unicode database.
    """
    # Local import to prevent import cycles
    from lgr.core import LGR

    unicode_version = options.get('unicode_version')
    if unicode_version is None:
        # Honour the documented contract: an explicit None means
        # "keep the version of the source LGR".
        unicode_version = lgr.metadata.unicode_version
    validating_repertoire = options.get('validating_repertoire')

    description = "Rebuilding LGR with Unicode version {}".format(
        unicode_version)
    if validating_repertoire is not None:
        description += " and validating repertoire '{}'".format(
            validating_repertoire)

    report = {}
    result = {
        'description': description,
        'repertoire': report,
    }

    logger.info("Rebuilding LGR '%s' with Unicode version %s "
                "and Validating Repertoire '%s'",
                lgr, unicode_version, validating_repertoire)

    unidb = options.get('unidb')
    if unidb is not None:
        unidb_version = unidb.get_unicode_version()
        if unidb_version != unicode_version:
            result['generic'] = "Target Unicode version {} " \
                                "differs from UnicodeDatabase {}".format(
                                    unicode_version, unidb_version)
            logger.warning("Target Unicode version %s differs "
                           "from UnicodeDatabase %s",
                           unicode_version, unidb_version)

    # For now, simply copy the metadata and references of the source LGR
    target_metadata = copy.deepcopy(lgr.metadata)
    target_metadata.unicode_version = unicode_version
    target_reference_manager = copy.deepcopy(lgr.reference_manager)

    target_lgr = LGR(name=lgr.name,
                     metadata=target_metadata,
                     reference_manager=target_reference_manager,
                     unicode_database=unidb)

    for char in lgr.repertoire:
        if isinstance(char, RangeChar):
            # Check every code point of the range before inserting it:
            # a single error or out-of-script code point rejects the range.
            range_ok = True
            for cp, status in target_lgr.check_range(char.first_cp,
                                                     char.last_cp,
                                                     validating_repertoire):
                if status is not None:
                    _report_issue(report, char, 'errors', status)
                    range_ok = False
                in_script, _ = lgr.cp_in_script([cp])
                if not in_script:
                    _report_issue(report, char, 'warnings',
                                  CharNotInScript(cp))
                    range_ok = False

            if not range_ok:
                continue

            try:
                target_lgr.add_range(
                    char.first_cp,
                    char.last_cp,
                    comment=char.comment,
                    ref=char.references,
                    tag=char.tags,
                    when=char.when, not_when=char.not_when,
                    validating_repertoire=validating_repertoire,
                    override_repertoire=False)
            except LGRException as exc:
                _report_issue(report, char, 'errors', exc)
                logger.error("Cannot add range '%s-%s'",
                             format_cp(char.first_cp),
                             format_cp(char.last_cp))
            # Range is done: the code below only handles non-range chars
            continue

        in_script, _ = lgr.cp_in_script(char.cp)
        if not in_script:
            _report_issue(report, char, 'warnings', CharNotInScript(char.cp))

        # Insert code point
        try:
            target_lgr.add_cp(char.cp,
                              comment=char.comment,
                              ref=char.references,
                              tag=char.tags,
                              when=char.when, not_when=char.not_when,
                              validating_repertoire=validating_repertoire,
                              override_repertoire=False)
        except LGRException as exc:
            _report_issue(report, char, 'errors', exc)
            logger.error("Cannot add code point '%s'",
                         format_cp(char.cp))
            if not isinstance(exc, CharInvalidIdnaProperty):
                # Cannot include non-IDNA valid code points; any other
                # failing code point is forced in without validation.
                target_lgr.add_cp(char.cp,
                                  comment=char.comment,
                                  ref=char.references,
                                  tag=char.tags,
                                  when=char.when, not_when=char.not_when,
                                  force=True)

        # Create variants
        for var in char.get_variants():
            try:
                target_lgr.add_variant(
                    char.cp,
                    variant_cp=var.cp,
                    variant_type=var.type,
                    when=var.when, not_when=var.not_when,
                    comment=var.comment, ref=var.references,
                    validating_repertoire=validating_repertoire,
                    override_repertoire=True)
            except LGRException as exc:
                # Variant errors are grouped per variant under the char
                report.setdefault(char, {}).setdefault(
                    'variants', {}).setdefault(var, []).append(exc)
                logger.error("Cannot add variant '%s' to code point '%s'",
                             format_cp(var.cp),
                             format_cp(char.cp))
                if not isinstance(exc, CharInvalidIdnaProperty):
                    # Cannot include non-IDNA valid code points
                    target_lgr.add_variant(
                        char.cp,
                        variant_cp=var.cp,
                        variant_type=var.type,
                        when=var.when, not_when=var.not_when,
                        comment=var.comment, ref=var.references,
                        force=True)

    logger.info("Rebuilding LGR '%s' done", lgr)

    return True, result
class TestLGRCore(unittest.TestCase):
    """Unit tests for the LGR object: repertoire, variants, tags,
    references, label variant generation and label eligibility."""

    def setUp(self):
        """Create an empty LGR backed by the IDNA database for Unicode 6.3.0."""
        unidb = IDNADatabase('6.3.0')
        self.lgr = LGR(unicode_database=unidb)

    def test_add_single_cp_list(self):
        """A code point given as a one-element list is found as an int."""
        self.lgr.add_cp([0x0061])
        self.assertIn(0x0061, self.lgr.repertoire)

    def test_add_single_cp_int(self):
        """A code point can be given as a plain int."""
        self.lgr.add_cp(0x0061)
        self.assertIn(0x0061, self.lgr.repertoire)

    def test_add_cp_sequence(self):
        """Adding a sequence does not add its individual code points."""
        self.lgr.add_cp([0x0061, 0x0062])
        self.assertIn([0x0061, 0x0062], self.lgr.repertoire)
        self.assertNotIn(0x0061, self.lgr.repertoire)
        self.assertNotIn(0x0062, self.lgr.repertoire)

    def test_add_multiple_cp_sequences(self):
        """Sequences sharing a prefix can coexist in the repertoire."""
        self.lgr.add_cp([0x0061, 0x0062])
        self.lgr.add_cp([0x0061, 0x0062, 0x0063])
        self.assertIn([0x0061, 0x0062], self.lgr.repertoire)
        self.assertIn([0x0061, 0x0062, 0x0063], self.lgr.repertoire)
        self.assertNotIn(0x0061, self.lgr.repertoire)
        self.assertNotIn(0x0062, self.lgr.repertoire)
        self.assertNotIn(0x0063, self.lgr.repertoire)

    def test_add_cp_in_repertoire(self):
        """Adding an existing code point raises CharAlreadyExists."""
        self.lgr.add_cp([0x0061])
        self.assertRaises(CharAlreadyExists, self.lgr.add_cp, [0x0061])
        self.assertRaises(CharAlreadyExists, self.lgr.add_cp, 0x0061)

    def test_add_cp_validation(self):
        """Without override, a code point absent from the validating
        repertoire is rejected with NotInRepertoire."""
        validation_lgr = LGR()
        validation_lgr.add_cp([0x0061])
        self.lgr.add_cp([0x0061],
                        validating_repertoire=validation_lgr,
                        override_repertoire=False)
        self.assertRaises(NotInRepertoire,
                          self.lgr.add_cp, [0x0062],
                          validating_repertoire=validation_lgr,
                          override_repertoire=False)

    def test_add_cp_validation_override(self):
        """With override, a code point absent from the validating
        repertoire is still added."""
        validation_lgr = LGR()
        validation_lgr.add_cp([0x0061])
        self.lgr.add_cp([0x0061],
                        validating_repertoire=validation_lgr,
                        override_repertoire=False)
        self.lgr.add_cp([0x0062],
                        validating_repertoire=validation_lgr,
                        override_repertoire=True)
        self.assertIn(0x0062, self.lgr.repertoire)

    def test_del_single_cp_list(self):
        """A code point added as int can be deleted using a list."""
        self.lgr.add_cp(0x0061)
        self.lgr.del_cp([0x0061])
        self.assertNotIn(0x0061, self.lgr.repertoire)

    def test_del_single_cp_int(self):
        """A code point added as list can be deleted using an int."""
        self.lgr.add_cp([0x0061])
        self.lgr.del_cp(0x0061)
        self.assertNotIn(0x0061, self.lgr.repertoire)

    def test_del_cp_sequence(self):
        """Deleting the only sequence leaves an empty repertoire."""
        self.lgr.add_cp([0x0061, 0x0062])
        self.lgr.del_cp([0x0061, 0x0062])
        self.assertEqual(len(self.lgr.repertoire), 0)

    def test_del_cp_sequence_with_cp(self):
        """Deleting a member of a sequence raises NotInLGR and keeps
        the sequence."""
        self.lgr.add_cp([0x0061, 0x0062])
        self.assertRaises(NotInLGR, self.lgr.del_cp, 0x0061)
        self.assertRaises(NotInLGR, self.lgr.del_cp, 0x0062)
        self.assertIn([0x0061, 0x0062], self.lgr.repertoire)

    def test_add_cp_when_not_when(self):
        """Giving both when and not_when raises CharInvalidContextRule."""
        self.lgr.add_cp([0x0061], when='w1')
        with self.assertRaises(CharInvalidContextRule) as cm:
            self.lgr.add_cp([0x0062], when='w2', not_when='nw1')
        self.assertEqual(cm.exception.cp, [0x0062])

        self.lgr.add_cp([0x0062], not_when='nw2')
        with self.assertRaises(CharInvalidContextRule) as cm:
            self.lgr.add_cp([0x0063], when='w3', not_when='nw3')
        self.assertEqual(cm.exception.cp, [0x0063])

    def test_add_range(self):
        """Every code point of an added range is in the repertoire."""
        self.lgr.add_range(0x0061, 0x007A)
        for cp in range(0x0061, 0x007A + 1):
            self.assertIn(cp, self.lgr.repertoire)

    def test_add_range_in_repertoire(self):
        """Adding the same range twice raises CharAlreadyExists."""
        self.lgr.add_range(0x0061, 0x007A)
        self.assertRaises(CharAlreadyExists,
                          self.lgr.add_range, 0x0061, 0x007A)

    def test_add_range_validation(self):
        """Ranges are validated against a repertoire made of
        individual code points."""
        validation_lgr = LGR()
        for cp in range(0x0061, 0x007A + 1):
            validation_lgr.add_cp(cp)
        self.lgr.add_range(0x0061, 0x007A,
                           validating_repertoire=validation_lgr,
                           override_repertoire=False)
        self.assertRaises(NotInRepertoire,
                          self.lgr.add_range, 0x00F8, 0x00FF,
                          validating_repertoire=validation_lgr,
                          override_repertoire=False)

    def test_add_range_validation_with_range(self):
        """Ranges are validated against a repertoire made of a range."""
        validation_lgr = LGR()
        validation_lgr.add_range(0x0061, 0x007A)
        self.lgr.add_range(0x0061, 0x007A,
                           validating_repertoire=validation_lgr,
                           override_repertoire=False)
        self.assertRaises(NotInRepertoire,
                          self.lgr.add_range, 0x00F8, 0x00FF,
                          validating_repertoire=validation_lgr,
                          override_repertoire=False)

    def test_add_range_validation_override(self):
        """With override, a range outside the validating repertoire
        is still added."""
        validation_lgr = LGR()
        for cp in range(0x0061, 0x007A):
            validation_lgr.add_cp(cp)
        self.lgr.add_range(0x0031, 0x0032,
                           validating_repertoire=validation_lgr,
                           override_repertoire=True)
        self.assertIn(0x0031, self.lgr.repertoire)

    def test_add_range_when_not_when(self):
        """Giving both when and not_when raises RangeInvalidContextRule
        carrying the bounds of the range."""
        self.lgr.add_range(0x0061, 0x0065, when='w1')
        with self.assertRaises(RangeInvalidContextRule) as cm:
            self.lgr.add_range(0x0066, 0x007A, when='w2', not_when='nw1')
        self.assertEqual(cm.exception.first_cp, 0x0066)
        self.assertEqual(cm.exception.last_cp, 0x007A)

        self.lgr.add_range(0x0066, 0x007A, not_when='nw2')
        with self.assertRaises(RangeInvalidContextRule) as cm:
            self.lgr.add_range(0x01BD, 0x01C3, when='w3', not_when='nw3')
        self.assertEqual(cm.exception.first_cp, 0x01BD)
        self.assertEqual(cm.exception.last_cp, 0x01C3)

    def test_expand_ranges(self):
        """expand_ranges turns every RangeChar into plain Char objects."""
        self.lgr.add_range(0x0061, 0x007A)
        for cp in range(0x0061, 0x007A + 1):
            self.assertIsInstance(self.lgr.get_char(cp), RangeChar)

        self.lgr.add_range(0x01BD, 0x01C3)
        for cp in range(0x01BD, 0x01C3 + 1):
            self.assertIsInstance(self.lgr.get_char(cp), RangeChar)

        self.lgr.expand_ranges()
        for cp in range(0x0061, 0x007A + 1):
            char = self.lgr.get_char(cp)
            self.assertIsInstance(char, Char)
            self.assertNotIsInstance(char, RangeChar)
        for cp in range(0x01BD, 0x01C3 + 1):
            char = self.lgr.get_char(cp)
            self.assertIsInstance(char, Char)
            self.assertNotIsInstance(char, RangeChar)

    def test_expand_range(self):
        """expand_range turns one given range into plain Char objects."""
        self.lgr.add_range(0x0061, 0x007A)
        for cp in range(0x0061, 0x007A + 1):
            self.assertIsInstance(self.lgr.get_char(cp), RangeChar)

        self.lgr.expand_range(0x0061, 0x007A)
        for cp in range(0x0061, 0x007A + 1):
            char = self.lgr.get_char(cp)
            self.assertIsInstance(char, Char)
            self.assertNotIsInstance(char, RangeChar)

    def test_add_variant_in_repertoire(self):
        """Adding the same variant twice raises VariantAlreadyExists."""
        self.lgr.add_cp([0x0061])
        self.lgr.add_variant([0x0061], [0x0030])
        self.assertRaises(VariantAlreadyExists,
                          self.lgr.add_variant, [0x0061], [0x0030])

    def test_add_variant_validation(self):
        """Without override, a variant absent from the validating
        repertoire is rejected with NotInRepertoire."""
        validation_lgr = LGR()
        validation_lgr.add_cp([0x0061])
        validation_lgr.add_cp([0x0030])

        self.lgr.add_cp([0x0061])
        self.lgr.add_variant([0x0061], [0x0030])
        self.assertRaises(NotInRepertoire,
                          self.lgr.add_variant, [0x0061], [0x0062],
                          validating_repertoire=validation_lgr,
                          override_repertoire=False)

    def test_add_variant_when_not_when(self):
        """Giving both when and not_when raises VariantInvalidContextRule
        carrying the code point and the variant."""
        self.lgr.add_cp([0x0061])
        self.lgr.add_variant([0x0061], [0x0030], when='w1')
        with self.assertRaises(VariantInvalidContextRule) as cm:
            self.lgr.add_variant([0x0061], [0x0031],
                                 when='w2', not_when='nw1')
        self.assertEqual(cm.exception.cp, [0x0061])
        self.assertEqual(cm.exception.variant, [0x0031])

        self.lgr.add_variant([0x0061], [0x0030], not_when='nw2')
        with self.assertRaises(VariantInvalidContextRule) as cm:
            self.lgr.add_variant([0x0061], [0x0031],
                                 when='w3', not_when='nw3')
        self.assertEqual(cm.exception.cp, [0x0061])
        self.assertEqual(cm.exception.variant, [0x0031])

    def test_del_cp_validation_override(self):
        """With override, a variant absent from the validating
        repertoire is still added.

        NOTE: despite its name, this test exercises add_variant and
        does not delete anything.
        """
        validation_lgr = LGR()
        validation_lgr.add_cp([0x0061])
        validation_lgr.add_cp([0x0030])

        self.lgr.add_cp([0x0061])
        self.lgr.add_variant([0x0061], [0x0030])
        self.lgr.add_variant([0x0061], [0x0062],
                             validating_repertoire=validation_lgr,
                             override_repertoire=True)
        self.assertIn((0x0062, ), self.lgr.repertoire[0x0061]._variants)

    def test_get_variants(self):
        """get_variants returns a generator over the variants."""
        self.lgr.add_cp([0x0061])
        self.lgr.add_variant([0x0061], [0x0030])
        variants = self.lgr.get_variants([0x0061])
        self.assertIsInstance(variants, types.GeneratorType)
        variant_list = list(variants)
        self.assertEqual(len(variant_list), 1)

    def test_check_range_no_modification(self):
        """check_range does not insert anything in the repertoire."""
        self.lgr.check_range(0x0060, 0x007F)
        self.assertEqual(len(self.lgr.repertoire), 0)

    def test_check_range(self):
        """check_range reports a status for each code point of the range."""
        self.lgr.add_cp([0x0061])
        self.lgr.add_cp([0x007A])
        for cp, prop in self.lgr.check_range(0x0060, 0x007F):
            if cp == 0x0060 or cp >= 0x007B:
                # Code points around a-z are not IDNA valid
                self.assertIsInstance(prop, CharInvalidIdnaProperty)
            elif cp == 0x0061 or cp == 0x007A:
                # Already inserted above
                self.assertIsInstance(prop, CharAlreadyExists)
            else:
                self.assertIsNone(prop)

    def test_add_codepoints(self):
        """add_codepoints groups contiguous code points into ranges."""
        self.lgr.add_codepoints(list(range(0x0061, 0x007A + 1)) +
                                [0x0107] +
                                [0x0137, 0x0138])
        expected_output = [
            RangeChar(0x061, 0x0061, 0x007A),
            Char(0x0107),
            RangeChar(0x0137, 0x0137, 0x0138),
        ]
        self.assertEqual(expected_output, list(self.lgr.repertoire))

    def test_tags_on_codepoint(self):
        """Duplicate tags on a code point raise LGRFormatException."""
        self.lgr.add_cp([0x0061], tag=['t1', 't2'])
        with self.assertRaises(LGRFormatException) as cm:
            self.lgr.add_cp([0x0062], tag=['t1', 't1'])
        self.assertEqual(cm.exception.reason,
                         LGRFormatException.LGRFormatReason.DUPLICATE_TAG)

    def test_tags_on_codepoint_sequence(self):
        """Tags on a sequence raise LGRFormatException."""
        with self.assertRaises(LGRFormatException) as cm:
            self.lgr.add_cp([0x0061, 0x0062], tag=['t1'])
        self.assertEqual(cm.exception.reason,
                         LGRFormatException.LGRFormatReason.SEQUENCE_NO_TAG)

    def test_tags_on_range(self):
        """Duplicate tags on a range raise LGRFormatException."""
        self.lgr.add_range(0x0061, 0x0062, tag=['t1', 't2'])
        with self.assertRaises(LGRFormatException) as cm:
            self.lgr.add_range(0x0063, 0x0064, tag=['t1', 't1'])
        self.assertEqual(cm.exception.reason,
                         LGRFormatException.LGRFormatReason.DUPLICATE_TAG)

    def test_list_types(self):
        """The types property lists each variant type once."""
        self.lgr.add_cp([0x0061])
        self.lgr.add_variant([0x0061], [0x0030], variant_type='BLOCK')
        self.lgr.add_variant([0x0061], [0x0031], variant_type='VALID')
        self.lgr.add_variant([0x0061], [0x0032], variant_type='BLOCK')
        # assertEqual: the assertEquals alias was removed in Python 3.12
        self.assertEqual(self.lgr.types, {'BLOCK', 'VALID'})

    def test_del_reference(self):
        """Deleting a reference also removes it from the code points."""
        ref_id_1 = self.lgr.add_reference("Test - 1")
        ref_id_2 = self.lgr.add_reference("Test - 2")

        self.lgr.add_cp([0x0061], ref=[ref_id_1])
        self.lgr.add_cp([0x0062], ref=[ref_id_1, ref_id_2])

        self.lgr.del_reference(ref_id_1)

        self.assertNotIn(ref_id_1, self.lgr.reference_manager)
        # assertEqual: the assertEquals alias was removed in Python 3.12
        self.assertEqual(self.lgr.get_char([0x0061]).references, [])
        self.assertEqual(self.lgr.get_char([0x0062]).references, [ref_id_2])

    def test_add_cp_duplicate_reference(self):
        """Duplicate references on a code point raise DuplicateReference."""
        ref_id = self.lgr.add_reference("Test - 1")
        with self.assertRaises(DuplicateReference) as cm:
            self.lgr.add_cp([0x0061], ref=[ref_id, ref_id])
        self.assertEqual(cm.exception.cp, [0x0061])

    def test_add_range_duplicate_reference(self):
        """Duplicate references on a range raise DuplicateReference."""
        ref_id = self.lgr.add_reference("Test - 1")
        with self.assertRaises(DuplicateReference) as cm:
            self.lgr.add_range(0x0061, 0x0062, ref=[ref_id, ref_id])
        self.assertEqual(cm.exception.cp, 0x0061)

    def test_add_variant_duplicate_reference(self):
        """Duplicate references on a variant raise DuplicateReference."""
        self.lgr.add_cp([0x0061])
        ref_id = self.lgr.add_reference("Test - 1")
        with self.assertRaises(DuplicateReference) as cm:
            self.lgr.add_variant([0x0061], [0x0062], ref=[ref_id, ref_id])
        self.assertEqual(cm.exception.cp, [0x0061])

    def test_generate_variants(self):
        """Check the (label, variant types, flag) tuples generated
        for labels of various lengths."""
        self.lgr.add_cp([0x0061])
        self.lgr.add_cp([0x0062])
        self.lgr.add_cp([0x0063])
        self.lgr.add_cp([0x0064])
        self.lgr.add_variant([0x0061], [0x0070], variant_type="type0")
        self.lgr.add_variant([0x0062], [0x0071], variant_type="type1")
        self.lgr.add_variant([0x0062], [0x0072], variant_type="type2")

        # Labels without any variant code point do not generate anything
        self.assertEqual([], list(self.lgr._generate_label_variants([])))
        self.assertEqual([],
                         list(self.lgr._generate_label_variants([0x0063])))
        self.assertEqual(
            [],
            list(self.lgr._generate_label_variants([0x0063, 0x0064])))
        self.assertEqual(
            {
                ((0x0071, 0x0063), frozenset(['type1']), False),
                ((0x0072, 0x0063), frozenset(['type2']), False),
            },
            set(self.lgr._generate_label_variants([0x0062, 0x0063])))
        self.assertEqual(
            {
                ((0x0061, 0x0062), frozenset(), False),
                ((0x0061, 0x0071), frozenset(['type1']), False),
                ((0x0061, 0x0072), frozenset(['type2']), False),
                ((0x0070, 0x0062), frozenset(['type0']), False),
                ((0x0070, 0x0071), frozenset(['type0', 'type1']), True),
                ((0x0070, 0x0072), frozenset(['type0', 'type2']), True),
            },
            set(self.lgr._generate_label_variants([0x0061, 0x0062])))
        self.assertEqual(
            {
                ((0x0061, 0x0062, 0x0062), frozenset(), False),
                ((0x0061, 0x0062, 0x0071), frozenset(['type1']), False),
                ((0x0061, 0x0062, 0x0072), frozenset(['type2']), False),
                ((0x0061, 0x0071, 0x0062), frozenset(['type1']), False),
                ((0x0061, 0x0071, 0x0071), frozenset(['type1']), False),
                ((0x0061, 0x0071, 0x0072),
                 frozenset(['type1', 'type2']), False),
                ((0x0061, 0x0072, 0x0062), frozenset(['type2']), False),
                ((0x0061, 0x0072, 0x0071),
                 frozenset(['type1', 'type2']), False),
                ((0x0061, 0x0072, 0x0072), frozenset(['type2']), False),
                ((0x0070, 0x0062, 0x0062), frozenset(['type0']), False),
                ((0x0070, 0x0062, 0x0071),
                 frozenset(['type0', 'type1']), False),
                ((0x0070, 0x0062, 0x0072),
                 frozenset(['type0', 'type2']), False),
                ((0x0070, 0x0071, 0x0062),
                 frozenset(['type0', 'type1']), False),
                ((0x0070, 0x0071, 0x0071),
                 frozenset(['type0', 'type1']), True),
                ((0x0070, 0x0071, 0x0072),
                 frozenset(['type0', 'type1', 'type2']), True),
                ((0x0070, 0x0072, 0x0062),
                 frozenset(['type0', 'type2']), False),
                ((0x0070, 0x0072, 0x0071),
                 frozenset(['type0', 'type1', 'type2']), True),
                ((0x0070, 0x0072, 0x0072),
                 frozenset(['type0', 'type2']), True),
            },
            set(self.lgr._generate_label_variants([0x0061, 0x0062, 0x0062])))

    def test_generate_variants_reflexive(self):
        """Check label variant generation with a reflexive variant."""
        self.lgr.add_cp([0x0061])
        self.lgr.add_cp([0x0062])
        self.lgr.add_cp([0x0063])
        self.lgr.add_variant([0x0062], [0x0062], variant_type="reflexive")
        self.lgr.add_variant([0x0063], [0x0070], variant_type="type")

        self.assertEqual([], list(self.lgr._generate_label_variants([])))
        self.assertEqual([],
                         list(self.lgr._generate_label_variants([0x0061])))
        self.assertEqual([((0x0062, ), frozenset(['reflexive']), True)],
                         list(self.lgr._generate_label_variants([0x0062])))
        self.assertEqual(
            {
                ((0x0062, 0x0063), frozenset(['reflexive']), False),
                ((0x0062, 0x0070), frozenset(['reflexive', 'type']), True),
            },
            set(self.lgr._generate_label_variants([0x0062, 0x0063])))

    def test_label_simple(self):
        """Check preliminary eligibility on a repertoire mixing a code
        point, a sequence and a range."""
        self.lgr.add_cp([0x0061])
        self.lgr.add_cp([0x0062, 0x0063])
        self.lgr.add_range(0x0064, 0x0068)

        valid_labels = (
            [0x0061],
            [0x0062, 0x0063],
            [0x0064],
            [0x0068],
            [0x0061, 0x0064],
            [0x0061, 0x0062, 0x0063, 0x0064],
            [0x0062, 0x0063, 0x0068],
        )
        # Tuples of (label, expected valid label part, expected code
        # points not in LGR), in the order of the returned tuple.
        invalid_labels = (
            ([0x0060], [], [0x0060]),
            ([0x0069], [], [0x0069]),
            ([0x0062], [], [0x0062]),
            ([0x0063], [], [0x0063]),
            ([0x0061, 0x0062], [0x0061], [0x0062]),
        )

        for label in valid_labels:
            self.assertEqual((True, label, []),
                             self.lgr._test_preliminary_eligibility(label))
        for (label, label_part, not_in_lgr) in invalid_labels:
            self.assertEqual((False, label_part, not_in_lgr),
                             self.lgr._test_preliminary_eligibility(label))

    def test_label_eligibility_multiple_choices(self):
        """A label is eligible when a sequence starting with an
        existing single code point covers it."""
        self.lgr.add_cp([0x0061])
        self.lgr.add_cp([0x0061, 0x0062, 0x0063])
        self.lgr.add_cp([0x0064])

        self.assertEqual(self.lgr._test_preliminary_eligibility([0x0062]),
                         (False, [], [0x0062]))
        self.assertEqual(
            self.lgr._test_preliminary_eligibility(
                [0x0061, 0x0062, 0x0063, 0x0064]),
            (True, [0x0061, 0x0062, 0x0063, 0x0064], []))

    def test_label_delayed_eligibilty(self):
        """Labels containing a char with a reflexive 'invalid' variant
        get the 'invalid' disposition."""
        self.lgr.add_cp([0x0061])
        self.lgr.add_variant([0x0061], [0x0061], 'block')
        self.lgr.add_cp([0x0062])
        self.lgr.add_variant([0x0062], [0x0062], 'invalid')
        self.lgr.add_cp([0x0063, 0x0064])
        self.lgr.add_variant([0x0063, 0x0064], [0x0063, 0x0064], 'invalid')

        self.assertEqual(self.lgr._test_label_disposition([0x0062]),
                         ('invalid', 0))
        self.assertEqual(self.lgr._test_label_disposition([0x0063, 0x0064]),
                         ('invalid', 0))
        self.assertEqual(self.lgr._test_label_disposition([0x0061, 0x0062]),
                         ('invalid', 0))

    def test_label_length(self):
        """The maximum label length decreases when the number of
        variants per code point increases."""
        self.lgr.add_cp([0x0061])
        self.lgr.add_variant([0x0061], [0x0061], 'disp')
        self.lgr.add_cp([0x0062])
        self.lgr.add_variant([0x0062], [0x0062], 'disp')

        self.assertEqual(PROTOCOL_LABEL_MAX_LENGTH,
                         self.lgr.max_label_length())

        for i in range(80):
            self.lgr.add_variant([0x0062], [0x074D + i], 'disp')

        # 41: mean number of variants per character
        # (0x0061 has 1 variant, 0x0062 has 1 + 80 = 81: (1 + 81) / 2)
        self.assertEqual(int(math.log(MAX_NUMBER_GENERATED_VARIANTS, 41)),
                         self.lgr.max_label_length())