def test_enforce(self):
    """Exercise enforce() on the UsernameCasePreserved profile."""
    username = get_profile('UsernameCasePreserved')

    # (input, expected enforced value) pairs, checked in order.
    accepted = (
        ('Juliet', 'Juliet'),
        ('J*', 'J*'),
        ('E\u0301\u0301\u0301', '\u00c9\u0301\u0301'),
        (b'Juliet', 'Juliet'),
    )
    for raw, expected in accepted:
        self.assertEqual(username.enforce(raw), expected)

    # (input, reason) pairs handed to the profile_fail helper; the second
    # item is presumably the expected failure reason -- confirm against
    # the helper's definition.
    rejected = (
        ('', 'empty'),
        (' J', 'spaces'),
        ('\u05d0*', 'bidi_rule'),
    )
    for raw, reason in rejected:
        self.profile_fail(username, raw, reason)
def search(name):
    """Search function registered for PRECIS codecs.

    Args:
        name (str): Codec name.

    Returns:
        CodecInfo: Encode/decode information, or None when `get_profile`
            raises KeyError for `name`.
    """
    try:
        precis_profile = get_profile(name)
    except KeyError:
        return None
    else:
        return codecs.CodecInfo(
            name=name,
            encode=_make_encode(precis_profile),
            decode=_not_supported,
        )
def freeformclass_name(name):
    """
    Delete all characters from a name that are not part of the FreeFormClass
    as specified by RFC 8264

    Every character is enforced on its own; a character is dropped when the
    profile raises ``UnicodeEncodeError`` for it. Surviving characters are
    kept in their original form (the enforced value is discarded).

    :param name: A user name
    :return: The FreeFormClass conformant user name
    """
    ffc = get_profile('FreeFormClass')

    def _is_allowed(character):
        # Only the accept/reject outcome matters, not the enforced value.
        try:
            ffc.enforce(character)
        except UnicodeEncodeError:
            return False
        return True

    # Build the result in one pass instead of repeated string concatenation.
    return "".join(character for character in name if _is_allowed(character))
def test_identifier_oddities(self):
    """Pin the code points < 10,000 that the profile accepts but its base rejects."""
    # The profile is UsernameCasePreserved; `profile.base` is presumably
    # IdentifierClass -- confirm against the profile definition.
    username = get_profile('UsernameCasePreserved')
    oddities = []
    for codepoint in range(10000):
        char = chr(codepoint)
        try:
            username.enforce(char)
        except UnicodeEncodeError:
            # Rejected by the profile itself: not an oddity.
            continue
        try:
            username.base.enforce(char)
        except UnicodeEncodeError:
            # Accepted by the profile but rejected by its base class.
            oddities.append(codepoint)

    expected = [
        832, 833, 835, 836, 884, 894, 2392, 2393, 2394, 2395, 2396, 2397,
        2398, 2399, 2524, 2525, 2527, 2611, 2614, 2649, 2650, 2651, 2654,
        2908, 2909, 3907, 3917, 3922, 3927, 3932, 3945, 3955, 3957, 3958,
        3960, 3969, 3987, 3997, 4002, 4007, 4012, 4025, 8049, 8051, 8053,
        8055, 8057, 8059, 8061, 8123, 8126, 8137, 8139, 8147, 8155, 8163,
        8171, 8175, 8185, 8187, 8486, 8490, 8491,
    ]
    self.assertEqual(oddities, expected)
def get_random():
    """Return a string built from three random UTF-8 sequences.

    Each piece is the bytes returned by ``random_utf8_seq()`` decoded as
    UTF-8; the three pieces are concatenated.
    """
    return "".join(str(random_utf8_seq(), "utf8") for _ in range(3))


# Names of the PRECIS profiles every generated input is run through.
profiles = [
    "UsernameCasePreserved",
    "UsernameCaseMapped",
    "NicknameCaseMapped",
    "OpaqueString",
]

tests = []

# The number of random inputs to generate is a required argument.
if len(sys.argv) == 1:
    # sys.exit works even when the `site`-provided `exit` builtin is absent.
    sys.exit(1)

# Look each profile up once instead of once per generated input.
precis_profiles = [(name, precis.get_profile(name)) for name in profiles]

for i in range(int(sys.argv[1])):
    nput = get_random()
    for profile, precisprofile in precis_profiles:
        try:
            output = precisprofile.enforce(nput)
        except UnicodeEncodeError:
            # The input was rejected by the profile; record a null output.
            # Anything else (including Ctrl-C) is allowed to propagate.
            output = None
        tests.append({
            "profile": profile,
            "input": nput,
            "output": output,
            "error": None,
        })

# Emit all generated cases as one JSON array on stdout.
print(json.dumps(tests))
from precis_i18n import get_profile
import unicodedata

profile = get_profile('nicknamecasemapped:ToLower')


def _escape(s):
    """Encode `s` with the unicode-escape codec and return it as ASCII text."""
    escaped = s.encode('unicode-escape')
    return escaped.decode('ascii')


# Scan every code point and report the ones whose enforced form changes
# again (ignoring surrounding whitespace) when it is enforced a second time.
for cp in range(0x0110000):
    original = chr(cp)
    try:
        actual = profile.enforce(original)
        if actual == original:
            # Unchanged by the profile: nothing to re-check.
            continue
        idempotent = profile.enforce(actual)
        if idempotent.strip() == actual.strip():
            continue
        print(
            _escape(original),
            unicodedata.name(original),
            ';',
            unicodedata.decomposition(original),
        )
    except UnicodeEncodeError:
        # The profile rejected the character (or its enforced form).
        pass
from precis_i18n import get_profile
from precis_i18n.derived import derived_property
from precis_i18n.unicode import UnicodeData

UCD = UnicodeData()
profile = get_profile('UsernameCasePreserved')

# count: 'has_compat' code points the profile still accepts.
# ascii: how many of those enforce to a single byte below 128.
count = 0
ascii = 0

for cp in range(0x110000):
    _, reason = derived_property(cp, UCD)
    if reason != 'has_compat':
        continue
    try:
        result = profile.enforce(chr(cp)).encode('utf-8')
    except UnicodeEncodeError:
        # Rejected by the profile: not counted.
        continue
    print('%04x => %s' % (cp, result))
    if len(result) == 1 and result[0] < 128:
        ascii += 1
    count += 1

print('Exceptions: %d, %d ascii' % (count, ascii))
# jid.py implements RFC 7622 and XEP-0106
import socket
from collections import namedtuple
from functools import lru_cache

import idna
import precis_i18n

from gxmpp.util.decos import slot_reify

_UsernameCaseMapped = precis_i18n.get_profile("UsernameCaseMapped")
_OpaqueString = precis_i18n.get_profile("OpaqueString")

UnescapedJID = namedtuple("UnescapedJID", "local domain resource")


def _normalize_localpart(local):
    """Enforce the UsernameCaseMapped profile on a JID localpart.

    Args:
        local: The localpart to normalize, or None.

    Returns:
        The enforced localpart, or None when `local` is None.

    Raises:
        ValueError: If the profile rejects `local`, or if the enforced
            value is empty or longer than 1023 octets when UTF-8 encoded.
    """
    if local is None:
        return None
    try:
        local = _UsernameCaseMapped.enforce(local)
    except UnicodeError as e:
        # enforce() reports a rejected string with UnicodeEncodeError, and
        # undecodable bytes surface as UnicodeDecodeError; UnicodeError is
        # the common parent, so both get the descriptive message below.
        raise ValueError(
            "localpart failed to validate against UsernameCaseMapped PRECIS class"
        ) from e
    # The limit applies to the UTF-8 encoded length, not the character count.
    octets = len(local.encode("utf-8"))
    if not octets or octets > 1023:
        raise ValueError(
            "localpart must not be zero or exceed 1023 octets in length")
    return local
from collections import Counter
import unicodedata

import precis_i18n as precis


def _escape(s):
    """Encode `s` with the unicode-escape codec and return it as ASCII text."""
    return s.encode('unicode-escape').decode('ascii')


def _idempotent_ignoring_space(profile, value):
    """Return True if enforcing `value` twice gives the same result as once.

    The two results are compared after stripping surrounding whitespace.
    Any exception raised by ``profile.enforce`` propagates to the caller.
    """
    result1 = profile.enforce(value)
    result2 = profile.enforce(result1)
    return result1.strip() == result2.strip()


# Tally of non-idempotent characters, keyed by their '<tag>' decomposition kind.
results = Counter()
profile = precis.get_profile('NicknameCaseMapped:ToLower')

for cp in range(0x0110000):
    char = chr(cp)
    try:
        if not _idempotent_ignoring_space(profile, char):
            decomp = unicodedata.decomposition(char)
            # decomposition() returns '' for a character without a mapping;
            # indexing the empty split() result would raise IndexError.
            kind = decomp.split()[0] if decomp else ''
            if kind.startswith('<'):
                results[kind] += 1
            else:
                # No '<tag>' prefix: list the character individually.
                print(_escape(char), unicodedata.name(char))
    except UnicodeEncodeError:
        # The profile rejected the character (or its enforced form).
        pass

print(results)
def test_enforce(self):
    """Exercise enforce() on the NicknameCaseMapped:ToLower profile."""
    nickname = get_profile('NicknameCaseMapped:ToLower')

    # (input, expected enforced value) pairs, checked in order.
    cases = (
        ('Juliet', 'juliet'),
        ('E\u0301\u0301\u0301', '\u00e9\u0301\u0301'),
        ('\u03d4', '\u03cb'),
    )
    for raw, expected in cases:
        self.assertEqual(nickname.enforce(raw), expected)
def test_missing(self):
    """get_profile raises KeyError for a profile name that does not exist."""
    self.assertRaises(KeyError, get_profile, '_does_not_exist_')
def test_enforce(self):
    """Exercise enforce() on the UsernameCaseMapped profile."""
    username = get_profile('UsernameCaseMapped')

    # (input, expected enforced value) pairs, checked in order.
    cases = (
        ('Juliet', 'juliet'),
        ('E\u0301\u0301\u0301', '\u00e9\u0301\u0301'),
    )
    for raw, expected in cases:
        self.assertEqual(username.enforce(raw), expected)
def test_invalid_argument(self):
    """enforce() raises ValueError matching 'not a string' for an int argument."""
    username = get_profile('UsernameCasePreserved')
    self.assertRaisesRegex(ValueError, 'not a string', username.enforce, 1)
def test_unicodedata_arg(self):
    """get_profile accepts the stdlib unicodedata module via its keyword."""
    import unicodedata

    username = get_profile('UsernameCasePreserved', unicodedata=unicodedata)
    enforced = username.enforce('E\u0301\u0301\u0301')
    self.assertEqual(enforced, '\u00c9\u0301\u0301')