def normalize(form, unistr):
    """
    Return the normal form 'form' for the Unicode string unistr.

    Valid values for form are 'NFC', 'NFKC', 'NFD', and 'NFKD'. The name is
    looked up in the module-level _forms table; a name that is not present
    there raises ValueError('invalid normalization form').
    """
    try:
        target_form = _forms[form]
    except KeyError:
        raise ValueError('invalid normalization form')
    else:
        # Only reached when the lookup succeeded.
        return Normalizer.normalize(unistr, target_form)
def decomposition(unichr):
    """
    Return the decomposition of the character unichr as a string.

    The result is the space-separated, zero-padded (at least four digits)
    uppercase hex code points of the decomposed sequence, prefixed with
    '<type> ' when a decomposition type is found. When the decomposition
    is a single element and no type is found, the empty string is returned.

    The argument is first checked by _validate_unichr.
    """
    # NOTE(review): another definition of decomposition appears after this
    # one in the file; the later definition is the one the name ends up
    # bound to.
    _validate_unichr(unichr)
    # NOTE(review): the True flag presumably requests compatibility
    # decomposition -- confirm against the Normalizer API in use.
    pieces = Normalizer.decompose(unichr, True)
    single = len(pieces) == 1

    if single:
        # A one-element result is typed by the original character itself.
        kind = _get_decomp_type(unichr)
    else:
        # Otherwise take the first non-None type among the pieces; the
        # generator is lazy, so lookups stop at the first hit.
        kinds = (_get_decomp_type(piece) for piece in pieces)
        kind = next((k for k in kinds if k is not None), None)

    code_points = " ".join(format(ord(piece), "04X") for piece in pieces)

    if kind:
        return "<{}> {}".format(kind, code_points)
    if single:
        return ""
    return code_points
def decomposition(chr):
    """
    Return the decomposition of the character chr as a string.

    The result is the space-separated, zero-padded (at least four digits)
    uppercase hex code points of the decomposed sequence, prefixed with
    '<type> ' when a decomposition type is found. When the decomposition
    is a single element and no type is found, the empty string is returned.

    The argument is first checked by _validate_unichr.
    """
    # NOTE: the parameter name shadows the builtin chr() inside this
    # function; it is kept because it is part of the caller-visible
    # signature. The builtin is not needed here.
    _validate_unichr(chr)
    # NOTE(review): the True flag presumably requests compatibility
    # decomposition -- confirm against the Normalizer API in use.
    pieces = Normalizer.decompose(chr, True)
    single = len(pieces) == 1

    if single:
        # A one-element result is typed by the original character itself.
        kind = _get_decomp_type(chr)
    else:
        # Otherwise take the first non-None type among the pieces; the
        # generator is lazy, so lookups stop at the first hit.
        kinds = (_get_decomp_type(piece) for piece in pieces)
        kind = next((k for k in kinds if k is not None), None)

    code_points = " ".join(format(ord(piece), "04X") for piece in pieces)

    if kind:
        return "<{}> {}".format(kind, code_points)
    if single:
        return ""
    return code_points