def rawwcharp2unicoden(wcp, maxlen):
    """Build a unicode string from at most `maxlen` leading items of `wcp`.

    Items are converted one by one with ``code_to_unichr``.  Copying stops
    early at the first item whose value, cast to a signed integer, is zero.
    The built string is passed through ``assert_str0`` before it is returned.
    """
    builder = UnicodeBuilder(maxlen)
    pos = 0
    while pos < maxlen:
        if rffi.cast(lltype.Signed, wcp[pos]) == 0:
            # zero item found before maxlen was reached: stop copying
            break
        builder.append(code_to_unichr(wcp[pos]))
        pos += 1
    return assert_str0(builder.build())
def get(self, ch, errorchar):
    """Return the replacement for character `ch` according to the charmap.

    The entry for ``ord(ch)`` is read from ``self.mapping_w`` when that is
    set, otherwise from ``self.w_mapping`` through the object space.

    Returns the unwrapped unicode string for a unicode entry, the character
    built by ``code_to_unichr`` for an integer entry, and `errorchar` when
    the entry is None or the lookup fails with a LookupError.

    Raises an app-level TypeError for an integer outside
    ``range(0x110000)`` or for an entry of any other type.
    """
    space = self.space

    # get the character from the mapping
    if self.mapping_w is None:
        try:
            w_item = space.getitem(self.w_mapping, space.newint(ord(ch)))
        except OperationError as e:
            if e.match(space, space.w_LookupError):
                return errorchar
            raise
    else:
        w_item = self.mapping_w[ord(ch)]

    # Charmap may return a unicode string
    if space.isinstance_w(w_item, space.w_unicode):
        return space.unicode_w(w_item)

    # Charmap may return a number
    if space.isinstance_w(w_item, space.w_int):
        codepoint = space.int_w(w_item)
        if codepoint < 0 or codepoint > 0x10FFFF:
            raise oefmt(space.w_TypeError,
                        "character mapping must be in range(0x110000)")
        return code_to_unichr(codepoint)

    # Charmap may return None
    if space.is_w(w_item, space.w_None):
        return errorchar

    raise oefmt(space.w_TypeError,
                "character mapping must return integer, None or str")
def lookup(self, space, name):
    """Return the wrapped character whose name is `name`.

    The name is upper-cased before it is handed to ``self._lookup``.  A
    KeyError from that call is turned into an app-level KeyError whose
    message names the original, non-upper-cased `name`.
    """
    try:
        code = self._lookup(name.upper())
    except KeyError:
        w_template = space.wrap("undefined character name '%s'")
        w_name = space.wrap(name)
        raise OperationError(space.w_KeyError, space.mod(w_template, w_name))
    uchr = code_to_unichr(code)
    return space.wrap(uchr)
def get(self, ch, errorchar):
    """Translate character `ch` through the charmap.

    The entry for ``ord(ch)`` comes from ``self.mapping_w`` when it is set,
    and from ``self.w_mapping`` (indexed through the space) otherwise.

    Result by entry type:
      * unicode entry -> its unwrapped value
      * integer entry -> ``code_to_unichr`` of that integer
      * None entry, or LookupError during lookup -> `errorchar`

    An integer outside ``range(0x110000)`` or any other entry type raises
    an app-level TypeError.
    """
    space = self.space

    # get the character from the mapping
    if self.mapping_w is None:
        try:
            w_entry = space.getitem(self.w_mapping, space.newint(ord(ch)))
        except OperationError as e:
            if e.match(space, space.w_LookupError):
                return errorchar
            raise
    else:
        w_entry = self.mapping_w[ord(ch)]

    if space.isinstance_w(w_entry, space.w_unicode):
        # Charmap may return a unicode string
        return space.unicode_w(w_entry)

    if space.isinstance_w(w_entry, space.w_int):
        # Charmap may return a number
        number = space.int_w(w_entry)
        if number < 0 or number > 0x10FFFF:
            raise oefmt(space.w_TypeError,
                        "character mapping must be in range(0x110000)")
        return code_to_unichr(number)

    if space.is_w(w_entry, space.w_None):
        # Charmap may return None
        return errorchar

    raise oefmt(space.w_TypeError,
                "character mapping must return integer, None or unicode")
def lookup(self, space, name):
    """Return the wrapped unicode character whose name is `name`.

    ``self._lookup`` receives the upper-cased name.  If it raises KeyError,
    an app-level KeyError is raised instead, with a message built from the
    name exactly as the caller passed it.
    """
    try:
        code = self._lookup(name.upper())
    except KeyError:
        w_format = space.newtext("undefined character name '%s'")
        w_arg = space.newtext(name)
        raise OperationError(space.w_KeyError, space.mod(w_format, w_arg))
    uchr = code_to_unichr(code)
    return space.newunicode(uchr)
def lookup(self, space, name):
    """Return the wrapped unicode value registered under `name`.

    The upper-cased name is looked up with ``with_named_sequence=True``.
    If ``self._lookup_named_sequence`` yields a sequence for the resulting
    code, that sequence is wrapped and returned; otherwise the single
    character for the code is returned.

    A KeyError from ``self._lookup`` is re-raised as an app-level KeyError
    whose message contains the name as passed by the caller.
    """
    try:
        code = self._lookup(name.upper(), with_named_sequence=True)
    except KeyError:
        w_format = space.newtext("undefined character name '%s'")
        w_arg = space.newtext(name)
        raise OperationError(space.w_KeyError, space.mod(w_format, w_arg))

    # The code may be a named sequence
    sequence = self._lookup_named_sequence(code)
    if sequence is None:
        return space.newunicode(code_to_unichr(code))

    # named sequences only contain UCS2 codes, no surrogates &co.
    return space.newunicode(sequence)
def decode_escape_sequence_unicode(self, i, builder):
    """Decode one backslash-u escape and append its UTF-8 form to `builder`.

    `i` is the index just after the 'u'.  The four characters in
    ``[i, i + 4)`` are parsed as a hexadecimal number.  When that number
    is in the high-surrogate range (``val & 0xfc00 == 0xd800``),
    ``self.decode_surrogate_pair`` is asked to combine it with what follows
    and the index advances by 6 more characters (presumably the length of
    the second escape -- confirm against ``decode_surrogate_pair``).

    Returns the index just past everything consumed.  A ValueError raised
    while parsing is reported through ``self._raise``.
    """
    # at this point we are just after the 'u' of the \u1234 sequence.
    start = i
    i += 4
    hexdigits = self.getslice(start, i)
    try:
        val = int(hexdigits, 16)
        if val & 0xfc00 == 0xd800:
            # surrogate pair
            val = self.decode_surrogate_pair(i, val)
            i += 6
    except ValueError:
        # The backslash is escaped explicitly so that the literal does not
        # depend on byte-string rules for an unknown "\u" escape; the
        # resulting message text is unchanged.
        self._raise("Invalid \\uXXXX escape (char %d)", i - 1)
        return  # help the annotator to know that we'll never go beyond
                # this point
    #
    uchr = runicode.code_to_unichr(val)  # may be a surrogate pair again
    utf8_ch = unicodehelper.encode_utf8(self.space, uchr)
    builder.append(utf8_ch)
    return i
def decode_escape_sequence_unicode(self, i, builder):
    """Decode one backslash-u escape and append its UTF-8 form to `builder`.

    `i` is the index just after the 'u'.  The four characters in
    ``[i, i + 4)`` are parsed as a hexadecimal number.  When that number
    is in the high-surrogate range (``val & 0xfc00 == 0xd800``),
    ``self.decode_surrogate_pair`` is asked to combine it with what follows
    and the index advances by 6 more characters (presumably the length of
    the second escape -- confirm against ``decode_surrogate_pair``).

    Returns the index just past everything consumed.  A ValueError raised
    while parsing is reported through ``self._raise``.
    """
    # at this point we are just after the 'u' of the \u1234 sequence.
    start = i
    i += 4
    hexdigits = self.getslice(start, i)
    try:
        val = int(hexdigits, 16)
        if val & 0xfc00 == 0xd800:
            # surrogate pair
            val = self.decode_surrogate_pair(i, val)
            i += 6
    except ValueError:
        # The backslash is escaped explicitly so that the literal does not
        # depend on byte-string rules for an unknown "\u" escape; the
        # resulting message text is unchanged.
        self._raise("Invalid \\uXXXX escape (char %d)", i - 1)
        return  # help the annotator to know that we'll never go beyond
                # this point
    #
    uchr = runicode.code_to_unichr(val)  # may be a surrogate pair again
    utf8_ch = unicodehelper.encode_utf8(self.space, uchr)
    builder.append(utf8_ch)
    return i
class Charmap_Decode:
    """Look up decoded characters in a charmap mapping object.

    A tuple mapping is unpacked once with ``space.fixedview`` so that
    lookups can index the result directly; any other mapping object is
    indexed through ``space.getitem`` on every call.
    """

    def __init__(self, space, w_mapping):
        self.space = space
        self.w_mapping = w_mapping

        # fast path for all the stuff in the encodings module
        if space.isinstance_w(w_mapping, space.w_tuple):
            self.mapping_w = space.fixedview(w_mapping)
        else:
            self.mapping_w = None

    def get(self, ch, errorchar):
        """Return the replacement for character `ch`.

        Returns the unwrapped unicode string for a unicode entry, the
        character built by ``code_to_unichr`` for an integer entry, and
        `errorchar` when the entry is None or the generic lookup fails
        with a LookupError.

        Raises an app-level TypeError for an integer outside
        ``range(0x110000)`` or for an entry of any other type.
        """
        space = self.space

        # get the character from the mapping
        if self.mapping_w is not None:
            w_ch = self.mapping_w[ord(ch)]
        else:
            try:
                w_ch = space.getitem(self.w_mapping, space.newint(ord(ch)))
            except OperationError as e:
                # only a failed lookup maps to errorchar; anything else
                # propagates unchanged
                if not e.match(space, space.w_LookupError):
                    raise
                return errorchar

        if space.isinstance_w(w_ch, space.w_unicode):
            # Charmap may return a unicode string
            return space.unicode_w(w_ch)
        elif space.isinstance_w(w_ch, space.w_int):
            # Charmap may return a number
            x = space.int_w(w_ch)
            if not 0 <= x <= 0x10FFFF:
                raise oefmt(space.w_TypeError,
                            "character mapping must be in range(0x110000)")
            return code_to_unichr(x)
        elif space.is_w(w_ch, space.w_None):
            # Charmap may return None
            return errorchar

        raise oefmt(space.w_TypeError,
                    "character mapping must return integer, None or unicode")
def decode_escape_sequence_unicode(self, i, builder):
    """Decode one backslash-u escape and append its UTF-8 form to `builder`.

    `i` is the index just after the 'u'.  The four characters in
    ``[i, i + 4)`` are parsed as a hexadecimal number.  When
    ``sys.maxunicode`` is above 65535, the number lies in 0xd800..0xdfff
    and the next two characters are a backslash and 'u',
    ``self.decode_surrogate_pair`` is called and the index moves 6 further.

    Returns the index just past everything consumed.  A ValueError raised
    while parsing is converted into a DecoderError carrying the index of
    the last hex digit.
    """
    # at this point we are just after the 'u' of the \u1234 sequence.
    end = i + 4
    digits = self.getslice(i, end)
    try:
        codepoint = int(digits, 16)
        if sys.maxunicode > 65535 and codepoint >= 0xd800 and codepoint <= 0xdfff:
            # surrogate pair
            if self.ll_chars[end] == '\\' and self.ll_chars[end + 1] == 'u':
                codepoint = self.decode_surrogate_pair(end, codepoint)
                end += 6
    except ValueError:
        raise DecoderError("Invalid \\uXXXX escape", end - 1)
    #
    # may be a surrogate pair again
    uchr = runicode.code_to_unichr(codepoint)
    encoded = unicodehelper.encode_utf8(self.space, uchr,
                                        allow_surrogates=True)
    builder.append(encoded)
    return end
def f(c):
    """Return the ordinal of the first item of ``code_to_unichr(c)``."""
    converted = code_to_unichr(c)
    first = converted[0]
    return ord(first)
def f(c):
    """Return ``code_to_unichr(c)`` concatenated with an empty unicode string."""
    converted = code_to_unichr(c)
    return converted + u''