def ll_search(s1, s2, start, end, mode):
    """Run rstring._search on the high-level views of s1 and s2.

    The low-level type of s1 selects the conversion: byte strings (and
    single Chars) go through hlstr, everything else through hlunicode.
    start, end and mode are forwarded to rstring._search unchanged.
    """
    from rpython.rtyper.annlowlevel import hlstr, hlunicode
    from rpython.rlib import rstring
    lowlevel_type = typeOf(s1)
    if lowlevel_type != string_repr.lowleveltype and lowlevel_type != Char:
        # Not a byte string and not a Char: use the unicode conversion.
        return rstring._search(hlunicode(s1), hlunicode(s2),
                               start, end, mode)
    return rstring._search(hlstr(s1), hlstr(s2), start, end, mode)
def test_unicode(self):
    """Exercise every UnicodeBuilderRepr append helper, then check the build."""
    builder_repr = UnicodeBuilderRepr
    sb = builder_repr.ll_new(32)
    # One call per append flavour: single char, whole string, slice [2:5],
    # and a repeated char.
    builder_repr.ll_append_char(sb, u'x')
    builder_repr.ll_append(sb, llunicode(u"abc"))
    builder_repr.ll_append_slice(sb, llunicode(u"foobar"), 2, 5)
    builder_repr.ll_append_multiple_char(sb, u'y', 30)
    built = builder_repr.ll_build(sb)
    expected = u"xabcoba" + u"y" * 30
    assert hlunicode(built) == expected
def ll_encode_utf8(self, ll_s):
    """Encode the low-level unicode string ll_s as UTF-8.

    The string is converted with hlunicode, encoded through
    self.runicode_encode_utf_8 in 'strict' mode with
    allow_surrogates=False and self.ll_raise_unicode_exception_encode as
    the error handler, and the result is wrapped with self.ll.llstr.
    """
    from rpython.rtyper.annlowlevel import hlunicode
    text = hlunicode(ll_s)
    assert text is not None
    size = len(text)
    encoded = self.runicode_encode_utf_8(
        text, size, 'strict',
        errorhandler=self.ll_raise_unicode_exception_encode,
        allow_surrogates=False)
    return self.ll.llstr(encoded)
def ll_encode_utf8(self, ll_s):
    """Encode the low-level unicode string ll_s as UTF-8.

    The string is converted with hlunicode, encoded by
    runicode.unicode_encode_utf_8_elidable in 'strict' mode using
    runicode.default_unicode_error_encode, and the result is wrapped
    with self.ll.llstr.
    """
    from rpython.rtyper.annlowlevel import hlunicode
    from rpython.rlib import runicode
    text = hlunicode(ll_s)
    assert text is not None
    on_error = runicode.default_unicode_error_encode
    size = len(text)
    # NB. keep the arguments in sync with annotator/unaryop.py
    encoded = runicode.unicode_encode_utf_8_elidable(
        text, size, 'strict', on_error, True)
    return self.ll.llstr(encoded)
def strslice2unicode_latin1(s, start, end):
    """
    Return s[start:end] as a unicode string.

    s is expected to be an RPython string in latin-1, so each char's
    numeric value is already its unicode code point. The conversion
    works directly on the low-level string, copying char by char into a
    freshly allocated low-level unicode object, which avoids building
    the intermediate slice.

    start and end are not bounds-checked; callers must pass valid
    indices.
    """
    from rpython.rtyper.annlowlevel import llstr, hlunicode
    from rpython.rtyper.lltypesystem.rstr import malloc, UNICODE
    from rpython.rtyper.lltypesystem.lltype import cast_primitive, UniChar
    count = end - start
    source = llstr(s)
    target = malloc(UNICODE, count)
    target.hash = 0
    for offset in range(count):
        # Widen each source char to a UniChar.
        target.chars[offset] = cast_primitive(
            UniChar, source.chars[start + offset])
    return hlunicode(target)
def strslice2unicode_latin1(s, start, end):
    """
    Build the unicode equivalent of s[start:end].

    The input is assumed to be a latin-1 encoded RPython string, where
    each char's numeric value equals its unicode code point. The chars
    are copied one at a time from the low-level string into a new
    low-level unicode object, so the slice itself is never materialized.

    No bounds checking is performed on start and end.
    """
    from rpython.rtyper.annlowlevel import llstr, hlunicode
    from rpython.rtyper.lltypesystem.rstr import malloc, UNICODE
    from rpython.rtyper.lltypesystem.lltype import cast_primitive, UniChar
    n_chars = end - start
    ll_src = llstr(s)
    ll_dst = malloc(UNICODE, n_chars)
    ll_dst.hash = 0
    pos = 0
    while pos < n_chars:
        char = ll_src.chars[start + pos]
        ll_dst.chars[pos] = cast_primitive(UniChar, char)
        pos += 1
    return hlunicode(ll_dst)
def f(arg):
    """Convert arg with hlunicode, back with llunicode, and return the char count."""
    high_level = hlunicode(arg)
    low_level = llunicode(high_level)
    return len(low_level.chars)
def test_hlunicode(self):
    """hlunicode must return the chars stored in a low-level unicode object."""
    expected = u"abc"
    ll_u = mallocunicode(3)
    # Store the characters one by one into the low-level chars array.
    for index, char in enumerate(expected):
        ll_u.chars[index] = char
    assert hlunicode(ll_u) == expected
def bh_unicodehash(self, string):
    """Return compute_hash of the unicode string behind the opaque pointer."""
    unicode_ptr_type = lltype.Ptr(rstr.UNICODE)
    ll_unicode = lltype.cast_opaque_ptr(unicode_ptr_type, string)
    return compute_hash(hlunicode(ll_unicode))