def convert_array_from_object(self, cdata, w_ob):
    """Initialize the raw array at 'cdata' from the wrapped object 'w_ob'.

    The initializer kinds are tried in this order:

    * a list or tuple, handed to self._convert_array_from_listview();
    * a str, when self.can_cast_anything is set or the item type is a
      primitive integer of the size of a char;
    * a unicode, when the item type is W_CTypePrimitiveUniChar.

    In both string cases an IndexError is raised if the string is longer
    than a non-negative self.length, and a zero item is written after
    the copied characters whenever their count differs from self.length.
    Everything else raises the error built by self._convert_error().
    """
    space = self.space

    # --- sequence initializer ---
    if (space.isinstance_w(w_ob, space.w_list) or
            space.isinstance_w(w_ob, space.w_tuple)):
        self._convert_array_from_listview(cdata, w_ob)
        return

    # --- byte string initializer ---
    takes_str = self.can_cast_anything or (
        self.ctitem.is_primitive_integer and
        self.ctitem.size == rffi.sizeof(lltype.Char))
    if takes_str:
        if not space.isinstance_w(w_ob, space.w_str):
            raise self._convert_error("str or list or tuple", w_ob)
        text = space.str_w(w_ob)
        nchars = len(text)
        if self.length >= 0 and nchars > self.length:
            raise oefmt(space.w_IndexError,
                        "initializer string is too long for '%s' (got %d "
                        "characters)", self.name, nchars)
        copy_string_to_raw(llstr(text), cdata, 0, nchars)
        if nchars != self.length:
            # not an exact fit: add the terminating zero item
            cdata[nchars] = '\x00'
        return

    # --- unicode string initializer ---
    if isinstance(self.ctitem, ctypeprim.W_CTypePrimitiveUniChar):
        if not space.isinstance_w(w_ob, space.w_unicode):
            raise self._convert_error("unicode or list or tuple", w_ob)
        utext = space.unicode_w(w_ob)
        nchars = len(utext)
        if self.length >= 0 and nchars > self.length:
            raise oefmt(space.w_IndexError,
                        "initializer unicode string is too long for '%s' "
                        "(got %d characters)", self.name, nchars)
        wide = rffi.cast(rffi.CWCHARP, cdata)
        copy_unicode_to_raw(llunicode(utext), wide, 0, nchars)
        if nchars != self.length:
            wide[nchars] = u'\x00'
        return

    raise self._convert_error("list or tuple", w_ob)
def rawstring2charp(space, address, newcontent, offset=0, size=-1):
    """Copy characters of 'newcontent' into the raw memory at 'address'.

    'offset' and the byte count are passed as the source start and the
    length to copy_string_to_raw().  A negative 'size' (the default)
    means "everything from 'offset' to the end of 'newcontent'".
    """
    from rpython.rtyper.annlowlevel import llstr
    from rpython.rtyper.lltypesystem.rstr import copy_string_to_raw

    nbytes = size
    if nbytes < 0:
        nbytes = len(newcontent) - offset
    dest = rffi.cast(rffi.CCHARP, address)
    copy_string_to_raw(llstr(newcontent), dest, offset, nbytes)
def test_deflate_set_dictionary():
    """Compress with a preset dictionary, then inflate by hand.

    The first raw _inflate() call is expected to answer Z_NEED_DICT;
    after inflateSetDictionary() the second call must produce the
    original text.
    """
    from rpython.rtyper.lltypesystem import lltype, rffi, rstr
    from rpython.rtyper.annlowlevel import llstr
    from rpython.rlib.rstring import StringBuilder

    text = 'abcabc'
    zdict = 'abc'

    # compress 'text' using 'zdict' as the preset dictionary
    deflater = rzlib.deflateInit()
    rzlib.deflateSetDictionary(deflater, zdict)
    compressed = rzlib.compress(deflater, text, rzlib.Z_FINISH)
    rzlib.deflateEnd(deflater)

    # decompress by driving the stream structure directly
    inflater = rzlib.inflateInit()
    in_size = len(compressed)
    out_capacity = 100
    with lltype.scoped_alloc(rffi.CCHARP.TO, in_size) as inbuf:
        rstr.copy_string_to_raw(llstr(compressed), inbuf, 0, in_size)
        inflater.c_next_in = rffi.cast(rzlib.Bytefp, inbuf)
        rffi.setintfield(inflater, 'c_avail_in', in_size)
        with lltype.scoped_alloc(rffi.CCHARP.TO, out_capacity) as outbuf:
            inflater.c_next_out = rffi.cast(rzlib.Bytefp, outbuf)
            rffi.setintfield(inflater, 'c_avail_out', out_capacity)

            first = rzlib._inflate(inflater, rzlib.Z_SYNC_FLUSH)
            assert first == rzlib.Z_NEED_DICT

            rzlib.inflateSetDictionary(inflater, zdict)
            rzlib._inflate(inflater, rzlib.Z_SYNC_FLUSH)

            unused = rffi.cast(lltype.Signed, inflater.c_avail_out)
            collected = StringBuilder()
            collected.append_charpsize(outbuf, out_capacity - unused)

    rzlib.inflateEnd(inflater)
    assert collected.build() == text
def get_nonmovingbuffer(data):
    """Return (pointer, pinned, is_raw) giving non-moving access to 'data'.

    * If 'data' can move and cannot be pinned, the characters are copied
      into a freshly raw-malloced buffer: (buf, False, True).
    * Otherwise the pointer addresses the characters inside 'data'
      itself, and 'pinned' tells whether rgc.pin() was needed:
      (ptr, pinned, False).

    'pinned' and 'is_raw' are never both true.  Must be followed by a
    free_nonmovingbuffer call.
    """
    ll_data = llstrtype(data)
    nchars = len(data)
    was_pinned = False

    if rgc.can_move(data):
        if not rgc.pin(data):
            # pinning failed: fall back to a raw-malloced copy
            raw_copy = lltype.malloc(TYPEP.TO, nchars, flavor='raw')
            copy_string_to_raw(ll_data, raw_copy, 0, nchars)
            return raw_copy, was_pinned, True
        was_pinned = True

    # Reached when 'data' is non-movable, or was successfully pinned:
    # compute the address of its first character.
    first_char = (cast_ptr_to_adr(ll_data) +
                  offsetof(STRTYPE, 'chars') +
                  itemoffsetof(STRTYPE.chars, 0))
    return cast(TYPEP, first_char), was_pinned, False
def memmove(space, w_dest, w_src, n):
    """Copy 'n' bytes from 'w_src' to 'w_dest'.

    Each side is either a W_CData, in which case its raw pointer is
    taken with unsafe_escaping_ptr_for_ptr_or_array(), or any other
    object, which is fetched as a read buffer (source) or a write
    buffer (destination).  For buffers, get_raw_address() is tried
    first; if it raises ValueError the data is handled through the
    buffer interface instead (a string for the source, setitem() calls
    for the destination).

    Raises ValueError (through oefmt) if 'n' is negative.
    """
    if n < 0:
        raise oefmt(space.w_ValueError, "negative size")

    # cases...
    # --- source: end up with either 'src_data' (raw pointer) or
    # 'src_string' (a string of the bytes to copy)
    src_buf = None
    src_data = lltype.nullptr(rffi.CCHARP.TO)
    if isinstance(w_src, cdataobj.W_CData):
        src_data = unsafe_escaping_ptr_for_ptr_or_array(w_src)
        src_is_ptr = True
    else:
        src_buf = _fetch_as_read_buffer(space, w_src)
        try:
            src_data = src_buf.get_raw_address()
            src_is_ptr = True
        except ValueError:
            src_is_ptr = False

    if src_is_ptr:
        src_string = None
    else:
        # no raw address available: materialize the bytes as a string
        if n == src_buf.getlength():
            src_string = src_buf.as_str()
        else:
            src_string = src_buf.getslice(0, n, 1, n)

    # --- destination: either 'dest_data' (raw pointer) or 'dest_buf'
    dest_buf = None
    dest_data = lltype.nullptr(rffi.CCHARP.TO)
    if isinstance(w_dest, cdataobj.W_CData):
        dest_data = unsafe_escaping_ptr_for_ptr_or_array(w_dest)
        dest_is_ptr = True
    else:
        dest_buf = _fetch_as_write_buffer(space, w_dest)
        try:
            dest_data = dest_buf.get_raw_address()
            dest_is_ptr = True
        except ValueError:
            dest_is_ptr = False

    # --- the copy itself, one of four pointer/non-pointer combinations
    if dest_is_ptr:
        if src_is_ptr:
            c_memmove(dest_data, src_data, rffi.cast(rffi.SIZE_T, n))
        else:
            copy_string_to_raw(llstr(src_string), dest_data, 0, n)
    else:
        # nowadays this case should be rare or impossible: as far as
        # I know, all common types implementing the *writable* buffer
        # interface now support get_raw_address()
        if src_is_ptr:
            for i in range(n):
                dest_buf.setitem(i, src_data[i])
        else:
            for i in range(n):
                dest_buf.setitem(i, src_string[i])

    # keep the objects alive until here, i.e. until after the raw
    # pointers obtained from them have been used
    keepalive_until_here(src_buf)
    keepalive_until_here(dest_buf)
    keepalive_until_here(w_src)
    keepalive_until_here(w_dest)
def _operate(stream, data, flush, max_length, cfunc, while_doing):
    """Common code for compress() and decompress().

    Copies 'data' into a raw input buffer attached to 'stream', then
    calls 'cfunc(stream, flush)' repeatedly with an output buffer of at
    most OUTPUT_BUFFER_SIZE bytes, accumulating the produced bytes.
    'max_length' bounds the total output requested from 'cfunc'.

    Returns a tuple (output string, last error code, remaining
    c_avail_in of the stream).  Raises the error built by
    RZlibError.fromstream(stream, err, while_doing) for any result code
    not handled in the loop below.
    """
    # Prepare the input buffer for the stream
    with lltype.scoped_alloc(rffi.CCHARP.TO, len(data)) as inbuf:
        # XXX (groggi) should be possible to improve this with pinning by
        # not performing the 'copy_string_to_raw' if non-movable/pinned
        copy_string_to_raw(llstr(data), inbuf, 0, len(data))
        stream.c_next_in = rffi.cast(Bytefp, inbuf)
        rffi.setintfield(stream, 'c_avail_in', len(data))

        # Prepare the output buffer
        with lltype.scoped_alloc(rffi.CCHARP.TO, OUTPUT_BUFFER_SIZE) as outbuf:
            # Strategy: we call deflate() to get as much output data as fits in
            # the buffer, then accumulate all output into a StringBuffer
            # 'result'.
            result = StringBuilder()

            while True:
                stream.c_next_out = rffi.cast(Bytefp, outbuf)
                bufsize = OUTPUT_BUFFER_SIZE
                if max_length < bufsize:
                    if max_length <= 0:
                        # output budget exhausted: stop, reporting Z_OK
                        err = Z_OK
                        break
                    bufsize = max_length
                max_length -= bufsize
                rffi.setintfield(stream, 'c_avail_out', bufsize)
                err = cfunc(stream, flush)
                if err == Z_OK or err == Z_STREAM_END:
                    # accumulate data into 'result'
                    avail_out = rffi.cast(lltype.Signed, stream.c_avail_out)
                    result.append_charpsize(outbuf, bufsize - avail_out)
                    # if the output buffer is full, there might be more data
                    # so we need to try again.  Otherwise, we're done.
                    if avail_out > 0:
                        break
                    # We're also done if we got a Z_STREAM_END (which should
                    # only occur when flush == Z_FINISH).
                    if err == Z_STREAM_END:
                        break
                    else:
                        continue
                elif err == Z_BUF_ERROR:
                    avail_out = rffi.cast(lltype.Signed, stream.c_avail_out)
                    # When compressing, we will only get Z_BUF_ERROR if
                    # the output buffer was full but there wasn't more
                    # output when we tried again, so it is not an error
                    # condition.
                    if avail_out == bufsize:
                        break

                # fallback case: report this error
                raise RZlibError.fromstream(stream, err, while_doing)

    # When decompressing, if the compressed stream of data was truncated,
    # then the zlib simply returns Z_OK and waits for more.  If it is
    # complete it returns Z_STREAM_END.
    return (result.build(),
            err,
            rffi.cast(lltype.Signed, stream.c_avail_in))
def _operate(stream, data, flush, max_length, cfunc, while_doing): """Common code for compress() and decompress(). """ # Prepare the input buffer for the stream with lltype.scoped_alloc(rffi.CCHARP.TO, len(data)) as inbuf: copy_string_to_raw(llstr(data), inbuf, 0, len(data)) stream.c_next_in = rffi.cast(Bytefp, inbuf) rffi.setintfield(stream, 'c_avail_in', len(data)) # Prepare the output buffer with lltype.scoped_alloc(rffi.CCHARP.TO, OUTPUT_BUFFER_SIZE) as outbuf: # Strategy: we call deflate() to get as much output data as fits in # the buffer, then accumulate all output into a StringBuffer # 'result'. result = StringBuilder() while True: stream.c_next_out = rffi.cast(Bytefp, outbuf) bufsize = OUTPUT_BUFFER_SIZE if max_length < bufsize: if max_length <= 0: err = Z_OK break bufsize = max_length max_length -= bufsize rffi.setintfield(stream, 'c_avail_out', bufsize) err = cfunc(stream, flush) if err == Z_OK or err == Z_STREAM_END: # accumulate data into 'result' avail_out = rffi.cast(lltype.Signed, stream.c_avail_out) result.append_charpsize(outbuf, bufsize - avail_out) # if the output buffer is full, there might be more data # so we need to try again. Otherwise, we're done. if avail_out > 0: break # We're also done if we got a Z_STREAM_END (which should # only occur when flush == Z_FINISH). if err == Z_STREAM_END: break else: continue elif err == Z_BUF_ERROR: avail_out = rffi.cast(lltype.Signed, stream.c_avail_out) # When compressing, we will only get Z_BUF_ERROR if # the output buffer was full but there wasn't more # output when we tried again, so it is not an error # condition. if avail_out == bufsize: break # fallback case: report this error raise RZlibError.fromstream(stream, err, while_doing) # When decompressing, if the compressed stream of data was truncated, # then the zlib simply returns Z_OK and waits for more. If it is # complete it returns Z_STREAM_END. return (result.build(), err, rffi.cast(lltype.Signed, stream.c_avail_in))
def str2charp(s, track_allocation=True):
    """str -> char*

    Raw-malloc an array one item longer than 's', copy the characters of
    's' into it and store 'lastchar' in the extra final slot.  The
    caller owns the returned array.
    """
    length = len(s)
    # NOTE(review): the two malloc calls differ only in the literal flag;
    # presumably the flag has to be a constant at each call site -- confirm
    # before merging them.
    if track_allocation:
        result = lltype.malloc(TYPEP.TO, length + 1, flavor='raw',
                               track_allocation=True)
    else:
        result = lltype.malloc(TYPEP.TO, length + 1, flavor='raw',
                               track_allocation=False)
    copy_string_to_raw(llstrtype(s), result, 0, length)
    result[length] = lastchar
    return result
def _frombytes(self, space, s):
    """Append the raw bytes of 's' to the array as new items.

    Raises ValueError if len(s) is not a multiple of self.itemsize.
    Does nothing when 's' holds no complete item.
    """
    nbytes = len(s)
    if nbytes % self.itemsize != 0:
        raise oefmt(space.w_ValueError,
                    "bytes length not a multiple of item size")
    # NOTE(review): '/' is kept as in the original; on ints it is assumed
    # to be the floor division of Python 2 -- confirm.
    added = nbytes / self.itemsize
    if not added:
        return

    first_new = self.len
    self.setlen(first_new + added)
    # copy the bytes right after the previously existing items
    base = self._charbuf_start()
    dest = rffi.ptradd(base, first_new * self.itemsize)
    copy_string_to_raw(llstr(s), dest, 0, nbytes)
    self._charbuf_stop()
def convert_array_from_object(self, cdata, w_ob):
    """Initialize the raw array at 'cdata' from the wrapped object 'w_ob'.

    The initializer kinds are tried in this order:

    * a list or tuple: first through the item type's
      pack_list_of_items() fast path, else through
      self._convert_array_from_listview();
    * a bytes string, when self.accept_str is set;
    * a unicode string, when the item type is W_CTypePrimitiveUniChar;
      it is converted to char16 or char32 units depending on the item
      size.

    Raises IndexError if a string initializer is longer than a
    non-negative self.length, ValueError if a unicode character does
    not fit in a char16_t, and the error built by self._convert_error()
    for unsupported initializer types.
    """
    space = self.space
    if (space.isinstance_w(w_ob, space.w_list) or
        space.isinstance_w(w_ob, space.w_tuple)):
        if self.ctitem.pack_list_of_items(cdata, w_ob):   # fast path
            pass
        else:
            self._convert_array_from_listview(cdata, space.listview(w_ob))
    elif self.accept_str:
        if not space.isinstance_w(w_ob, space.w_bytes):
            raise self._convert_error("str or list or tuple", w_ob)
        s = space.bytes_w(w_ob)
        n = len(s)
        if self.length >= 0 and n > self.length:
            raise oefmt(
                space.w_IndexError,
                "initializer string is too long for '%s' (got %d "
                "characters)", self.name, n)
        if isinstance(self.ctitem, ctypeprim.W_CTypePrimitiveBool):
            self._must_be_string_of_zero_or_one(s)
        copy_string_to_raw(llstr(s), cdata, 0, n)
        if n != self.length:
            # not an exact fit: add the terminating zero item
            cdata[n] = '\x00'
    elif isinstance(self.ctitem, ctypeprim.W_CTypePrimitiveUniChar):
        from pypy.module._cffi_backend import wchar_helper
        if not space.isinstance_w(w_ob, space.w_unicode):
            raise self._convert_error("unicode or list or tuple", w_ob)
        s = space.unicode_w(w_ob)
        # 'n' is the number of char16/char32 units needed for 's'
        if self.ctitem.size == 2:
            n = wchar_helper.unicode_size_as_char16(s)
        else:
            n = wchar_helper.unicode_size_as_char32(s)
        if self.length >= 0 and n > self.length:
            raise oefmt(
                space.w_IndexError,
                "initializer unicode string is too long for '%s' "
                "(got %d characters)", self.name, n)
        add_final_zero = (n != self.length)
        if self.ctitem.size == 2:
            try:
                wchar_helper.unicode_to_char16(s, cdata, n, add_final_zero)
            except wchar_helper.OutOfRange as e:
                # fixed: the message used to read "ouf of range"
                raise oefmt(
                    self.space.w_ValueError,
                    "unicode character out of range for "
                    "conversion to char16_t: %s", hex(e.ordinal))
        else:
            wchar_helper.unicode_to_char32(s, cdata, n, add_final_zero)
    else:
        raise self._convert_error("list or tuple", w_ob)
def _frombytes(self, space, s):
    """Append the raw bytes of 's' to the array as new items.

    Raises ValueError if len(s) is not a multiple of self.itemsize.
    Does nothing when 's' holds no complete item.
    """
    total = len(s)
    if total % self.itemsize != 0:
        raise oefmt(space.w_ValueError,
                    "bytes length not a multiple of item size")
    # No unicode validity check is done here: CPython accepts invalid
    # unicode (a check_valid_unicode() call used to be considered).
    #
    # NOTE(review): '/' is kept as in the original; on ints it is assumed
    # to be the floor division of Python 2 -- confirm.
    extra_items = total / self.itemsize
    if not extra_items:
        return

    previous = self.len
    self.setlen(previous + extra_items)
    # copy the bytes right after the previously existing items
    charbuf = self._charbuf_start()
    tail = rffi.ptradd(charbuf, previous * self.itemsize)
    copy_string_to_raw(llstr(s), tail, 0, total)
    self._charbuf_stop()
def get_nonmovingbuffer(data):
    """Return a raw pointer giving non-moving access to 'data'.

    If 'data' cannot move, the pointer addresses its characters
    directly; otherwise the characters are copied into a raw-malloced
    buffer.  Must be followed by a free_nonmovingbuffer call.
    """
    ll_data = llstrtype(data)
    if not rgc.can_move(data):
        # already non-movable: point straight at the first character
        first_char = (cast_ptr_to_adr(ll_data) +
                      offsetof(STRTYPE, 'chars') +
                      itemoffsetof(STRTYPE.chars, 0))
        return cast(TYPEP, first_char)

    nchars = len(data)
    raw_copy = lltype.malloc(TYPEP.TO, nchars, flavor='raw')
    copy_string_to_raw(ll_data, raw_copy, 0, nchars)
    return raw_copy
def _do_setslice(self, w_slice, w_value, ptr):
    """Store 'w_value' into the slice 'w_slice' of the memory at 'ptr'.

    Fast paths, tried in order:

    * 'w_value' is a cdata array of exactly the same item type and
      length: one memcpy;
    * the items are one-byte primitives and 'w_value' is a bytes string
      or a bytearray of the right length: a direct character copy.

    Anything else goes through self._do_setslice_iterate().  The two
    string fast paths raise ValueError on a length mismatch.
    """
    ctptr, start, length = self._do_getslicearg(w_slice)
    ctitem = ctptr.ctitem
    itemsize = ctitem.size
    target = rffi.ptradd(ptr, start * itemsize)

    if isinstance(w_value, W_CData):
        from pypy.module._cffi_backend import ctypearray
        ctv = w_value.ctype
        if (isinstance(ctv, ctypearray.W_CTypeArray) and
                ctv.ctitem is ctitem and
                w_value.get_array_length() == length):
            # fast path: copying from exactly the correct type
            with w_value as source:
                nbytes = rffi.cast(rffi.SIZE_T, itemsize * length)
                rffi.c_memcpy(rffi.cast(rffi.VOIDP, target),
                              rffi.cast(rffi.VOIDP, source),
                              nbytes)
                return

    # A fast path for <char[]>[0:N] = "somestring" or some bytearray.
    from pypy.module._cffi_backend import ctypeprim
    space = self.space
    if isinstance(ctitem, ctypeprim.W_CTypePrimitive) and ctitem.size == 1:
        if space.isinstance_w(w_value, space.w_bytes):
            from rpython.rtyper.annlowlevel import llstr
            from rpython.rtyper.lltypesystem.rstr import copy_string_to_raw
            data = space.bytes_w(w_value)
            got = len(data)
            if got != length:
                raise oefmt(space.w_ValueError,
                            "need a string of length %d, got %d",
                            length, got)
            copy_string_to_raw(llstr(data), target, 0, length)
            return
        if space.isinstance_w(w_value, space.w_bytearray):
            chars = w_value.bytearray_list_of_chars_w(space)
            got = len(chars)
            if got != length:
                raise oefmt(space.w_ValueError,
                            "need a bytearray of length %d, got %d",
                            length, got)
            self._copy_list_of_chars_to_raw(chars, target, length)
            return

    # general case: convert the items one at a time
    self._do_setslice_iterate(space, ctitem, w_value, target,
                              itemsize, length)
def _do_setslice(self, w_slice, w_value, ptr):
    """Store 'w_value' into the slice 'w_slice' of the memory at 'ptr'.

    Fast paths, tried in order:

    * 'w_value' is a cdata array of exactly the same item type and
      length: one memcpy;
    * the items are one-byte primitives and 'w_value' is a str or a
      bytearray of the right length: a direct character copy.

    Anything else goes through self._do_setslice_iterate().  The two
    string fast paths raise ValueError on a length mismatch.
    """
    ctptr, start, length = self._do_getslicearg(w_slice)
    ctitem = ctptr.ctitem
    itemsize = ctitem.size
    target = rffi.ptradd(ptr, start * itemsize)

    if isinstance(w_value, W_CData):
        from pypy.module._cffi_backend import ctypearray
        ctv = w_value.ctype
        if (isinstance(ctv, ctypearray.W_CTypeArray) and
                ctv.ctitem is ctitem and
                w_value.get_array_length() == length):
            # fast path: copying from exactly the correct type
            with w_value as source:
                nbytes = rffi.cast(rffi.SIZE_T, itemsize * length)
                rffi.c_memcpy(rffi.cast(rffi.VOIDP, target),
                              rffi.cast(rffi.VOIDP, source),
                              nbytes)
                return

    # A fast path for <char[]>[0:N] = "somestring" or some bytearray.
    from pypy.module._cffi_backend import ctypeprim
    space = self.space
    if isinstance(ctitem, ctypeprim.W_CTypePrimitive) and ctitem.size == 1:
        if space.isinstance_w(w_value, space.w_str):
            from rpython.rtyper.annlowlevel import llstr
            from rpython.rtyper.lltypesystem.rstr import copy_string_to_raw
            data = space.str_w(w_value)
            got = len(data)
            if got != length:
                raise oefmt(space.w_ValueError,
                            "need a string of length %d, got %d",
                            length, got)
            copy_string_to_raw(llstr(data), target, 0, length)
            return
        if space.isinstance_w(w_value, space.w_bytearray):
            chars = w_value.bytearray_list_of_chars_w(space)
            got = len(chars)
            if got != length:
                raise oefmt(space.w_ValueError,
                            "need a bytearray of length %d, got %d",
                            length, got)
            self._copy_list_of_chars_to_raw(chars, target, length)
            return

    # general case: convert the items one at a time
    self._do_setslice_iterate(space, ctitem, w_value, target,
                              itemsize, length)
def _do_setslice(self, w_slice, w_value):
    """Store 'w_value' into the slice 'w_slice' of this cdata.

    Fast paths: a cdata array of exactly the same item type and length
    is copied with one memcpy; a str assigned to char items is copied
    directly (ValueError if its length is wrong).  Otherwise 'w_value'
    is iterated and exactly 'length' items are converted one by one;
    running out of items early raises ValueError.
    """
    ctptr, start, length = self._do_getslicearg(w_slice)
    ctitem = ctptr.ctitem
    itemsize = ctitem.size
    dest = rffi.ptradd(self._cdata, start * itemsize)

    if isinstance(w_value, W_CData):
        from pypy.module._cffi_backend import ctypearray
        ctv = w_value.ctype
        if (isinstance(ctv, ctypearray.W_CTypeArray) and
                ctv.ctitem is ctitem and
                w_value.get_array_length() == length):
            # fast path: copying from exactly the correct type
            rffi.c_memcpy(dest, w_value._cdata, itemsize * length)
            keepalive_until_here(w_value)
            return

    # A fast path for <char[]>[0:N] = "somestring".
    from pypy.module._cffi_backend import ctypeprim
    space = self.space
    if (space.isinstance_w(w_value, space.w_str) and
            isinstance(ctitem, ctypeprim.W_CTypePrimitiveChar)):
        from rpython.rtyper.annlowlevel import llstr
        from rpython.rtyper.lltypesystem.rstr import copy_string_to_raw
        text = space.str_w(w_value)
        got = len(text)
        if got != length:
            raise oefmt(space.w_ValueError,
                        "need a string of length %d, got %d",
                        length, got)
        copy_string_to_raw(llstr(text), dest, 0, length)
        return

    # general case: pull exactly 'length' items out of the iterable
    w_iterator = space.iter(w_value)
    for index in range(length):
        try:
            w_item = space.next(w_iterator)
        except OperationError as e:
            if not e.match(space, space.w_StopIteration):
                raise
            # the iterable ended before 'length' items were seen
            raise oefmt(space.w_ValueError,
                        "need %d values to unpack, got %d", length, index)
        ctitem.convert_from_object(dest, w_item)
        dest = rffi.ptradd(dest, itemsize)
def descr_fromstring(self, space, w_s):
    """ fromstring(string)

    Appends items from the string, interpreting it as an array of machine
    values,as if it had been read from a file using the fromfile() method).
    """
    # (docstring text kept as is: it may be what users see as help)
    raw = space.getarg_w('s#', w_s)
    nbytes = len(raw)
    if nbytes % self.itemsize != 0:
        msg = 'string length not a multiple of item size'
        raise OperationError(self.space.w_ValueError, self.space.wrap(msg))
    # NOTE(review): '/' is kept as in the original; on ints it is assumed
    # to be the floor division of Python 2 -- confirm.
    added = nbytes / self.itemsize
    if not added:
        return

    first_new = self.len
    self.setlen(first_new + added)
    # copy the bytes right after the previously existing items
    base = self._charbuf_start()
    dest = rffi.ptradd(base, first_new * self.itemsize)
    copy_string_to_raw(llstr(raw), dest, 0, nbytes)
    self._charbuf_stop()
def get_nonmovingbuffer(data):
    """Return (pointer, flag) giving non-moving access to 'data'.

    The flag is a one-character string:

    * chr(4): no pinning was needed; the pointer is inside 'data',
      which is non-movable
    * chr(5): 'data' was pinned; the pointer is inside it
    * chr(6): the pointer is a raw-malloced copy of the characters

    The raw-malloced copy is the only outcome when
    we_are_translated_to_c() is false.  For strings (not unicodes) the
    copy is allocated with one extra, uninitialized item at index
    len(data).  Use get_nonmovingbuffer_final_null() instead to get a
    regular null-terminated "char *".  Must be followed by a
    free_nonmovingbuffer call.
    """
    ll_data = llstrtype(data)
    nchars = len(data)

    if we_are_translated_to_c() and not rgc.can_move(data):
        flag = '\x04'
    elif we_are_translated_to_c() and rgc.pin(data):
        flag = '\x05'
    else:
        # raw malloc used to get a nonmovable copy; one extra item is
        # reserved when the target is a char array
        raw_copy = lltype.malloc(TYPEP.TO, nchars + (TYPEP is CCHARP),
                                 flavor='raw')
        copy_string_to_raw(ll_data, raw_copy, 0, nchars)
        return raw_copy, '\x06'

    # Only reached after translation to C, when 'data' is either
    # non-movable or successfully pinned: address its first character.
    first_char = (cast_ptr_to_adr(ll_data) +
                  offsetof(STRTYPE, 'chars') +
                  itemoffsetof(STRTYPE.chars, 0))
    return cast(TYPEP, first_char), flag
def convert_array_from_object(self, cdata, w_ob):
    """Initialize the raw array at 'cdata' from the wrapped object 'w_ob'.

    The initializer kinds are tried in this order:

    * a list or tuple: first through the item type's
      pack_list_of_items() fast path, else through
      self._convert_array_from_listview();
    * a bytes string, when self.accept_str is set;
    * a unicode string, when the item type is W_CTypePrimitiveUniChar.

    In both string cases an IndexError is raised if the string is longer
    than a non-negative self.length, and a zero item is written after
    the copied characters whenever their count differs from self.length.
    Everything else raises the error built by self._convert_error().
    """
    space = self.space

    # --- sequence initializer ---
    if (space.isinstance_w(w_ob, space.w_list) or
            space.isinstance_w(w_ob, space.w_tuple)):
        if not self.ctitem.pack_list_of_items(cdata, w_ob):
            # the fast path declined: convert item by item
            self._convert_array_from_listview(cdata, space.listview(w_ob))
        return

    # --- byte string initializer ---
    if self.accept_str:
        if not space.isinstance_w(w_ob, space.w_bytes):
            raise self._convert_error("str or list or tuple", w_ob)
        text = space.bytes_w(w_ob)
        nchars = len(text)
        if self.length >= 0 and nchars > self.length:
            raise oefmt(space.w_IndexError,
                        "initializer string is too long for '%s' (got %d "
                        "characters)", self.name, nchars)
        if isinstance(self.ctitem, ctypeprim.W_CTypePrimitiveBool):
            self._must_be_string_of_zero_or_one(text)
        copy_string_to_raw(llstr(text), cdata, 0, nchars)
        if nchars != self.length:
            # not an exact fit: add the terminating zero item
            cdata[nchars] = '\x00'
        return

    # --- unicode string initializer ---
    if isinstance(self.ctitem, ctypeprim.W_CTypePrimitiveUniChar):
        if not space.isinstance_w(w_ob, space.w_unicode):
            raise self._convert_error("unicode or list or tuple", w_ob)
        utext = space.unicode_w(w_ob)
        nchars = len(utext)
        if self.length >= 0 and nchars > self.length:
            raise oefmt(space.w_IndexError,
                        "initializer unicode string is too long for '%s' "
                        "(got %d characters)", self.name, nchars)
        wide = rffi.cast(rffi.CWCHARP, cdata)
        copy_unicode_to_raw(llunicode(utext), wide, 0, nchars)
        if nchars != self.length:
            wide[nchars] = u'\x00'
        return

    raise self._convert_error("list or tuple", w_ob)
def descr_fromstring(self, space, w_s):
    """ fromstring(string)

    Appends items from the string, interpreting it as an array of machine
    values,as if it had been read from a file using the fromfile() method).
    """
    # (docstring text kept as is: it may be what users see as help)
    if self is w_s:
        raise oefmt(space.w_ValueError,
                    "array.fromstring(x): x cannot be self")
    raw = space.getarg_w('s#', w_s)
    nbytes = len(raw)
    if nbytes % self.itemsize != 0:
        raise oefmt(self.space.w_ValueError,
                    "string length not a multiple of item size")
    # No unicode validity check is done here: CPython accepts invalid
    # unicode (a check_valid_unicode() call used to be considered).
    #
    # NOTE(review): '/' is kept as in the original; on ints it is assumed
    # to be the floor division of Python 2 -- confirm.
    added = nbytes / self.itemsize
    if not added:
        return

    first_new = self.len
    self.setlen(first_new + added)
    # copy the bytes right after the previously existing items
    base = self._charbuf_start()
    dest = rffi.ptradd(base, first_new * self.itemsize)
    copy_string_to_raw(llstr(raw), dest, 0, nbytes)
    self._charbuf_stop()
def str2chararray(s, array, maxsize):
    """Copy at most 'maxsize' leading characters of 's' into 'array'.

    Returns the number of characters copied.  No terminator is written.
    """
    ncopy = len(s)
    if maxsize < ncopy:
        # truncate to the room available
        ncopy = maxsize
    copy_string_to_raw(llstrtype(s), array, 0, ncopy)
    return ncopy
def rawstring2charp(space, address, newcontent):
    """Copy all the characters of 'newcontent' to the raw memory at
    'address'.  No terminator is written."""
    from rpython.rtyper.annlowlevel import llstr
    from rpython.rtyper.lltypesystem.rstr import copy_string_to_raw

    dest = rffi.cast(rffi.CCHARP, address)
    nbytes = len(newcontent)
    copy_string_to_raw(llstr(newcontent), dest, 0, nbytes)
def get_compiled_regex_cache(interp, regex):
    """Return the compiled PCE object for the delimited pattern 'regex'.

    The result is looked up in interp.space.regex_cache first.  On a
    miss, 'regex' is split into delimiter, pattern and trailing
    modifier letters, the pattern is compiled with _pcre.pcre_compile()
    (and studied if the 'S' modifier is given), and the new PCE is
    stored in the cache before being returned.

    Note that where the comments below say "display a warning", the
    code actually raises ExitFunctionWithError.  That happens for: a
    null byte in 'regex', an empty expression, an alphanumeric or
    backslash delimiter, a missing ending delimiter, the 'e' modifier,
    an unknown modifier, and a compilation failure.
    """
    pce = interp.space.regex_cache.get(regex)
    if pce is not None:
        return pce

    if '\x00' in regex:
        raise ExitFunctionWithError("Null byte in regex")

    # Parse through the leading whitespace, and display a warning if we
    # get to the end without encountering a delimiter.
    i = 0
    while i < len(regex) and regex[i].isspace():
        i += 1
    if i == len(regex):
        raise ExitFunctionWithError("Empty regular expression")

    # Get the delimiter and display a warning if it is alphanumeric
    # or a backslash.
    delimiter = regex[i]
    if delimiter.isalnum() or delimiter == '\\':
        raise ExitFunctionWithError("Delimiter must not be alphanumeric "
                                    "or backslash")
    i += 1
    pattern_start = i
    start_delimiter = delimiter
    # an opening bracket is closed by the matching closing bracket
    if delimiter == '(':
        delimiter = ')'
    elif delimiter == '[':
        delimiter = ']'
    elif delimiter == '{':
        delimiter = '}'
    elif delimiter == '<':
        delimiter = '>'
    end_delimiter = delimiter

    if start_delimiter == end_delimiter:
        # We need to iterate through the pattern, searching for the
        # ending delimiter, but skipping the backslashed delimiters.
        # If the ending delimiter is not found, display a warning.
        while i < len(regex):
            if regex[i] == '\\':
                i += 1
            elif regex[i] == end_delimiter:
                break
            i += 1
        else:
            # the loop ended without 'break': no ending delimiter
            raise ExitFunctionWithError("No ending delimiter '%s' found"
                                        % delimiter[:])
    else:
        # We iterate through the pattern, searching for the matching
        # ending delimiter. For each matching starting delimiter, we
        # increment nesting level, and decrement it for each matching
        # ending delimiter. If we reach the end of the pattern without
        # matching, display a warning.
        brackets = 1       # brackets nesting level
        while i < len(regex):
            if regex[i] == '\\':
                i += 1
            elif regex[i] == end_delimiter:
                brackets -= 1
                if brackets == 0:
                    break
            elif regex[i] == start_delimiter:
                brackets += 1
            i += 1
        else:
            # the loop ended without 'break': brackets never balanced
            raise ExitFunctionWithError("No ending matching delimiter '%s' "
                                        "found" % delimiter[:])

    # Move on to the options
    pattern_end = i
    i += 1

    # Parse through the options, setting appropriate flags.  Display
    # a warning if we encounter an unknown modifier.
    coptions = 0
    poptions = 0
    do_study = False
    while i < len(regex):
        option = regex[i]
        i += 1
        # Perl compatible options
        if option == 'i':
            coptions |= _pcre.PCRE_CASELESS
        elif option == 'm':
            coptions |= _pcre.PCRE_MULTILINE
        elif option == 's':
            coptions |= _pcre.PCRE_DOTALL
        elif option == 'x':
            coptions |= _pcre.PCRE_EXTENDED
        # PCRE specific options
        elif option == 'A':
            coptions |= _pcre.PCRE_ANCHORED
        elif option == 'D':
            coptions |= _pcre.PCRE_DOLLAR_ENDONLY
        elif option == 'S':
            do_study = True
        elif option == 'U':
            coptions |= _pcre.PCRE_UNGREEDY
        elif option == 'X':
            coptions |= _pcre.PCRE_EXTRA
        elif option == 'u':
            coptions |= _pcre.PCRE_UTF8
            if _pcre.PCRE_UCP is not None:
                coptions |= _pcre.PCRE_UCP
        # Custom preg options
        elif option == 'e':
            poptions |= PREG_REPLACE_EVAL
            raise ExitFunctionWithError("The deprecated /e modifier is not "
                                        "supported by hippy")
        elif option == ' ':
            pass
        elif option == '\n':
            pass
        else:
            raise ExitFunctionWithError("Unknown modifier '%s'" % option[:])

    # XXX missing:
    #if HAVE_SETLOCALE
    # if (strcmp(locale, "C"))
    #     tables = pcre_maketables();
    #endif

    # Make a copy of the actual pattern.
    # (a null-terminated raw copy of regex[pattern_start:pattern_end])
    length = pattern_end - pattern_start
    pattern = lltype.malloc(rffi.CCHARP.TO, length + 1, flavor='raw')
    copy_string_to_raw(llstr(regex), pattern, pattern_start, length)
    pattern[length] = '\x00'

    # Compile pattern and display a warning if compilation failed.
    p_error = lltype.malloc(rffi.CCHARPP.TO, 1, flavor='raw', zero=True)
    p_erroffset = lltype.malloc(rffi.INTP.TO, 1, flavor='raw', zero=True)
    tables = lltype.nullptr(rffi.CCHARP.TO)
    re = _pcre.pcre_compile(pattern, coptions, p_error, p_erroffset, tables)
    # read the out-parameters before freeing the cells holding them
    error = p_error[0]
    erroffset = rffi.cast(lltype.Signed, p_erroffset[0])
    lltype.free(p_erroffset, flavor='raw')
    lltype.free(p_error, flavor='raw')
    lltype.free(pattern, flavor='raw')
    # All three raw mallocs above are now freed

    if not re:
        raise ExitFunctionWithError("Compilation failed: %s at offset %d" %
                                    (rffi.charp2str(error), erroffset))

    # If study option was specified, study the pattern and
    # store the result in extra for passing to pcre_exec.
    extra = lltype.nullptr(_pcre.pcre_extra)
    if do_study:
        soptions = 0
        #if _pcre.PCRE_STUDY_JIT_COMPILE is not None:
        #    soptions |= _pcre.PCRE_STUDY_JIT_COMPILE
        p_error = lltype.malloc(rffi.CCHARPP.TO, 1, flavor='raw', zero=True)
        extra = _pcre.pcre_study(re, soptions, p_error)
        error = p_error[0]
        lltype.free(p_error, flavor='raw')
        if error:
            interp.warn("Error while studying pattern")
    # make sure an 'extra' block exists, then turn on both limit flags
    if not extra:
        extra = _pcre.hippy_pcre_extra_malloc()
    rffi.setintfield(
        extra, 'c_flags',
        rffi.getintfield(extra, 'c_flags') | _pcre.PCRE_EXTRA_MATCH_LIMIT |
        _pcre.PCRE_EXTRA_MATCH_LIMIT_RECURSION)

    capturecount = getfullinfo_int(re, extra, _pcre.PCRE_INFO_CAPTURECOUNT)
    assert capturecount >= 0
    subpat_names = make_subpats_table(capturecount, re, extra)

    pce = PCE(
        re, extra, poptions, coptions,  # XXX also locale and tables
        capturecount, subpat_names)
    interp.space.regex_cache.set(regex, pce)
    return pce
def str2rawmem(s, array, start, length):
    """Copy 'length' characters of 's' into 'array'.

    'start' and 'length' are forwarded unchanged as the last two
    arguments of copy_string_to_raw().
    """
    copy_string_to_raw(llstrtype(s), array, start, length)
def f(buf, n):
    """Fill 'buf' with 'abc' repeated 'n' times (3 * n characters)."""
    text = 'abc' * n
    copy_string_to_raw(llstr(text), buf, 0, 3 * n)
def get_compiled_regex_cache(interp, regex):
    """Return the compiled PCE object for the delimited pattern 'regex'.

    The result is looked up in interp.space.regex_cache first.  On a
    miss, 'regex' is split into delimiter, pattern and trailing
    modifier letters, the pattern is compiled with _pcre.pcre_compile()
    (and studied if the 'S' modifier is given), and the new PCE is
    stored in the cache before being returned.

    Note that where the comments below say "display a warning", the
    code actually raises ExitFunctionWithError.  That happens for: a
    null byte in 'regex', an empty expression, an alphanumeric or
    backslash delimiter, a missing ending delimiter, the 'e' modifier,
    an unknown modifier, and a compilation failure.
    """
    pce = interp.space.regex_cache.get(regex)
    if pce is not None:
        return pce

    if '\x00' in regex:
        raise ExitFunctionWithError("Null byte in regex")

    # Parse through the leading whitespace, and display a warning if we
    # get to the end without encountering a delimiter.
    i = 0
    while i < len(regex) and regex[i].isspace():
        i += 1
    if i == len(regex):
        raise ExitFunctionWithError("Empty regular expression")

    # Get the delimiter and display a warning if it is alphanumeric
    # or a backslash.
    delimiter = regex[i]
    if delimiter.isalnum() or delimiter == '\\':
        raise ExitFunctionWithError("Delimiter must not be alphanumeric "
                                    "or backslash")
    i += 1
    pattern_start = i
    start_delimiter = delimiter
    # an opening bracket is closed by the matching closing bracket
    if delimiter == '(':
        delimiter = ')'
    elif delimiter == '[':
        delimiter = ']'
    elif delimiter == '{':
        delimiter = '}'
    elif delimiter == '<':
        delimiter = '>'
    end_delimiter = delimiter

    if start_delimiter == end_delimiter:
        # We need to iterate through the pattern, searching for the
        # ending delimiter, but skipping the backslashed delimiters.
        # If the ending delimiter is not found, display a warning.
        while i < len(regex):
            if regex[i] == '\\':
                i += 1
            elif regex[i] == end_delimiter:
                break
            i += 1
        else:
            # the loop ended without 'break': no ending delimiter
            raise ExitFunctionWithError("No ending delimiter '%s' found"
                                        % delimiter[:])
    else:
        # We iterate through the pattern, searching for the matching
        # ending delimiter. For each matching starting delimiter, we
        # increment nesting level, and decrement it for each matching
        # ending delimiter. If we reach the end of the pattern without
        # matching, display a warning.
        brackets = 1       # brackets nesting level
        while i < len(regex):
            if regex[i] == '\\':
                i += 1
            elif regex[i] == end_delimiter:
                brackets -= 1
                if brackets == 0:
                    break
            elif regex[i] == start_delimiter:
                brackets += 1
            i += 1
        else:
            # the loop ended without 'break': brackets never balanced
            raise ExitFunctionWithError("No ending matching delimiter '%s' "
                                        "found" % delimiter[:])

    # Move on to the options
    pattern_end = i
    i += 1

    # Parse through the options, setting appropriate flags.  Display
    # a warning if we encounter an unknown modifier.
    coptions = 0
    poptions = 0
    do_study = False
    while i < len(regex):
        option = regex[i]
        i += 1
        # Perl compatible options
        if option == 'i':
            coptions |= _pcre.PCRE_CASELESS
        elif option == 'm':
            coptions |= _pcre.PCRE_MULTILINE
        elif option == 's':
            coptions |= _pcre.PCRE_DOTALL
        elif option == 'x':
            coptions |= _pcre.PCRE_EXTENDED
        # PCRE specific options
        elif option == 'A':
            coptions |= _pcre.PCRE_ANCHORED
        elif option == 'D':
            coptions |= _pcre.PCRE_DOLLAR_ENDONLY
        elif option == 'S':
            do_study = True
        elif option == 'U':
            coptions |= _pcre.PCRE_UNGREEDY
        elif option == 'X':
            coptions |= _pcre.PCRE_EXTRA
        elif option == 'u':
            coptions |= _pcre.PCRE_UTF8
            if _pcre.PCRE_UCP is not None:
                coptions |= _pcre.PCRE_UCP
        # Custom preg options
        elif option == 'e':
            poptions |= PREG_REPLACE_EVAL
            raise ExitFunctionWithError("The deprecated /e modifier is not "
                                        "supported by hippy")
        elif option == ' ':
            pass
        elif option == '\n':
            pass
        else:
            raise ExitFunctionWithError("Unknown modifier '%s'" % option[:])

    # XXX missing:
    #if HAVE_SETLOCALE
    # if (strcmp(locale, "C"))
    #     tables = pcre_maketables();
    #endif

    # Make a copy of the actual pattern.
    # (a null-terminated raw copy of regex[pattern_start:pattern_end])
    length = pattern_end - pattern_start
    pattern = lltype.malloc(rffi.CCHARP.TO, length + 1, flavor='raw')
    copy_string_to_raw(llstr(regex), pattern, pattern_start, length)
    pattern[length] = '\x00'

    # Compile pattern and display a warning if compilation failed.
    p_error = lltype.malloc(rffi.CCHARPP.TO, 1, flavor='raw', zero=True)
    p_erroffset = lltype.malloc(rffi.INTP.TO, 1, flavor='raw', zero=True)
    tables = lltype.nullptr(rffi.CCHARP.TO)
    re = _pcre.pcre_compile(pattern, coptions, p_error, p_erroffset, tables)
    # read the out-parameters before freeing the cells holding them
    error = p_error[0]
    erroffset = rffi.cast(lltype.Signed, p_erroffset[0])
    lltype.free(p_erroffset, flavor='raw')
    lltype.free(p_error, flavor='raw')
    lltype.free(pattern, flavor='raw')
    # All three raw mallocs above are now freed

    if not re:
        raise ExitFunctionWithError("Compilation failed: %s at offset %d" %
                                    (rffi.charp2str(error), erroffset))

    # If study option was specified, study the pattern and
    # store the result in extra for passing to pcre_exec.
    extra = lltype.nullptr(_pcre.pcre_extra)
    if do_study:
        soptions = 0
        #if _pcre.PCRE_STUDY_JIT_COMPILE is not None:
        #    soptions |= _pcre.PCRE_STUDY_JIT_COMPILE
        p_error = lltype.malloc(rffi.CCHARPP.TO, 1, flavor='raw', zero=True)
        extra = _pcre.pcre_study(re, soptions, p_error)
        error = p_error[0]
        lltype.free(p_error, flavor='raw')
        if error:
            interp.warn("Error while studying pattern")
    # make sure an 'extra' block exists, then turn on both limit flags
    if not extra:
        extra = _pcre.hippy_pcre_extra_malloc()
    rffi.setintfield(extra, 'c_flags',
                     rffi.getintfield(extra, 'c_flags') |
                     _pcre.PCRE_EXTRA_MATCH_LIMIT |
                     _pcre.PCRE_EXTRA_MATCH_LIMIT_RECURSION)

    capturecount = getfullinfo_int(re, extra, _pcre.PCRE_INFO_CAPTURECOUNT)
    assert capturecount >= 0
    subpat_names = make_subpats_table(capturecount, re, extra)

    pce = PCE(re, extra, poptions, coptions,  # XXX also locale and tables
              capturecount, subpat_names)
    interp.space.regex_cache.set(regex, pce)
    return pce
def setslice(self, start, string):
    """Write the characters of 'string' into the raw memory of this
    object, beginning 'start' items after self.raw_cdata."""
    nbytes = len(string)
    dest = rffi.ptradd(self.raw_cdata, start)
    copy_string_to_raw(llstr(string), dest, 0, nbytes)
def f(buf, n):
    """Copy 'n' repetitions of 'abc' into 'buf'."""
    repeated = 'abc' * n
    total = n * 3
    copy_string_to_raw(llstr(repeated), buf, 0, total)