def unicode_to_unicode_charseq(context, builder, fromty, toty, val):
    """Lower a cast from a unicode string value to a unicode char sequence.

    The source string stores its code units with a width given by its
    ``kind`` field (1, 2 or 4 bytes); the destination stores
    ``unicode_byte_width`` bytes per unit.  At most ``toty.count`` units are
    copied, each zero-extended to the destination width.  When the source is
    shorter than the destination, the destination buffer is zero-filled
    first.

    If the source kind is wider than ``unicode_byte_width`` a ``ValueError``
    is returned through the calling convention instead of copying.

    Returns the loaded destination value of LLVM type
    ``context.get_value_type(toty)``.
    """
    uni_str = cgutils.create_struct_proxy(fromty)(context, builder, value=val)

    # View the same data pointer as 1-, 2- and 4-byte code units.
    widths = (1, 2, 4)
    sources = [
        builder.bitcast(uni_str.data, ir.IntType(8 * width).as_pointer())
        for width in widths
    ]

    # Runtime predicates selecting which view matches the string's kind.
    kind = uni_str.kind
    kind_checks = [
        builder.icmp_unsigned('==', kind, ir.Constant(kind.type, width))
        for width in widths
    ]

    src_length = uni_str.length
    lty = context.get_value_type(toty)
    dstint_t = ir.IntType(8 * unicode_byte_width)
    dst_ptr = cgutils.alloca_once(builder, lty)
    dst = builder.bitcast(dst_ptr, dstint_t.as_pointer())

    dst_length = ir.Constant(src_length.type, toty.count)
    is_shorter_value = builder.icmp_unsigned('<', src_length, dst_length)
    count = builder.select(is_shorter_value, src_length, dst_length)

    with builder.if_then(is_shorter_value):
        # The copy will not cover the whole destination: clear it first.
        cgutils.memset(
            builder, dst,
            ir.Constant(src_length.type, toty.count * unicode_byte_width),
            0)

    for width, src, is_kind in zip(widths, sources, kind_checks):
        with builder.if_then(is_kind):
            if unicode_byte_width >= width:
                with cgutils.for_range(builder, count) as loop:
                    in_ptr = builder.gep(src, [loop.index])
                    in_val = builder.zext(builder.load(in_ptr), dstint_t)
                    builder.store(in_val, builder.gep(dst, [loop.index]))
            else:
                # Compile-time decision: the destination cannot hold units
                # of this width.  The exception arguments must be a tuple,
                # as in the other ``return_user_exc`` call sites.
                msg = ("cannot cast %s-bit unicode_type to %s-bit %s"
                       % (width * 8, unicode_byte_width * 8, toty))
                context.call_conv.return_user_exc(
                    builder, ValueError, (msg,))

    return builder.load(dst_ptr)
def impl_string_array_single(context, builder, sig, args):
    """Lower construction of a string array from an optional list argument.

    Without arguments, a freshly created string-array struct is returned
    untouched.  Otherwise the results of the external ``get_str_len``
    function are summed over the list items, ``allocate_string_array`` is
    called with the ``offsets``/``data`` field pointers, the list size and
    that total, and every item is handed to ``setitem_string_array``.
    """
    out_arr = cgutils.create_struct_proxy(sig.return_type)(context, builder)
    if not sig.args:
        # No source list: return the empty string array.
        return out_arr._getvalue()

    char_ptr = lir.IntType(8).as_pointer()
    int64 = lir.IntType(64)
    strings = ListInstance(context, builder, sig.args[0], args[0])

    # Accumulate the total size of the string buffer.
    len_fn = builder.module.get_or_insert_function(
        lir.FunctionType(int64, [char_ptr]), name="get_str_len")
    total_ptr = cgutils.alloca_once_value(
        builder, context.get_constant(types.intp, 0))
    out_arr.size = strings.size

    with cgutils.for_range(builder, strings.size) as loop:
        item = strings.getitem(loop.index)
        item_len = builder.call(len_fn, [item])
        running = builder.load(total_ptr)
        builder.store(builder.add(running, item_len), total_ptr)

    # Allocate the string array storage.
    alloc_fn = builder.module.get_or_insert_function(
        lir.FunctionType(
            lir.VoidType(),
            [char_ptr.as_pointer(), char_ptr.as_pointer(), int64, int64]),
        name="allocate_string_array")
    alloc_args = [
        out_arr._get_ptr_by_name('offsets'),
        out_arr._get_ptr_by_name('data'),
        strings.size,
        builder.load(total_ptr),
    ]
    builder.call(alloc_fn, alloc_args)

    # Store every list item into the array.
    set_fn = builder.module.get_or_insert_function(
        lir.FunctionType(lir.VoidType(),
                         [char_ptr, char_ptr, char_ptr, int64]),
        name="setitem_string_array")
    with cgutils.for_range(builder, strings.size) as loop:
        item = strings.getitem(loop.index)
        builder.call(set_fn,
                     [out_arr.offsets, out_arr.data, item, loop.index])

    return out_arr._getvalue()
def box_str(typ, val, c):
    """Box a native string array into a Python list.

    A list of ``string_array.size`` slots is created; each slot is filled
    with the Python string built from the C string returned by the external
    ``getitem_string_array`` function.  The native value is decref'ed
    through NRT before the list object is returned.
    """
    builder = c.builder
    pyapi = c.pyapi
    char_ptr = lir.IntType(8).as_pointer()

    arr = cgutils.create_struct_proxy(typ)(c.context, builder, val)

    py_list = pyapi.list_new(arr.size)
    # Keep the list pointer in a stack slot; it is reloaded on return.
    result_slot = cgutils.alloca_once(builder, char_ptr)
    builder.store(py_list, result_slot)

    getitem_fn = builder.module.get_or_insert_function(
        lir.FunctionType(char_ptr, [char_ptr, char_ptr, lir.IntType(64)]),
        name="getitem_string_array")

    with cgutils.for_range(builder, arr.size) as loop:
        raw = builder.call(getitem_fn, [arr.offsets, arr.data, loop.index])
        py_item = pyapi.string_from_string(raw)
        pyapi.list_setitem(py_list, loop.index, py_item)

    c.context.nrt.decref(builder, typ, val)
    return builder.load(result_slot)
def setitem_list(context, builder, sig, args):
    """Lower ``dest[slice] = src`` where both operands are lists.

    With a slice step of 1 the destination is grown or shrunk so that the
    slice holds exactly ``src.size`` items, moving the tail accordingly.
    With any other step the sizes must already match; otherwise a
    ``ValueError`` is returned through the calling convention.
    """
    dest = ListInstance(context, builder, sig.args[0], args[0])
    src = ListInstance(context, builder, sig.args[2], args[2])
    slice_ty = sig.args[1]

    slc = context.make_helper(builder, slice_ty, args[1])
    slicing.guard_invalid_slice(context, builder, slice_ty, slc)
    dest.fix_slice(slc)

    n_src = src.size
    n_slots = slicing.get_slice_length(builder, slc)
    delta = builder.sub(src.size, n_slots)

    zero = ir.Constant(delta.type, 0)
    one = ir.Constant(delta.type, 1)

    unit_step = builder.icmp_signed('==', slc.step, one)
    with builder.if_else(unit_step) as (contiguous, strided):
        with contiguous:
            # Step == 1: the list may be resized.
            # Actual end of the slice, e.g. for dest[2:0] = [...]
            slice_end = builder.add(slc.start, n_slots)
            # Number of items following the slice
            tail_len = builder.sub(dest.size, slice_end)

            growing = builder.icmp_signed('>', delta, zero)
            with builder.if_then(growing):
                # Enlarge first, then shift the tail
                dest.resize(builder.add(dest.size, delta))
                dest.move(builder.add(slice_end, delta), slice_end,
                          tail_len)

            shrinking = builder.icmp_signed('<', delta, zero)
            with builder.if_then(shrinking):
                # Shift the tail first, then truncate
                dest.move(builder.add(slice_end, delta), slice_end,
                          tail_len)
                dest.resize(builder.add(dest.size, delta))

            base = slc.start
            with cgutils.for_range(builder, n_src) as loop:
                item = src.getitem(loop.index)
                dest.setitem(builder.add(loop.index, base), item)

        with strided:
            mismatch = builder.icmp_signed('!=', delta, zero)
            with builder.if_then(mismatch):
                msg = "cannot resize extended list slice with step != 1"
                context.call_conv.return_user_exc(builder, ValueError,
                                                  (msg,))

            with cgutils.for_range_slice_generic(
                    builder, slc.start, slc.stop,
                    slc.step) as (forward, backward):
                # Both directions copy src[count] into dest[index].
                for direction in (forward, backward):
                    with direction as (index, count):
                        item = src.getitem(count)
                        dest.setitem(index, item)

    return context.get_dummy_value()
def codegen(context, builder, sig, args):
    """Compact the offsets of non-NA items from one string array to another.

    For every index ``i`` of the input array whose NA bit is not set,
    ``offsets[i]`` is written to the next free position of the output
    array's offsets.  Returns the dummy value.
    """
    dst_val, src_val = args
    src = context.make_helper(builder, string_array_type, src_val)
    dst = context.make_helper(builder, string_array_type, dst_val)

    n_items = src.num_items
    write_pos_ptr = cgutils.alloca_once_value(
        builder, context.get_constant(offset_typ, 0))
    step = lir.Constant(context.get_data_type(offset_typ), 1)

    # XXX: assuming last offset is already set by allocate_string_array
    # Equivalent of:
    #   for i in range(n):
    #       if not isna(i):
    #           out_offsets[curr] = offsets[i]; curr += 1
    with cgutils.for_range(builder, n_items) as loop:
        is_na = lower_is_na(context, builder, src.null_bitmap, loop.index)
        with cgutils.if_likely(builder, builder.not_(is_na)):
            offset = builder.load(builder.gep(src.offsets, [loop.index]))
            write_pos = builder.load(write_pos_ptr)
            builder.store(offset, builder.gep(dst.offsets, [write_pos]))
            builder.store(builder.add(write_pos, step), write_pos_ptr)

    return context.get_dummy_value()
def build_ufunc_wrapper(context, func, signature):
    """
    Generate a ``__ufunc__.<name>`` wrapper taking ``(args, dims, steps,
    data)`` that calls the scalar function *func* once per loop iteration,
    loading inputs from and storing the result into the argument arrays.
    The error status returned by the scalar call is ignored.
    """
    int8_ptr = Type.pointer(Type.int(8))
    intp_t = context.get_value_type(types.intp)
    intp_ptr = Type.pointer(intp_t)

    wrapper_ty = Type.function(
        Type.void(), [Type.pointer(int8_ptr), intp_ptr, intp_ptr, int8_ptr])
    wrapper = func.module.add_function(wrapper_ty, "__ufunc__." + func.name)

    arg_args, arg_dims, arg_steps, arg_data = wrapper.args
    for param, label in ((arg_args, "args"), (arg_dims, "dims"),
                         (arg_steps, "steps"), (arg_data, "data")):
        param.name = label

    builder = Builder.new(wrapper.append_basic_block("entry"))
    loopcount = builder.load(arg_dims, name="loopcount")

    actual_args = context.get_arguments(func)

    # One accessor per input operand
    inputs = [
        UArrayArg(context, builder, arg_args, arg_steps, pos,
                  context.get_argument_type(argty))
        for pos, argty in enumerate(signature.args)
    ]

    # The output operand follows the inputs
    out = UArrayArg(context, builder, arg_args, arg_steps, len(actual_args),
                    context.get_value_type(signature.return_type))

    with cgutils.for_range(builder, loopcount, intp=intp_t) as ind:
        elems = [ary.load(ind) for ary in inputs]

        # The status is deliberately dropped; only the value is stored.
        _status, result = context.call_function(
            builder, func, signature.return_type, signature.args, elems)

        if out.byref:
            result = builder.load(result)
        out.store(result, ind)

    builder.ret_void()
    return wrapper
def list_add(context, builder, sig, args):
    """Lower ``a + b`` for lists: allocate a new list of the combined size
    and copy the items of *a* followed by the items of *b* into it."""
    lhs = ListInstance(context, builder, sig.args[0], args[0])
    rhs = ListInstance(context, builder, sig.args[1], args[1])

    lhs_len = lhs.size
    rhs_len = rhs.size
    total = builder.add(lhs_len, rhs_len)

    result = ListInstance.allocate(context, builder, sig.return_type, total)
    result.size = total

    # Items of the left operand keep their positions.
    with cgutils.for_range(builder, lhs_len) as idx:
        item = lhs.getitem(idx)
        result.setitem(idx, item)

    # Items of the right operand are shifted by the left operand's size.
    with cgutils.for_range(builder, rhs_len) as idx:
        item = rhs.getitem(idx)
        result.setitem(builder.add(idx, lhs_len), item)

    return impl_ret_new_ref(context, builder, sig.return_type, result.value)
def build_ufunc_wrapper(context, func, signature):
    """
    Emit the ``__ufunc__.<name>`` function: a loop over ``dims[0]`` items
    that loads each input element, calls the scalar function *func* and
    stores its return value in the output array.  The error status of the
    scalar call is not checked.
    """
    byte_ptr_t = Type.pointer(Type.int(8))
    intp_t = context.get_value_type(types.intp)
    intp_ptr_t = Type.pointer(intp_t)

    signature_ty = Type.function(
        Type.void(),
        [Type.pointer(byte_ptr_t), intp_ptr_t, intp_ptr_t, byte_ptr_t])
    wrapper = func.module.add_function(signature_ty,
                                       "__ufunc__." + func.name)

    arg_args, arg_dims, arg_steps, arg_data = wrapper.args
    arg_args.name, arg_dims.name = "args", "dims"
    arg_steps.name, arg_data.name = "steps", "data"

    entry_block = wrapper.append_basic_block("entry")
    builder = Builder.new(entry_block)
    loopcount = builder.load(arg_dims, name="loopcount")

    actual_args = context.get_arguments(func)

    # Input operands
    operands = []
    for position, argty in enumerate(signature.args):
        llty = context.get_argument_type(argty)
        operands.append(
            UArrayArg(context, builder, arg_args, arg_steps, position, llty))

    # Output operand
    out_llty = context.get_value_type(signature.return_type)
    out = UArrayArg(context, builder, arg_args, arg_steps, len(actual_args),
                    out_llty)

    with cgutils.for_range(builder, loopcount, intp=intp_t) as ind:
        elems = [operand.load(ind) for operand in operands]

        # Error status is ignored; only the value is written back.
        _status, value = context.call_function(
            builder, func, signature.return_type, signature.args, elems)

        if out.byref:
            value = builder.load(value)
        out.store(value, ind)

    builder.ret_void()
    return wrapper
def setitem_list(context, builder, sig, args):
    """Lower slice assignment ``dest[slice] = src`` between two lists.

    A unit-step slice lets the destination be resized (tail moved as
    needed) before the source items are copied in.  For other steps the
    source size must equal the slice length, else a ``ValueError`` is
    returned through the calling convention.
    """
    dest = ListInstance(context, builder, sig.args[0], args[0])
    src = ListInstance(context, builder, sig.args[2], args[2])
    slice_ty = sig.args[1]

    slc = slicing.make_slice(context, builder, slice_ty, args[1])
    slicing.guard_invalid_slice(context, builder, slice_ty, slc)
    dest.fix_slice(slc)

    n_src = src.size
    n_slots = slicing.get_slice_length(builder, slc)
    delta = builder.sub(src.size, n_slots)

    zero = ir.Constant(delta.type, 0)
    one = ir.Constant(delta.type, 1)

    unit_step = builder.icmp_signed('==', slc.step, one)
    with builder.if_else(unit_step) as (contiguous, strided):
        with contiguous:
            # Step == 1: resizing is allowed.
            # Effective stop of the slice, e.g. for dest[2:0] = [...]
            slice_end = builder.add(slc.start, n_slots)
            # Length of the part of the list located after the slice
            tail_len = builder.sub(dest.size, slice_end)

            growing = builder.icmp_signed('>', delta, zero)
            with builder.if_then(growing):
                # Make room, then push the tail to the right
                dest.resize(builder.add(dest.size, delta))
                dest.move(builder.add(slice_end, delta), slice_end,
                          tail_len)

            shrinking = builder.icmp_signed('<', delta, zero)
            with builder.if_then(shrinking):
                # Pull the tail to the left, then drop the excess
                dest.move(builder.add(slice_end, delta), slice_end,
                          tail_len)
                dest.resize(builder.add(dest.size, delta))

            base = slc.start
            with cgutils.for_range(builder, n_src) as loop:
                item = src.getitem(loop.index)
                dest.setitem(builder.add(loop.index, base), item)

        with strided:
            mismatch = builder.icmp_signed('!=', delta, zero)
            with builder.if_then(mismatch):
                msg = "cannot resize extended list slice with step != 1"
                context.call_conv.return_user_exc(builder, ValueError,
                                                  (msg,))

            with cgutils.for_range_slice_generic(
                    builder, slc.start, slc.stop,
                    slc.step) as (forward, backward):
                # Ascending and descending walks perform the same copy.
                for direction in (forward, backward):
                    with direction as (index, count):
                        item = src.getitem(count)
                        dest.setitem(index, item)

    return context.get_dummy_value()
def list_add(context, builder, sig, args):
    """Lower list concatenation: build a new list sized ``len(a) + len(b)``
    holding the items of *a* followed by those of *b*."""
    lhs = ListInstance(context, builder, sig.args[0], args[0])
    rhs = ListInstance(context, builder, sig.args[1], args[1])

    lhs_len = lhs.size
    rhs_len = rhs.size
    total = builder.add(lhs_len, rhs_len)

    result = ListInstance.allocate(context, builder, sig.return_type, total)
    result.size = total

    # First operand: same index in source and destination.
    with cgutils.for_range(builder, lhs_len) as loop:
        item = lhs.getitem(loop.index)
        result.setitem(loop.index, item)

    # Second operand: destination index offset by the first operand's size.
    with cgutils.for_range(builder, rhs_len) as loop:
        item = rhs.getitem(loop.index)
        result.setitem(builder.add(loop.index, lhs_len), item)

    return impl_ret_new_ref(context, builder, sig.return_type, result.value)
def list_add(context, builder, sig, args):
    """Lower list concatenation with per-item casting.

    A new list of the combined size is allocated; every item of *a* and
    then of *b* is cast from its source dtype to the destination dtype and
    stored with ``incref=True``.
    """
    lhs = ListInstance(context, builder, sig.args[0], args[0])
    rhs = ListInstance(context, builder, sig.args[1], args[1])

    lhs_len = lhs.size
    rhs_len = rhs.size
    total = builder.add(lhs_len, rhs_len)

    result = ListInstance.allocate(context, builder, sig.return_type, total)
    result.size = total

    # Left operand goes to positions [0, len(a)).
    with cgutils.for_range(builder, lhs_len) as loop:
        item = lhs.getitem(loop.index)
        item = context.cast(builder, item, lhs.dtype, result.dtype)
        result.setitem(loop.index, item, incref=True)

    # Right operand goes to positions [len(a), len(a) + len(b)).
    with cgutils.for_range(builder, rhs_len) as loop:
        item = rhs.getitem(loop.index)
        item = context.cast(builder, item, rhs.dtype, result.dtype)
        result.setitem(builder.add(loop.index, lhs_len), item, incref=True)

    return impl_ret_new_ref(context, builder, sig.return_type, result.value)
def _list_extend_list(context, builder, sig, args):
    """Append every item of the list in ``args[1]`` to the list in
    ``args[0]`` and return the destination ``ListInstance``."""
    source = ListInstance(context, builder, sig.args[1], args[1])
    target = ListInstance(context, builder, sig.args[0], args[0])

    n_source = source.size
    n_before = target.size
    n_after = builder.add(n_source, n_before)

    target.resize(n_after)
    target.size = n_after

    # New items land right after the original contents.
    with cgutils.for_range(builder, n_source) as idx:
        item = source.getitem(idx)
        target.setitem(builder.add(idx, n_before), item)

    return target
def _list_extend_list(context, builder, sig, args):
    """Extend the list in ``args[0]`` with the items of the list in
    ``args[1]``; the destination ``ListInstance`` is returned."""
    source = ListInstance(context, builder, sig.args[1], args[1])
    target = ListInstance(context, builder, sig.args[0], args[0])

    n_source = source.size
    n_before = target.size
    n_after = builder.add(n_source, n_before)

    target.resize(n_after)
    target.size = n_after

    # Copy source[i] to target[old_size + i].
    with cgutils.for_range(builder, n_source) as loop:
        item = source.getitem(loop.index)
        target.setitem(builder.add(loop.index, n_before), item)

    return target
def _list_extend_list(context, builder, sig, args):
    """Extend the list in ``args[0]`` with the items of the list in
    ``args[1]``, casting each item to the destination dtype and storing it
    with ``incref=True``.  Returns the destination ``ListInstance``."""
    source = ListInstance(context, builder, sig.args[1], args[1])
    target = ListInstance(context, builder, sig.args[0], args[0])

    n_source = source.size
    n_before = target.size
    n_after = builder.add(n_source, n_before)

    target.resize(n_after)
    target.size = n_after

    # Copy source[i] (after casting) to target[old_size + i].
    with cgutils.for_range(builder, n_source) as loop:
        item = source.getitem(loop.index)
        item = context.cast(builder, item, source.dtype, target.dtype)
        target.setitem(builder.add(loop.index, n_before), item, incref=True)

    return target
def unbox_datetime_date_array(typ, val, c):
    """Unbox a Python sequence of date objects into a 1-D C-contiguous
    ``intp`` array.

    Each element is fetched with ``sequence_getitem``, converted with
    ``unbox_datetime_date`` and stored at the matching index.  The returned
    ``NativeValue`` carries an error flag set when a Python exception is
    pending after the loop.
    """
    context = c.context
    builder = c.builder

    length = object_length(c, val)
    arr_typ = types.Array(types.intp, 1, 'C')
    out_arr = _empty_nd_impl(context, builder, arr_typ, [length])

    with cgutils.for_range(builder, length) as loop:
        element = sequence_getitem(c, val, loop.index)
        native_date = unbox_datetime_date(
            datetime_date_type, element, c).value
        # Only the data pointer of the indexing result is needed.
        dataptr, _shapes, _strides = basic_indexing(
            context, builder, arr_typ, out_arr, (types.intp,),
            (loop.index,))
        store_item(context, builder, arr_typ, native_date, dataptr)

    is_error = cgutils.is_not_null(builder, c.pyapi.err_occurred())
    return NativeValue(out_arr._getvalue(), is_error=is_error)
def list_mul_inplace(context, builder, sig, args):
    """Lower ``lst *= n``: resize the list to ``max(n, 0) * len(lst)`` items
    and fill each added chunk with a copy of the original items."""
    lst = ListInstance(context, builder, sig.args[0], args[0])
    chunk = lst.size

    factor = args[1]
    zero = ir.Constant(factor.type, 0)
    # Negative multipliers behave like zero.
    factor = builder.select(cgutils.is_neg_int(builder, factor), zero, factor)
    new_size = builder.mul(factor, chunk)

    lst.resize(new_size)

    # The first chunk is already in place; start at the second one.
    with cgutils.for_range_slice(builder, chunk, new_size, chunk,
                                 inc=True) as (base, _):
        with cgutils.for_range(builder, chunk) as loop:
            item = lst.getitem(loop.index)
            lst.setitem(builder.add(loop.index, base), item)

    return impl_ret_borrowed(context, builder, sig.return_type, lst.value)
def _iterate(self, start=None):
    """
    Walk the ``mask + 1`` entries of the payload (optionally beginning at
    *start*) and yield a SetLoop for the entries whose hash is marked used.
    """
    builder = self._builder
    context = self._context

    one = ir.Constant(context.get_value_type(types.intp), 1)
    n_entries = builder.add(self.mask, one)

    with cgutils.for_range(builder, n_entries, start=start) as range_loop:
        entry = self.get_entry(range_loop.index)
        used = is_hash_used(context, builder, entry.hash)
        with builder.if_then(used):
            yield SetLoop(index=range_loop.index, entry=entry,
                          do_break=range_loop.do_break)
def string_split_impl(context, builder, sig, args):
    """Lower a string split through the external ``str_split`` function.

    ``str_split`` is called with the lowered arguments followed by a
    pointer to an ``i64`` slot that receives the item count; it returns a
    pointer to an array of ``i8*`` values.  Those values are copied into a
    newly allocated list, which is returned as a new reference.

    *args* may be any sequence (list or tuple).
    """
    char_ptr = lir.IntType(8).as_pointer()

    nitems = cgutils.alloca_once(builder, lir.IntType(64))
    # Parameters: input str, sep, size pointer
    fnty = lir.FunctionType(
        char_ptr.as_pointer(),
        [char_ptr, char_ptr, lir.IntType(64).as_pointer()])
    fn = builder.module.get_or_insert_function(fnty, name="str_split")

    # ``args`` can be a tuple, so normalise before appending the out-pointer
    # (``tuple + list`` raises TypeError).
    ptr = builder.call(fn, list(args) + [nitems])
    size = builder.load(nitems)

    # TODO: use ptr instead of allocating and copying, use NRT_MemInfo_new
    # TODO: deallocate ptr
    _list = numba.targets.listobj.ListInstance.allocate(
        context, builder, sig.return_type, size)
    _list.size = size

    with cgutils.for_range(builder, size) as loop:
        value = builder.load(cgutils.gep_inbounds(builder, ptr, loop.index))
        _list.setitem(loop.index, value)

    return impl_ret_new_ref(context, builder, sig.return_type, _list.value)
def build_set(context, builder, set_type, items):
    """
    Create a set of type *set_type* and add each of *items* to it.
    """
    n_items = len(items)
    new_set = SetInstance.allocate(context, builder, set_type, n_items)

    # Emitting the insertion code once per item would bloat the generated
    # code, so the items are packed in an LLVM array and added from a loop.
    packed = cgutils.pack_array(builder, items)
    packed_ptr = cgutils.alloca_once_value(builder, packed)

    limit = context.get_constant(types.intp, n_items)
    with cgutils.for_range(builder, limit) as loop:
        slot = cgutils.gep(builder, packed_ptr, 0, loop.index)
        new_set.add(builder.load(slot))

    return impl_ret_new_ref(context, builder, set_type, new_set.value)
def list_mul(context, builder, sig, args):
    """Lower ``lst * n``: allocate a new list of ``max(n, 0) * len(lst)``
    items and fill it with repeated copies of the source items."""
    source = ListInstance(context, builder, sig.args[0], args[0])
    chunk = source.size

    factor = args[1]
    zero = ir.Constant(factor.type, 0)
    # Negative multipliers behave like zero.
    factor = builder.select(cgutils.is_neg_int(builder, factor), zero, factor)
    total = builder.mul(factor, chunk)

    result = ListInstance.allocate(context, builder, sig.return_type, total)
    result.size = total

    # One outer iteration per repetition, starting at offset 0.
    with cgutils.for_range_slice(builder, zero, total, chunk,
                                 inc=True) as (base, _):
        with cgutils.for_range(builder, chunk) as loop:
            item = source.getitem(loop.index)
            result.setitem(builder.add(loop.index, base), item)

    return impl_ret_new_ref(context, builder, sig.return_type, result.value)
def random_arr(context, builder, sig, args, typing_key=typing_key):
    """Lower the array-returning form of a random function.

    The last argument is the shape; the preceding ones are forwarded to the
    scalar implementation registered under *typing_key*, which is called
    once per element of the newly allocated array.
    """
    from . import arrayobj

    arrty = sig.return_type
    elem_sig = signature(arrty.dtype, *sig.args[:-1])
    elem_args = args[:-1]

    # Allocate the result array from the trailing shape argument...
    shape = arrayobj._parse_shape(context, builder, sig.args[-1], args[-1])
    result = arrayobj._empty_nd_impl(context, builder, arrty, shape)

    # ... and fill it element by element, in natural order
    draw = context.get_function(typing_key, elem_sig)
    with cgutils.for_range(builder, result.nitems) as loop:
        sample = draw(builder, elem_args)
        slot = cgutils.gep(builder, result.data, loop.index)
        arrayobj.store_item(context, builder, arrty, sample, slot)

    return impl_ret_new_ref(context, builder, sig.return_type,
                            result._getvalue())
def box_str(typ, val, c):
    """Box a meminfo-backed string array into a Python list.

    The payload struct is loaded from the meminfo data pointer, a list of
    ``size`` slots is created and each slot receives the Python string
    built from the C string returned by the external
    ``getitem_string_array`` function.  The native value is decref'ed
    through NRT before the list is returned.
    """
    context = c.context
    builder = c.builder
    pyapi = c.pyapi
    char_ptr = lir.IntType(8).as_pointer()

    payload_ty = StringArrayPayloadType()
    holder = context.make_helper(builder, typ, val)

    # Reinterpret the raw meminfo data pointer as a payload pointer.
    raw_data = context.nrt.meminfo_data(builder, holder.meminfo)
    payload_ptr = builder.bitcast(
        raw_data, context.get_data_type(payload_ty).as_pointer())
    arr = cgutils.create_struct_proxy(payload_ty)(
        context, builder, builder.load(payload_ptr))

    py_list = pyapi.list_new(arr.size)
    # Keep the list pointer in a stack slot; it is reloaded on return.
    result_slot = cgutils.alloca_once(builder, char_ptr)
    builder.store(py_list, result_slot)

    getitem_fn = builder.module.get_or_insert_function(
        lir.FunctionType(char_ptr, [char_ptr, char_ptr, lir.IntType(64)]),
        name="getitem_string_array")

    with cgutils.for_range(builder, arr.size) as loop:
        raw = builder.call(getitem_fn, [arr.offsets, arr.data, loop.index])
        py_item = pyapi.string_from_string(raw)
        pyapi.list_setitem(py_list, loop.index, py_item)

    context.nrt.decref(builder, typ, val)
    return builder.load(result_slot)
def list_eq(context, builder, sig, args):
    """Lower ``a == b`` for lists: true when both have the same size and
    every pair of items compares equal.  The item loop stops at the first
    mismatch."""
    lhs_ty, rhs_ty = sig.args
    lhs = ListInstance(context, builder, lhs_ty, args[0])
    rhs = ListInstance(context, builder, rhs_ty, args[1])

    n_items = lhs.size
    sizes_match = builder.icmp_signed("==", n_items, rhs.size)

    # Starts as the size comparison; cleared on the first unequal pair.
    verdict = cgutils.alloca_once_value(builder, sizes_match)

    with builder.if_then(sizes_match):
        with cgutils.for_range(builder, n_items) as loop:
            left = lhs.getitem(loop.index)
            right = rhs.getitem(loop.index)
            same = context.generic_compare(
                builder, "==", (lhs_ty.dtype, rhs_ty.dtype), (left, right))
            differ = builder.not_(same)
            with builder.if_then(differ):
                # No need to look any further
                builder.store(cgutils.false_bit, verdict)
                loop.do_break()

    return builder.load(verdict)
def bytes_to_charseq(context, builder, fromty, toty, val):
    """Lower a cast from a bytes value to a fixed-size char sequence.

    At most ``toty.count`` bytes are copied.  When the source holds fewer
    items than that, the destination buffer is zero-filled before copying.
    Returns the loaded destination value.
    """
    byte_t = ir.IntType(8)

    source = cgutils.create_struct_proxy(fromty)(context, builder, value=val)
    src_bytes = builder.bitcast(source.data, ir.IntType(8).as_pointer())
    n_src = source.nitems

    out_slot = cgutils.alloca_once(builder, context.get_value_type(toty))
    out_bytes = builder.bitcast(out_slot, byte_t.as_pointer())

    capacity = ir.Constant(n_src.type, toty.count)
    needs_padding = builder.icmp_unsigned('<', n_src, capacity)
    n_copy = builder.select(needs_padding, n_src, capacity)

    with builder.if_then(needs_padding):
        # The copy will not fill the destination: clear all of it first.
        cgutils.memset(builder, out_bytes,
                       ir.Constant(n_src.type, toty.count), 0)

    with cgutils.for_range(builder, n_copy) as loop:
        src_slot = builder.gep(src_bytes, [loop.index])
        byte = builder.zext(builder.load(src_slot), byte_t)
        builder.store(byte, builder.gep(out_bytes, [loop.index]))

    return builder.load(out_slot)
def list_eq(context, builder, sig, args):
    """Lower list equality.  The result is the size comparison, turned to
    false as soon as one pair of items at the same index compares unequal
    (the loop breaks at that point)."""
    lhs_ty, rhs_ty = sig.args
    lhs = ListInstance(context, builder, lhs_ty, args[0])
    rhs = ListInstance(context, builder, rhs_ty, args[1])

    n_items = lhs.size
    sizes_match = builder.icmp_signed('==', n_items, rhs.size)

    # Result slot, initialised with the outcome of the size check.
    outcome = cgutils.alloca_once_value(builder, sizes_match)

    with builder.if_then(sizes_match):
        with cgutils.for_range(builder, n_items) as loop:
            left = lhs.getitem(loop.index)
            right = rhs.getitem(loop.index)
            same = context.generic_compare(
                builder, '==', (lhs_ty.dtype, rhs_ty.dtype), (left, right))
            differ = builder.not_(same)
            with builder.if_then(differ):
                # Stop at the first difference
                builder.store(cgutils.false_bit, outcome)
                loop.do_break()

    return builder.load(outcome)
def build_ufunc_wrapper(context, func, signature):
    """
    Generate the ``__ufunc__.<name>`` wrapper around the scalar function
    *func*.

    Two loops are emitted: one taken when every input reports
    ``is_unit_strided`` and a general one otherwise.  Afterwards *func* is
    given internal linkage, the calls produced by both loop bodies are
    inlined and the module is optimized (and printed when
    ``config.DUMP_OPTIMIZED`` is set).
    """
    module = func.module

    int8_ptr = Type.pointer(Type.int(8))
    intp_t = context.get_value_type(types.intp)
    intp_ptr = Type.pointer(intp_t)

    wrapper_ty = Type.function(
        Type.void(), [Type.pointer(int8_ptr), intp_ptr, intp_ptr, int8_ptr])
    wrapper = module.add_function(wrapper_ty, "__ufunc__." + func.name)

    arg_args, arg_dims, arg_steps, arg_data = wrapper.args
    for param, label in ((arg_args, "args"), (arg_dims, "dims"),
                         (arg_steps, "steps"), (arg_data, "data")):
        param.name = label

    builder = Builder.new(wrapper.append_basic_block("entry"))
    loopcount = builder.load(arg_dims, name="loopcount")

    actual_args = context.get_arguments(func)

    # Input operands
    arrays = [
        UArrayArg(context, builder, arg_args, arg_steps, pos,
                  context.get_argument_type(argty))
        for pos, argty in enumerate(signature.args)
    ]

    # Output operand
    valty = context.get_data_type(signature.return_type)
    out = UArrayArg(context, builder, arg_args, arg_steps, len(actual_args),
                    valty)

    # One zero-initialised offset slot per input, plus one for the output
    zero = context.get_constant(types.intp, 0)
    offsets = []
    for _ in arrays:
        slot = cgutils.alloca_once(builder, intp_t)
        builder.store(zero, slot)
        offsets.append(slot)

    store_offset = cgutils.alloca_once(builder, intp_t)
    builder.store(zero, store_offset)

    # True only when every input is unit-strided
    all_unit = cgutils.true_bit
    for ary in arrays:
        all_unit = builder.and_(all_unit, ary.is_unit_strided)

    with cgutils.ifelse(builder, all_unit) as (fast_path, slow_path):
        with fast_path:
            with cgutils.for_range(builder, loopcount, intp=intp_t) as ind:
                fast_call = build_fast_loop_body(
                    context, func, builder, arrays, out, offsets,
                    store_offset, signature, ind)
            builder.ret_void()

        with slow_path:
            # General (strided) loop
            with cgutils.for_range(builder, loopcount, intp=intp_t):
                slow_call = build_slow_loop_body(
                    context, func, builder, arrays, out, offsets,
                    store_offset, signature)
            builder.ret_void()

    builder.ret_void()
    del builder

    # Set core function to internal so that it is not generated
    func.linkage = LINKAGE_INTERNAL
    # Force inlining of the scalar calls
    inline_function(slow_call)
    inline_function(fast_call)

    # Run optimizer
    context.optimize(module)

    if config.DUMP_OPTIMIZED:
        print(module)

    return wrapper
def _lookup(self, item, h, for_insert=False):
    """
    Lookup the *item* with the given hash values in the entries.

    Return a (found, entry index) tuple:
    - If found is true, <entry index> points to the entry containing
      the item.
    - If found is false, <entry index> points to the empty entry that
      the item can be written to (only if *for_insert* is true)

    The generated code first probes ``LINEAR_PROBES`` consecutive entries,
    then switches to a perturbation-based probe sequence that loops until
    either a matching or an empty entry is reached.
    """
    context = self._context
    builder = self._builder

    # All index arithmetic is done in the type of the hash value
    intp_t = h.type

    mask = self.mask
    dtype = self._ty.dtype
    eqfn = context.get_function(
        '==', typing.signature(types.boolean, dtype, dtype))

    one = ir.Constant(intp_t, 1)
    five = ir.Constant(intp_t, 5)

    # The perturbation value for probing
    perturb = cgutils.alloca_once_value(builder, h)
    # The index of the entry being considered: start with (hash & mask)
    index = cgutils.alloca_once_value(builder, builder.and_(h, mask))
    if for_insert:
        # The index of the first deleted entry in the lookup chain
        free_index_sentinel = mask.type(-1)  # highest unsigned index
        free_index = cgutils.alloca_once_value(builder, free_index_sentinel)

    # Exit points of the probing code; check_entry() branches to
    # bb_found / bb_not_found, both of which fall through to bb_end.
    bb_body = builder.append_basic_block("lookup.body")
    bb_found = builder.append_basic_block("lookup.found")
    bb_not_found = builder.append_basic_block("lookup.not_found")
    bb_end = builder.append_basic_block("lookup.end")

    def check_entry(i):
        """
        Check entry *i* against the value being searched for.

        Branches to bb_found on a match and to bb_not_found on an empty
        entry; otherwise control continues after the emitted checks.
        """
        entry = self.get_entry(i)
        entry_hash = entry.hash

        with builder.if_then(builder.icmp_unsigned('==', h, entry_hash)):
            # Hashes are equal, compare values
            # (note this also ensures the entry is used)
            eq = eqfn(builder, (item, entry.key))
            with builder.if_then(eq):
                builder.branch(bb_found)

        with builder.if_then(is_hash_empty(context, builder, entry_hash)):
            builder.branch(bb_not_found)

        if for_insert:
            # Memorize the index of the first deleted entry
            with builder.if_then(
                    is_hash_deleted(context, builder, entry_hash)):
                j = builder.load(free_index)
                # Only record *i* if no deleted entry was seen before
                j = builder.select(
                    builder.icmp_unsigned('==', j, free_index_sentinel),
                    i, j)
                builder.store(j, free_index)

    # First linear probing.  When the number of collisions is small,
    # the linear probing loop achieves better cache locality and
    # is also slightly cheaper computationally.
    with cgutils.for_range(builder, ir.Constant(intp_t, LINEAR_PROBES)):
        i = builder.load(index)
        check_entry(i)
        # Advance to the next entry, wrapping around with the mask
        i = builder.add(i, one)
        i = builder.and_(i, mask)
        builder.store(i, index)

    # If not found after linear probing, switch to a non-linear
    # perturbation keyed on the unmasked hash value.
    # XXX how to tell LLVM this branch is unlikely?
    builder.branch(bb_body)
    with builder.goto_block(bb_body):
        i = builder.load(index)
        check_entry(i)

        # Perturb to go to next entry:
        #   perturb >>= 5
        #   i = (i * 5 + 1 + perturb) & mask
        p = builder.load(perturb)
        p = builder.lshr(p, five)
        i = builder.add(one, builder.mul(i, five))
        i = builder.and_(mask, builder.add(i, p))
        builder.store(i, index)
        builder.store(p, perturb)
        # Loop
        builder.branch(bb_body)

    with builder.goto_block(bb_not_found):
        if for_insert:
            # Not found => for insertion, return the index of the first
            # deleted entry (if any), to avoid creating an infinite
            # lookup chain (issue #1913).
            i = builder.load(index)
            j = builder.load(free_index)
            i = builder.select(
                builder.icmp_unsigned('==', j, free_index_sentinel),
                i, j)
            builder.store(i, index)
        builder.branch(bb_end)

    with builder.goto_block(bb_found):
        builder.branch(bb_end)

    builder.position_at_end(bb_end)

    # Merge both exits: the flag tells which block control came from
    found = builder.phi(ir.IntType(1), 'found')
    found.add_incoming(cgutils.true_bit, bb_found)
    found.add_incoming(cgutils.false_bit, bb_not_found)

    return found, builder.load(index)
def box_str_arr_split_view(typ, val, c):
    """
    Box a string-array split view into a NumPy object array of Python lists.

    An ``ndarray`` of ``object_`` dtype with ``num_items`` elements is
    created, and for every element a Python list of strings is built from
    the view's ``index_offsets`` / ``data_offsets`` / ``data`` fields and
    stored through the ``array_getptr1`` / ``array_setitem`` helpers.

    Returns the new array object (a new reference).
    """
    context = c.context
    builder = c.builder
    sp_view = context.make_helper(builder, string_array_split_view_type, val)

    # create array of objects with num_items shape
    mod_name = c.context.insert_const_string(c.builder.module, "numpy")
    np_class_obj = c.pyapi.import_module_noblock(mod_name)
    dtype = c.pyapi.object_getattr_string(np_class_obj, 'object_')
    l_num_items = builder.sext(sp_view.num_items, c.pyapi.longlong)
    num_items_obj = c.pyapi.long_from_longlong(l_num_items)
    out_arr = c.pyapi.call_method(
        np_class_obj, "ndarray", (num_items_obj, dtype))

    # Array setitem call
    arr_get_fnty = LLType.function(
        lir.IntType(8).as_pointer(), [c.pyapi.pyobj, c.pyapi.py_ssize_t])
    arr_get_fn = c.pyapi._get_function(arr_get_fnty, name="array_getptr1")
    arr_setitem_fnty = LLType.function(
        lir.VoidType(),
        [c.pyapi.pyobj, lir.IntType(8).as_pointer(), c.pyapi.pyobj])
    arr_setitem_fn = c.pyapi._get_function(
        arr_setitem_fnty, name="array_setitem")

    # for each string
    with cgutils.for_range(builder, sp_view.num_items) as loop:
        str_ind = loop.index
        # start and end offset of string's list in index_offsets
        # sp_view.index_offsets[str_ind]
        list_start_offset = builder.sext(
            builder.load(builder.gep(sp_view.index_offsets, [str_ind])),
            lir.IntType(64))
        # sp_view.index_offsets[str_ind+1]
        list_end_offset = builder.sext(
            builder.load(
                builder.gep(sp_view.index_offsets,
                            [builder.add(str_ind, str_ind.type(1))])),
            lir.IntType(64))

        # Build a new Python list holding (end - start - 1) strings
        nitems = builder.sub(list_end_offset, list_start_offset)
        nitems = builder.sub(nitems, nitems.type(1))
        list_obj = c.pyapi.list_new(nitems)
        with c.builder.if_then(cgutils.is_not_null(c.builder, list_obj),
                               likely=True):
            # The inner loop gets its own name so that it does not rebind
            # the outer *loop* variable.
            with cgutils.for_range(c.builder, nitems) as item_loop:
                # data_offsets of current list
                start_index = builder.add(list_start_offset, item_loop.index)
                data_start = builder.load(
                    builder.gep(sp_view.data_offsets, [start_index]))
                # add 1 since starts from -1
                data_start = builder.add(data_start, data_start.type(1))
                data_end = builder.load(
                    builder.gep(
                        sp_view.data_offsets,
                        [builder.add(start_index, start_index.type(1))]))
                data_ptr = builder.gep(
                    builder.extract_value(sp_view.data, 0), [data_start])
                str_size = builder.sext(
                    builder.sub(data_end, data_start), lir.IntType(64))
                str_obj = c.pyapi.string_from_string_and_size(
                    data_ptr, str_size)
                c.pyapi.list_setitem(list_obj, item_loop.index, str_obj)

        # NOTE(review): list_obj is stored even when list_new() returned
        # NULL, and it is not released here -- confirm against the
        # array_setitem helper whether it takes its own reference.
        arr_ptr = builder.call(arr_get_fn, [out_arr, str_ind])
        builder.call(arr_setitem_fn, [out_arr, arr_ptr, list_obj])

    # Release the temporaries created above: the attribute lookup and the
    # integer conversion both return new references that were previously
    # never dropped.
    c.pyapi.decref(num_items_obj)
    c.pyapi.decref(dtype)
    c.pyapi.decref(np_class_obj)
    return out_arr
def build_gufunc_wrapper(context, func, signature, sin, sout):
    """
    Emit a ``__gufunc__.<name>`` wrapper around *func* in *func*'s module.

    The wrapper has the LLVM signature ``void(i8**, intp*, intp*, i8*)``
    (args, dims, steps, data).  It reads the loop count from ``dims[0]``,
    builds one ``GUArrayArg`` per entry of ``signature.args`` and calls
    *func* once per iteration, advancing every array argument afterwards.

    *sin* and *sout* hold, per input and per output, the dimension symbols
    of that argument.  Returns the verified wrapper function.
    """
    module = func.module

    byte_t = Type.int(8)
    byte_ptr_t = Type.pointer(byte_t)
    byte_ptr_ptr_t = Type.pointer(byte_ptr_t)
    intp_t = context.get_value_type(types.intp)
    intp_ptr_t = Type.pointer(intp_t)

    fnty = Type.function(Type.void(), [byte_ptr_ptr_t, intp_ptr_t,
                                       intp_ptr_t, byte_ptr_t])

    wrapper = module.add_function(fnty, "__gufunc__." + func.name)
    arg_args, arg_dims, arg_steps, arg_data = wrapper.args
    arg_args.name = "args"
    arg_dims.name = "dims"
    arg_steps.name = "steps"
    arg_data.name = "data"

    builder = Builder.new(wrapper.append_basic_block("entry"))
    loopcount = builder.load(arg_dims, name="loopcount")

    # Unpack shapes: number each distinct input symbol in order of first
    # appearance.  Only the inputs are scanned (the original code walked
    # `sin` twice through an unused outer loop, the second pass being a
    # no-op).
    sym_map = {}
    for syms in sin:
        for s in syms:
            if s not in sym_map:
                sym_map[s] = len(sym_map)

    # Symbol number i is read from dims[i + 1]; dims[0] is the loop count.
    sym_dim = {}
    for s, i in sym_map.items():
        sym_dim[s] = builder.load(
            builder.gep(arg_dims, [context.get_constant(types.intp, i + 1)]))

    # Prepare inputs
    arrays = []
    step_offset = len(sin) + len(sout)
    for i, (typ, sym) in enumerate(zip(signature.args, sin + sout)):
        ary = GUArrayArg(context, builder, arg_args, arg_dims, arg_steps, i,
                         step_offset, typ, sym, sym_dim)
        step_offset += ary.ndim
        arrays.append(ary)

    # Loop
    with cgutils.for_range(builder, loopcount, intp=intp_t) as ind:
        args = [a.array_value for a in arrays]

        # Both the status and the return value of the call are ignored
        context.call_function(builder, func, signature.return_type,
                              signature.args, args)

        for a in arrays:
            a.next(ind)

    builder.ret_void()

    wrapper.verify()
    return wrapper
def _python_array_obj_to_native_list(typ, obj, c, size, listptr, errorptr):
    """
    Construct a new native list from a Python array of objects.

    Adapted from _python_list_to_native, with list_getitem replaced by a
    call to the "array_getptr1" helper followed by a load.  The resulting
    list value is stored into *listptr*; *errorptr* is set to true when
    allocation or unboxing of an element fails.
    """
    def check_element_type(nth, itemobj, expected_typobj):
        # Currently unused: the only call site below is commented out.
        typobj = nth.typeof(itemobj)
        # Bail out if *typobj* is NULL
        typobj_is_null = cgutils.is_null(c.builder, typobj)
        with c.builder.if_then(typobj_is_null, likely=False):
            c.builder.store(cgutils.true_bit, errorptr)
            loop.do_break()
        # All objects are required to have the same exact type
        type_mismatch = c.builder.icmp_signed('!=', typobj, expected_typobj)
        with c.builder.if_then(type_mismatch, likely=False):
            c.builder.store(cgutils.true_bit, errorptr)
            c.pyapi.err_format(
                "PyExc_TypeError",
                "can't unbox heterogeneous list: %S != %S",
                expected_typobj, typobj,
            )
            c.pyapi.decref(typobj)
            loop.do_break()
        c.pyapi.decref(typobj)

    # Allocate the native list
    alloc_ok, native_list = listobj.ListInstance.allocate_ex(
        c.context, c.builder, typ, size)

    # Helper returning a pointer to the array slot at a given index
    arr_get_fnty = LLType.function(
        LLType.pointer(c.pyapi.pyobj), [c.pyapi.pyobj, c.pyapi.py_ssize_t])
    arr_get_fn = c.pyapi._get_function(arr_get_fnty, name="array_getptr1")

    with c.builder.if_else(alloc_ok, likely=True) as (if_ok, if_not_ok):
        with if_ok:
            native_list.size = size
            zero = lir.Constant(size.type, 0)
            has_items = c.builder.icmp_signed('>', size, zero)
            with c.builder.if_then(has_items, likely=True):
                # Walk the Python array and unbox each object into the
                # native list
                with _NumbaTypeHelper(c) as nth:
                    # Note: *expected_typobj* can't be NULL
                    # TODO: enable type checking when emty list item in
                    # list(list(str)) case can be handled
                    # expected_typobj = nth.typeof(c.builder.load(
                    #     c.builder.call(arr_get_fn, [obj, zero])))
                    with cgutils.for_range(c.builder, size) as loop:
                        slot_ptr = c.builder.call(
                            arr_get_fn, [obj, loop.index])
                        # The helper yields a pointer to the object, hence
                        # the additional load
                        itemobj = c.builder.load(slot_ptr)
                        # c.pyapi.print_object(itemobj)
                        # check_element_type(nth, itemobj, expected_typobj)
                        # XXX we don't call native cleanup for each
                        # list element, since that would require keeping
                        # of which unboxings have been successful.
                        native = c.unbox(typ.dtype, itemobj)
                        with c.builder.if_then(native.is_error,
                                               likely=False):
                            c.builder.store(cgutils.true_bit, errorptr)
                            loop.do_break()
                        # The object (e.g. string) is stored so incref=True
                        native_list.setitem(
                            loop.index, native.value, incref=True)
                    # c.pyapi.decref(expected_typobj)
            if typ.reflected:
                native_list.parent = obj
            # Stash the meminfo pointer in the Python object so it can be
            # reused later.
            no_error = c.builder.not_(c.builder.load(errorptr))
            with c.builder.if_then(no_error, likely=False):
                c.pyapi.object_set_private_data(obj, native_list.meminfo)
            native_list.set_dirty(False)
            c.builder.store(native_list.value, listptr)

        with if_not_ok:
            c.builder.store(cgutils.true_bit, errorptr)

    # On error, release the whole native list
    with c.builder.if_then(c.builder.load(errorptr)):
        c.context.nrt.decref(c.builder, typ, native_list.value)
def build(self):
    """
    Emit the ``__gufunc__.<name>`` wrapper for ``self.func`` in its module.

    The wrapper has the LLVM signature ``void(i8**, intp*, intp*, i8*)``
    (args, dims, steps, data).  It runs the prologue, then loops
    ``dims[0]`` times over the body produced by ``gen_loop_body`` (jumping
    to the return block as soon as the body reports an error), and finally
    runs the epilogue.  Afterwards the core function is made internal, the
    inner call is inlined and the module is optimized.

    Returns a ``(wrapper, self.env)`` tuple.
    """
    module = self.func.module

    byte_t = Type.int(8)
    byte_ptr_t = Type.pointer(byte_t)
    byte_ptr_ptr_t = Type.pointer(byte_ptr_t)
    intp_t = self.context.get_value_type(types.intp)
    intp_ptr_t = Type.pointer(intp_t)

    fnty = Type.function(Type.void(), [byte_ptr_ptr_t, intp_ptr_t,
                                       intp_ptr_t, byte_ptr_t])

    wrapper = module.add_function(fnty, "__gufunc__." + self.func.name)
    arg_args, arg_dims, arg_steps, arg_data = wrapper.args
    arg_args.name = "args"
    arg_dims.name = "dims"
    arg_steps.name = "steps"
    arg_data.name = "data"

    builder = Builder.new(wrapper.append_basic_block("entry"))
    loopcount = builder.load(arg_dims, name="loopcount")

    # Unpack shapes: number each distinct input symbol in order of first
    # appearance (the previously computed `unique_syms` set was never read
    # and has been dropped).
    sym_map = {}
    for syms in self.sin:
        for s in syms:
            if s not in sym_map:
                sym_map[s] = len(sym_map)

    # Symbol number i is read from dims[i + 1]; dims[0] is the loop count.
    sym_dim = {}
    for s, i in sym_map.items():
        sym_dim[s] = builder.load(
            builder.gep(arg_dims,
                        [self.context.get_constant(types.intp, i + 1)]))

    # Prepare inputs
    arrays = []
    step_offset = len(self.sin) + len(self.sout)
    for i, (typ, sym) in enumerate(zip(self.signature.args,
                                       self.sin + self.sout)):
        ary = GUArrayArg(self.context, builder, arg_args, arg_dims,
                         arg_steps, i, step_offset, typ, sym, sym_dim)
        if not ary.as_scalar:
            step_offset += ary.ndim
        arrays.append(ary)

    bbreturn = cgutils.get_function(builder).append_basic_block('.return')

    # Prologue
    self.gen_prologue(builder)

    # Loop
    with cgutils.for_range(builder, loopcount, intp=intp_t) as ind:
        args = [a.array_value for a in arrays]
        innercall, error = self.gen_loop_body(builder, args)
        # If error, escape
        cgutils.cbranch_or_continue(builder, error, bbreturn)

        for a in arrays:
            a.next(ind)

    builder.branch(bbreturn)
    builder.position_at_end(bbreturn)

    # Epilogue
    self.gen_epilogue(builder)

    builder.ret_void()

    module.verify()
    # Set core function to internal so that it is not generated
    self.func.linkage = LINKAGE_INTERNAL
    # Force inline of code function
    inline_function(innercall)
    # Run optimizer
    self.context.optimize(module)

    if config.DUMP_OPTIMIZED:
        print(module)

    wrapper.verify()
    return wrapper, self.env
def build_ufunc_wrapper(library, context, fname, signature, objmode, envptr,
                        env):
    """
    Wrap the scalar function named *fname* with a loop that iterates over
    the arguments.

    The wrapper is emitted into a fresh 'ufunc_wrapper' library, which is
    linked against *library*.  Returns the pointer obtained from
    ``get_pointer_to_function`` for the wrapper.
    """
    assert isinstance(fname, str)

    # Wrapper signature: void(i8** args, intp* dims, intp* steps, i8* data)
    intp_t = context.get_value_type(types.intp)
    char_ptr_t = Type.pointer(Type.int(8))
    char_ptr_ptr_t = Type.pointer(char_ptr_t)
    intp_ptr_t = Type.pointer(intp_t)
    wrapper_fnty = Type.function(
        Type.void(), [char_ptr_ptr_t, intp_ptr_t, intp_ptr_t, char_ptr_t])

    wrapperlib = context.codegen().create_library('ufunc_wrapper')
    wrapper_module = wrapperlib.create_ir_module('')

    # Declare the scalar function inside the wrapper module
    call_conv = context.call_conv
    if objmode:
        func_type = call_conv.get_function_type(
            types.pyobject, [types.pyobject] * len(signature.args))
    else:
        func_type = call_conv.get_function_type(
            signature.return_type, signature.args)
    func = wrapper_module.add_function(func_type, name=fname)
    func.attributes.add("alwaysinline")

    wrapper = wrapper_module.add_function(wrapper_fnty,
                                          "__ufunc__." + func.name)
    arg_args, arg_dims, arg_steps, arg_data = wrapper.args
    arg_args.name = "args"
    arg_dims.name = "dims"
    arg_steps.name = "steps"
    arg_data.name = "data"

    builder = Builder(wrapper.append_basic_block("entry"))
    loopcount = builder.load(arg_dims, name="loopcount")

    # One array helper per input ...
    arrays = []
    for position, argtype in enumerate(signature.args):
        arrays.append(UArrayArg(context, builder, arg_args, arg_steps,
                                position, argtype))

    # ... and one for the output, which follows the inputs
    out = UArrayArg(context, builder, arg_args, arg_steps, len(arrays),
                    signature.return_type)

    # Zero-initialised offset slot per input, plus one for the output
    offsets = []
    zero = context.get_constant(types.intp, 0)
    for _ in arrays:
        slot = cgutils.alloca_once(builder, intp_t)
        offsets.append(slot)
        builder.store(zero, slot)

    store_offset = cgutils.alloca_once(builder, intp_t)
    builder.store(zero, store_offset)

    # True only when every input is unit-strided
    unit_strided = cgutils.true_bit
    for ary in arrays:
        unit_strided = builder.and_(unit_strided, ary.is_unit_strided)

    pyapi = context.get_python_api(builder)
    if objmode:
        # General loop, executed while holding the GIL
        gil = pyapi.gil_ensure()
        with cgutils.for_range(builder, loopcount, intp=intp_t):
            build_obj_loop_body(context, func, builder, arrays, out, offsets,
                                store_offset, signature, pyapi, envptr, env)
        pyapi.gil_release(gil)
        builder.ret_void()
    else:
        with builder.if_else(unit_strided) as (is_unit_strided, is_strided):
            with is_unit_strided:
                with cgutils.for_range(builder, loopcount,
                                       intp=intp_t) as loop:
                    build_fast_loop_body(context, func, builder, arrays, out,
                                         offsets, store_offset, signature,
                                         loop.index, pyapi)

            with is_strided:
                # General loop
                with cgutils.for_range(builder, loopcount, intp=intp_t):
                    build_slow_loop_body(context, func, builder, arrays, out,
                                         offsets, store_offset, signature,
                                         pyapi)

        builder.ret_void()
    del builder

    # Link and finalize
    wrapperlib.add_ir_module(wrapper_module)
    wrapperlib.add_linking_library(library)
    return wrapperlib.get_pointer_to_function(wrapper.name)
def build_gufunc_wrapper(context, func, signature, sin, sout):
    """
    Emit a ``__gufunc__.<name>`` wrapper around *func* in *func*'s module.

    The wrapper has the LLVM signature ``void(i8**, intp*, intp*, i8*)``
    (args, dims, steps, data).  It reads the loop count from ``dims[0]``,
    builds one ``GUArrayArg`` per entry of ``signature.args`` and calls
    *func* once per iteration, advancing every array argument afterwards.
    *func* is then made internal, the call is inlined and the module is
    optimized.

    *sin* and *sout* hold, per input and per output, the dimension symbols
    of that argument.  Returns the verified wrapper function.
    """
    module = func.module

    byte_t = Type.int(8)
    byte_ptr_t = Type.pointer(byte_t)
    byte_ptr_ptr_t = Type.pointer(byte_ptr_t)
    intp_t = context.get_value_type(types.intp)
    intp_ptr_t = Type.pointer(intp_t)

    fnty = Type.function(Type.void(), [byte_ptr_ptr_t, intp_ptr_t,
                                       intp_ptr_t, byte_ptr_t])

    wrapper = module.add_function(fnty, "__gufunc__." + func.name)
    arg_args, arg_dims, arg_steps, arg_data = wrapper.args
    arg_args.name = "args"
    arg_dims.name = "dims"
    arg_steps.name = "steps"
    arg_data.name = "data"

    builder = Builder.new(wrapper.append_basic_block("entry"))
    loopcount = builder.load(arg_dims, name="loopcount")

    # Unpack shapes: number each distinct input symbol in order of first
    # appearance.  Only the inputs are scanned (the original code walked
    # `sin` twice through an unused outer loop, the second pass being a
    # no-op).
    sym_map = {}
    for syms in sin:
        for s in syms:
            if s not in sym_map:
                sym_map[s] = len(sym_map)

    # Symbol number i is read from dims[i + 1]; dims[0] is the loop count.
    sym_dim = {}
    for s, i in sym_map.items():
        sym_dim[s] = builder.load(
            builder.gep(arg_dims, [context.get_constant(types.intp, i + 1)]))

    # Prepare inputs
    arrays = []
    step_offset = len(sin) + len(sout)
    for i, (typ, sym) in enumerate(zip(signature.args, sin + sout)):
        ary = GUArrayArg(context, builder, arg_args, arg_dims, arg_steps, i,
                         step_offset, typ, sym, sym_dim)
        if not ary.as_scalar:
            step_offset += ary.ndim
        arrays.append(ary)

    # Loop
    with cgutils.for_range(builder, loopcount, intp=intp_t) as ind:
        args = [a.array_value for a in arrays]

        # The return value is ignored; the status is kept only so that the
        # call instruction can be inlined below.
        status, retval = context.call_function(builder, func,
                                               signature.return_type,
                                               signature.args, args)

        for a in arrays:
            a.next(ind)

    builder.ret_void()

    # Set core function to internal so that it is not generated
    func.linkage = LINKAGE_INTERNAL
    # Force inline of code function
    inline_function(status.code)
    # Run optimizer
    context.optimize(module)

    if config.DUMP_OPTIMIZED:
        print(module)

    wrapper.verify()
    return wrapper
def _lookup(self, item, h, for_insert=False):
    """
    Emit the probing code that searches the entries for *item*, whose
    hash value is *h*.

    Return a (found, entry index) tuple:
    - If found is true, <entry index> points to the entry containing
      the item.
    - If found is false, <entry index> points to the empty entry that
      the item can be written to (only if *for_insert* is true)
    """
    context = self._context
    builder = self._builder

    intp_t = h.type

    mask = self.mask
    key_type = self._ty.dtype
    key_equal = context.get_function(
        '==', typing.signature(types.boolean, key_type, key_type))

    const_one = ir.Constant(intp_t, 1)
    const_five = ir.Constant(intp_t, 5)

    # Stack slot for the perturbation value used while probing
    perturb_slot = cgutils.alloca_once_value(builder, h)
    # Stack slot for the entry index being examined; the search begins
    # at (hash & mask)
    first_index = builder.and_(h, mask)
    index_slot = cgutils.alloca_once_value(builder, first_index)
    if for_insert:
        # Stack slot remembering the first deleted entry met along the
        # lookup chain; the all-ones value (highest unsigned index)
        # means "none seen yet"
        no_free_entry = mask.type(-1)
        free_slot = cgutils.alloca_once_value(builder, no_free_entry)

    bb_body = builder.append_basic_block("lookup.body")
    bb_found = builder.append_basic_block("lookup.found")
    bb_not_found = builder.append_basic_block("lookup.not_found")
    bb_end = builder.append_basic_block("lookup.end")

    def probe(candidate):
        """
        Compare the entry at index *candidate* with the searched item,
        branching to the found / not-found blocks when the search ends.
        """
        entry = self.get_entry(candidate)
        stored_hash = entry.hash

        same_hash = builder.icmp_unsigned('==', h, stored_hash)
        with builder.if_then(same_hash):
            # Equal hashes: fall back to comparing the keys themselves
            # (a matching hash also implies the entry is in use)
            keys_match = key_equal(builder, (item, entry.key))
            with builder.if_then(keys_match):
                builder.branch(bb_found)

        slot_is_empty = is_hash_empty(context, builder, stored_hash)
        with builder.if_then(slot_is_empty):
            builder.branch(bb_not_found)

        if for_insert:
            # Record the first deleted entry encountered
            slot_is_deleted = is_hash_deleted(context, builder, stored_hash)
            with builder.if_then(slot_is_deleted):
                remembered = builder.load(free_slot)
                nothing_recorded = builder.icmp_unsigned(
                    '==', remembered, no_free_entry)
                remembered = builder.select(
                    nothing_recorded, candidate, remembered)
                builder.store(remembered, free_slot)

    # Phase 1: linear probing.  With few collisions the linear probing
    # loop has better cache locality and is also slightly cheaper
    # computationally.
    linear_probes = ir.Constant(intp_t, LINEAR_PROBES)
    with cgutils.for_range(builder, linear_probes):
        current = builder.load(index_slot)
        probe(current)
        successor = builder.add(current, const_one)
        successor = builder.and_(successor, mask)
        builder.store(successor, index_slot)

    # Phase 2: nothing decided during linear probing, so switch to a
    # non-linear perturbation keyed on the unmasked hash value.
    # XXX how to tell LLVM this branch is unlikely?
    builder.branch(bb_body)
    with builder.goto_block(bb_body):
        current = builder.load(index_slot)
        probe(current)

        # Move on to the next entry:
        #   perturb >>= 5
        #   i = (i * 5 + 1 + perturb) & mask
        shifted = builder.load(perturb_slot)
        shifted = builder.lshr(shifted, const_five)
        scaled = builder.mul(current, const_five)
        stepped = builder.add(const_one, scaled)
        successor = builder.and_(mask, builder.add(stepped, shifted))
        builder.store(successor, index_slot)
        builder.store(shifted, perturb_slot)
        # Iterate again
        builder.branch(bb_body)

    with builder.goto_block(bb_not_found):
        if for_insert:
            # Item absent: when inserting, hand back the first deleted
            # entry (if any) instead of the empty one, to avoid creating
            # an infinite lookup chain (issue #1913).
            empty_index = builder.load(index_slot)
            remembered = builder.load(free_slot)
            nothing_recorded = builder.icmp_unsigned(
                '==', remembered, no_free_entry)
            target = builder.select(nothing_recorded, empty_index, remembered)
            builder.store(target, index_slot)
        builder.branch(bb_end)

    with builder.goto_block(bb_found):
        builder.branch(bb_end)

    builder.position_at_end(bb_end)

    # Merge the two outcomes into a single boolean
    found = builder.phi(ir.IntType(1), 'found')
    found.add_incoming(cgutils.true_bit, bb_found)
    found.add_incoming(cgutils.false_bit, bb_not_found)

    final_index = builder.load(index_slot)
    return found, final_index
def build(self):
    """
    Emit the ``__gufunc__.<name>`` wrapper into a new IR module of
    ``self.library``.

    The wrapper has the LLVM signature ``void(i8**, intp*, intp*, i8*)``
    (args, dims, steps, data).  It runs the prologue, then loops
    ``dims[0]`` times over the body produced by ``gen_loop_body`` (jumping
    to the return block as soon as the body reports an error), and finally
    runs the epilogue.  The module is added to the library and the core
    function is made internal.

    Returns a ``(wrapper, self.env)`` tuple, where *wrapper* is the
    function looked up from the library.
    """
    byte_t = Type.int(8)
    byte_ptr_t = Type.pointer(byte_t)
    byte_ptr_ptr_t = Type.pointer(byte_ptr_t)
    intp_t = self.context.get_value_type(types.intp)
    intp_ptr_t = Type.pointer(intp_t)

    fnty = Type.function(Type.void(), [byte_ptr_ptr_t, intp_ptr_t,
                                       intp_ptr_t, byte_ptr_t])

    wrapper_module = self.library.create_ir_module('')
    # Declare the core function inside the wrapper module
    func_type = self.call_conv.get_function_type(self.fndesc.restype,
                                                 self.fndesc.argtypes)
    func = wrapper_module.add_function(func_type, name=self.func.name)
    func.attributes.add("alwaysinline")
    wrapper = wrapper_module.add_function(fnty,
                                          "__gufunc__." + self.func.name)
    arg_args, arg_dims, arg_steps, arg_data = wrapper.args
    arg_args.name = "args"
    arg_dims.name = "dims"
    arg_steps.name = "steps"
    arg_data.name = "data"

    builder = Builder.new(wrapper.append_basic_block("entry"))
    loopcount = builder.load(arg_dims, name="loopcount")
    pyapi = self.context.get_python_api(builder)

    # Unpack shapes: number each distinct input symbol in order of first
    # appearance (the previously computed `unique_syms` set was never read
    # and has been dropped).
    sym_map = {}
    for syms in self.sin:
        for s in syms:
            if s not in sym_map:
                sym_map[s] = len(sym_map)

    # Symbol number i is read from dims[i + 1]; dims[0] is the loop count.
    sym_dim = {}
    for s, i in sym_map.items():
        sym_dim[s] = builder.load(
            builder.gep(arg_dims,
                        [self.context.get_constant(types.intp, i + 1)]))

    # Prepare inputs
    arrays = []
    step_offset = len(self.sin) + len(self.sout)
    for i, (typ, sym) in enumerate(zip(self.signature.args,
                                       self.sin + self.sout)):
        ary = GUArrayArg(self.context, builder, arg_args, arg_steps, i,
                         step_offset, typ, sym, sym_dim)
        step_offset += len(sym)
        arrays.append(ary)

    bbreturn = builder.append_basic_block('.return')

    # Prologue
    self.gen_prologue(builder, pyapi)

    # Loop
    with cgutils.for_range(builder, loopcount, intp=intp_t) as loop:
        args = [a.get_array_at_offset(loop.index) for a in arrays]
        # Only the error flag is needed; the inner call value is unused here
        _, error = self.gen_loop_body(builder, pyapi, func, args)
        # If error, escape
        cgutils.cbranch_or_continue(builder, error, bbreturn)

    builder.branch(bbreturn)
    builder.position_at_end(bbreturn)

    # Epilogue
    self.gen_epilogue(builder, pyapi)

    builder.ret_void()

    self.library.add_ir_module(wrapper_module)
    wrapper = self.library.get_function(wrapper.name)

    # Set core function to internal so that it is not generated
    self.func.linkage = LINKAGE_INTERNAL

    return wrapper, self.env
def build_ufunc_wrapper(library, context, func, signature, objmode, envptr,
                        env):
    """
    Wrap the scalar function with a loop that iterates over the arguments.

    A ``__ufunc__.<name>`` wrapper with the LLVM signature
    ``void(i8**, intp*, intp*, i8*)`` (args, dims, steps, data) is emitted
    into a new IR module of *library*, together with a declaration of the
    scalar function named after *func*.  In object mode the loop body runs
    between ``gil_ensure`` and ``gil_release``; otherwise a fast loop is
    used when every input is unit-strided and a general loop in the other
    case.

    Returns the wrapper function looked up from *library*.
    """
    byte_t = Type.int(8)
    byte_ptr_t = Type.pointer(byte_t)
    byte_ptr_ptr_t = Type.pointer(byte_ptr_t)
    intp_t = context.get_value_type(types.intp)
    intp_ptr_t = Type.pointer(intp_t)

    fnty = Type.function(Type.void(), [byte_ptr_ptr_t, intp_ptr_t,
                                       intp_ptr_t, byte_ptr_t])

    wrapper_module = library.create_ir_module('')
    if objmode:
        func_type = context.call_conv.get_function_type(
            types.pyobject, [types.pyobject] * len(signature.args))
    else:
        func_type = context.call_conv.get_function_type(
            signature.return_type, signature.args)

    # From here on *func* is the declaration inside the wrapper module
    # (the unused `oldfunc` alias of the incoming function was removed).
    func = wrapper_module.add_function(func_type, name=func.name)
    func.attributes.add("alwaysinline")

    wrapper = wrapper_module.add_function(fnty, "__ufunc__." + func.name)
    arg_args, arg_dims, arg_steps, arg_data = wrapper.args
    arg_args.name = "args"
    arg_dims.name = "dims"
    arg_steps.name = "steps"
    arg_data.name = "data"

    builder = Builder.new(wrapper.append_basic_block("entry"))

    loopcount = builder.load(arg_dims, name="loopcount")

    # Prepare inputs
    arrays = []
    for i, typ in enumerate(signature.args):
        arrays.append(UArrayArg(context, builder, arg_args, arg_steps, i,
                                typ))

    # Prepare output
    out = UArrayArg(context, builder, arg_args, arg_steps, len(arrays),
                    signature.return_type)

    # Setup indices: one zero-initialised offset slot per input
    offsets = []
    zero = context.get_constant(types.intp, 0)
    for _ in arrays:
        p = cgutils.alloca_once(builder, intp_t)
        offsets.append(p)
        builder.store(zero, p)

    store_offset = cgutils.alloca_once(builder, intp_t)
    builder.store(zero, store_offset)

    # True only when every input is unit-strided
    unit_strided = cgutils.true_bit
    for ary in arrays:
        unit_strided = builder.and_(unit_strided, ary.is_unit_strided)

    pyapi = context.get_python_api(builder)
    if objmode:
        # General loop
        gil = pyapi.gil_ensure()
        with cgutils.for_range(builder, loopcount, intp=intp_t):
            build_obj_loop_body(context, func, builder, arrays, out, offsets,
                                store_offset, signature, pyapi, envptr, env)
        pyapi.gil_release(gil)
        builder.ret_void()

    else:
        with builder.if_else(unit_strided) as (is_unit_strided, is_strided):
            with is_unit_strided:
                with cgutils.for_range(builder, loopcount,
                                       intp=intp_t) as loop:
                    build_fast_loop_body(context, func, builder, arrays, out,
                                         offsets, store_offset, signature,
                                         loop.index, pyapi)

            with is_strided:
                # General loop
                with cgutils.for_range(builder, loopcount, intp=intp_t):
                    build_slow_loop_body(context, func, builder, arrays, out,
                                         offsets, store_offset, signature,
                                         pyapi)

        builder.ret_void()
    del builder

    # Hand the module over to the library and fetch the wrapper from it
    library.add_ir_module(wrapper_module)
    wrapper = library.get_function(wrapper.name)

    return wrapper
def _build_wrapper(self, library, name):
    """
    The LLVM IRBuilder code to create the gufunc wrapper.
    The *library* arg is the CodeLibrary for which the wrapper should
    be added to.  The *name* arg is the name of the wrapper function being
    created.

    The wrapper has the LLVM signature ``void(i8**, intp*, intp*, i8*)``
    (args, dims, steps, data).  It runs the prologue, then loops
    ``dims[0]`` times over the body produced by ``gen_loop_body`` (jumping
    to the return block as soon as the body reports an error), and finally
    runs the epilogue.  Nothing is returned; the module is added to
    *library*, which is linked against ``self.library``.
    """
    byte_t = Type.int(8)
    byte_ptr_t = Type.pointer(byte_t)
    byte_ptr_ptr_t = Type.pointer(byte_ptr_t)
    intp_t = self.context.get_value_type(types.intp)
    intp_ptr_t = Type.pointer(intp_t)

    fnty = Type.function(Type.void(), [byte_ptr_ptr_t, intp_ptr_t,
                                       intp_ptr_t, byte_ptr_t])

    wrapper_module = library.create_ir_module('')
    # Declare the core function inside the wrapper module
    func_type = self.call_conv.get_function_type(self.fndesc.restype,
                                                 self.fndesc.argtypes)
    fname = self.fndesc.llvm_func_name
    func = wrapper_module.add_function(func_type, name=fname)

    func.attributes.add("alwaysinline")
    wrapper = wrapper_module.add_function(fnty, name)
    arg_args, arg_dims, arg_steps, arg_data = wrapper.args
    arg_args.name = "args"
    arg_dims.name = "dims"
    arg_steps.name = "steps"
    arg_data.name = "data"

    builder = Builder(wrapper.append_basic_block("entry"))
    loopcount = builder.load(arg_dims, name="loopcount")
    pyapi = self.context.get_python_api(builder)

    # Unpack shapes: number each distinct input symbol in order of first
    # appearance (the previously computed `unique_syms` set was never read
    # and has been dropped).
    sym_map = {}
    for syms in self.sin:
        for s in syms:
            if s not in sym_map:
                sym_map[s] = len(sym_map)

    # Symbol number i is read from dims[i + 1]; dims[0] is the loop count.
    sym_dim = {}
    for s, i in sym_map.items():
        sym_dim[s] = builder.load(
            builder.gep(arg_dims,
                        [self.context.get_constant(types.intp, i + 1)]))

    # Prepare inputs
    arrays = []
    step_offset = len(self.sin) + len(self.sout)
    for i, (typ, sym) in enumerate(zip(self.signature.args,
                                       self.sin + self.sout)):
        ary = GUArrayArg(self.context, builder, arg_args, arg_steps, i,
                         step_offset, typ, sym, sym_dim)
        step_offset += len(sym)
        arrays.append(ary)

    bbreturn = builder.append_basic_block('.return')

    # Prologue
    self.gen_prologue(builder, pyapi)

    # Loop
    with cgutils.for_range(builder, loopcount, intp=intp_t) as loop:
        args = [a.get_array_at_offset(loop.index) for a in arrays]
        # Only the error flag is needed; the inner call value is unused here
        _, error = self.gen_loop_body(builder, pyapi, func, args)
        # If error, escape
        cgutils.cbranch_or_continue(builder, error, bbreturn)

    builder.branch(bbreturn)
    builder.position_at_end(bbreturn)

    # Epilogue
    self.gen_epilogue(builder, pyapi)

    builder.ret_void()

    # Link
    library.add_ir_module(wrapper_module)
    library.add_linking_library(self.library)
def impl_string_array_single(context, builder, sig, args):
    """
    Lower the string-array constructor taking zero or one argument.

    Without arguments, a string array with ``num_items`` and
    ``num_total_chars`` set to zero is produced.  Otherwise ``args[0]`` is
    treated as a native list of strings: the lengths reported by
    "get_str_len" are summed, the buffers are obtained from
    "allocate_string_array" and every string is copied with
    "setitem_string_array".

    Returns the array value wrapped by ``impl_ret_new_ref``.
    """
    ret_type = sig.return_type
    zero = context.get_constant(types.intp, 0)
    meminfo, meminfo_data_ptr = construct_string_array(context, builder)

    payload_cls = cgutils.create_struct_proxy(str_arr_payload_type)
    payload = payload_cls(context, builder)

    if not sig.args:
        # No arguments: produce an empty string array
        # XXX alloc empty arrays for dtor to safely delete?
        builder.store(payload._getvalue(), meminfo_data_ptr)
        empty_array = context.make_helper(builder, ret_type)
        empty_array.meminfo = meminfo
        empty_array.num_items = zero
        empty_array.num_total_chars = zero
        empty_value = empty_array._getvalue()
        return impl_ret_new_ref(context, builder, ret_type, empty_value)

    string_list = ListInstance(context, builder, sig.args[0], args[0])

    char_ptr_t = lir.IntType(8).as_pointer()
    int64_t = lir.IntType(64)

    # Sum the length of every string to size the character buffer
    len_fnty = lir.FunctionType(int64_t, [char_ptr_t])
    fn_len = builder.module.get_or_insert_function(
        len_fnty, name="get_str_len")
    total_size = cgutils.alloca_once_value(builder, zero)

    with cgutils.for_range(builder, string_list.size) as loop:
        str_value = string_list.getitem(loop.index)
        str_len = builder.call(fn_len, [str_value])
        running_total = builder.add(builder.load(total_size), str_len)
        builder.store(running_total, total_size)

    # Allocate the offsets / data / null-bitmap buffers of the payload
    alloc_fnty = lir.FunctionType(
        lir.VoidType(),
        [
            lir.IntType(32).as_pointer().as_pointer(),
            char_ptr_t.as_pointer(),
            char_ptr_t.as_pointer(),
            int64_t,
            int64_t,
        ])
    fn_alloc = builder.module.get_or_insert_function(
        alloc_fnty, name="allocate_string_array")
    alloc_args = [
        payload._get_ptr_by_name('offsets'),
        payload._get_ptr_by_name('data'),
        payload._get_ptr_by_name('null_bitmap'),
        string_list.size,
        builder.load(total_size),
    ]
    builder.call(fn_alloc, alloc_args)

    # Copy each string of the list into the array
    setitem_fnty = lir.FunctionType(
        lir.VoidType(),
        [lir.IntType(32).as_pointer(), char_ptr_t, char_ptr_t, int64_t])
    fn_setitem = builder.module.get_or_insert_function(
        setitem_fnty, name="setitem_string_array")

    with cgutils.for_range(builder, string_list.size) as loop:
        str_value = string_list.getitem(loop.index)
        setitem_args = [payload.offsets, payload.data, str_value, loop.index]
        builder.call(fn_setitem, setitem_args)

    # Publish the payload and fill in the array structure
    builder.store(payload._getvalue(), meminfo_data_ptr)

    string_array = context.make_helper(builder, ret_type)
    string_array.num_items = string_list.size
    string_array.num_total_chars = builder.load(total_size)
    string_array.offsets = payload.offsets
    string_array.data = payload.data
    string_array.null_bitmap = payload.null_bitmap
    string_array.meminfo = meminfo

    array_value = string_array._getvalue()
    return impl_ret_new_ref(context, builder, ret_type, array_value)
def build_ufunc_wrapper(library, context, func, signature, objmode, env):
    """
    Wrap the scalar function with a loop that iterates over the arguments.

    A ``__ufunc__.<name>`` wrapper with the LLVM signature
    ``void(i8**, intp*, intp*, i8*)`` (args, dims, steps, data) is emitted
    into a new IR module of *library*.  The incoming *func* is given
    internal linkage once the module has been added, and the wrapper
    looked up from *library* is returned.
    """
    intp_t = context.get_value_type(types.intp)
    char_ptr_t = Type.pointer(Type.int(8))
    char_ptr_ptr_t = Type.pointer(char_ptr_t)
    intp_ptr_t = Type.pointer(intp_t)
    wrapper_fnty = Type.function(
        Type.void(), [char_ptr_ptr_t, intp_ptr_t, intp_ptr_t, char_ptr_t])

    wrapper_module = library.create_ir_module('')

    call_conv = context.call_conv
    if objmode:
        func_type = call_conv.get_function_type(
            types.pyobject, [types.pyobject] * len(signature.args))
    else:
        func_type = call_conv.get_function_type(
            signature.return_type, signature.args)

    # Keep a handle on the incoming function; *func* is rebound to the
    # declaration living in the wrapper module.
    oldfunc = func
    func = wrapper_module.add_function(func_type, name=func.name)
    func.attributes.add("alwaysinline")

    wrapper = wrapper_module.add_function(wrapper_fnty,
                                          "__ufunc__." + func.name)
    arg_args, arg_dims, arg_steps, arg_data = wrapper.args
    arg_args.name = "args"
    arg_dims.name = "dims"
    arg_steps.name = "steps"
    arg_data.name = "data"

    builder = Builder.new(wrapper.append_basic_block("entry"))

    loopcount = builder.load(arg_dims, name="loopcount")

    actual_args = call_conv.get_arguments(func)

    # One array helper per input ...
    arrays = []
    for position, argtype in enumerate(signature.args):
        lltype = context.get_argument_type(argtype)
        arrays.append(UArrayArg(context, builder, arg_args, arg_steps,
                                position, lltype))

    # ... and one for the output, indexed by the number of actual arguments
    valty = context.get_data_type(signature.return_type)
    out = UArrayArg(context, builder, arg_args, arg_steps, len(actual_args),
                    valty)

    # Zero-initialised offset slot per input, plus one for the output
    offsets = []
    zero = context.get_constant(types.intp, 0)
    for _ in arrays:
        slot = cgutils.alloca_once(builder, intp_t)
        offsets.append(slot)
        builder.store(zero, slot)

    store_offset = cgutils.alloca_once(builder, intp_t)
    builder.store(zero, store_offset)

    # True only when every input is unit-strided
    unit_strided = cgutils.true_bit
    for ary in arrays:
        unit_strided = builder.and_(unit_strided, ary.is_unit_strided)

    if objmode:
        # General loop, executed while holding the GIL
        pyapi = context.get_python_api(builder)
        gil = pyapi.gil_ensure()
        with cgutils.for_range(builder, loopcount, intp=intp_t):
            build_obj_loop_body(context, func, builder, arrays, out, offsets,
                                store_offset, signature, pyapi, env)
        pyapi.gil_release(gil)
        builder.ret_void()

    else:
        with cgutils.ifelse(builder, unit_strided) as (is_unit_strided,
                                                       is_strided):
            with is_unit_strided:
                with cgutils.for_range(builder, loopcount,
                                       intp=intp_t) as ind:
                    build_fast_loop_body(context, func, builder, arrays, out,
                                         offsets, store_offset, signature,
                                         ind)
                builder.ret_void()

            with is_strided:
                # General loop
                with cgutils.for_range(builder, loopcount, intp=intp_t):
                    build_slow_loop_body(context, func, builder, arrays, out,
                                         offsets, store_offset, signature)
                builder.ret_void()

        # Terminator for the block that follows the two branches
        builder.ret_void()
    del builder

    # Hand the module over to the library and fetch the wrapper from it
    library.add_ir_module(wrapper_module)
    wrapper = library.get_function(wrapper.name)

    oldfunc.linkage = LINKAGE_INTERNAL

    return wrapper
def build(self):
    """
    Build the ``__gufunc__.<name>`` wrapper for ``self.func``.

    A new IR module is created from ``self.library``.  A function of the
    kernel's type (from ``self.fndesc``) is added under ``self.func.name``
    and marked ``alwaysinline``; the wrapper is added with the type
    ``void(i8** args, intp* dims, intp* steps, i8* data)``.

    The loop count is loaded from the first element of ``dims``.  Each
    distinct symbol found in ``self.sin`` is numbered in order of first
    appearance, and its extent is loaded from ``dims[number + 1]``.

    Each loop iteration calls ``self.gen_loop_body``, jumps to the
    ``.return`` block through ``cgutils.cbranch_or_continue`` on the
    returned error value, and otherwise advances every array with
    ``next``.  ``self.gen_prologue`` is emitted before the loop and
    ``self.gen_epilogue`` inside the ``.return`` block.

    Returns ``(wrapper, self.env)`` where *wrapper* is looked up by name
    from ``self.library``.  ``self.func.linkage`` is set to
    ``LINKAGE_INTERNAL`` as a side effect.
    """
    # LLVM types that make up the wrapper's signature.
    i8_t = Type.int(8)
    i8_ptr_t = Type.pointer(i8_t)
    i8_ptr_ptr_t = Type.pointer(i8_ptr_t)
    intp_t = self.context.get_value_type(types.intp)
    intp_ptr_t = Type.pointer(intp_t)
    wrapper_fnty = Type.function(
        Type.void(), [i8_ptr_ptr_t, intp_ptr_t, intp_ptr_t, i8_ptr_t])

    wrapper_module = self.library.create_ir_module('')
    kernel_fnty = self.call_conv.get_function_type(self.fndesc.restype,
                                                   self.fndesc.argtypes)
    kernel = wrapper_module.add_function(kernel_fnty, name=self.func.name)
    kernel.attributes.add("alwaysinline")

    wrapper = wrapper_module.add_function(wrapper_fnty,
                                          "__gufunc__." + self.func.name)
    args_ptr, dims_ptr, steps_ptr, data_ptr = wrapper.args
    labelled = zip((args_ptr, dims_ptr, steps_ptr, data_ptr),
                   ("args", "dims", "steps", "data"))
    for llarg, label in labelled:
        llarg.name = label

    builder = Builder.new(wrapper.append_basic_block("entry"))
    loopcount = builder.load(dims_ptr, name="loopcount")

    # Unpack shapes.
    # Every symbol of the inputs and outputs; this set is not read again
    # in this function.
    unique_syms = set()
    for group in (self.sin, self.sout):
        for syms in group:
            unique_syms.update(syms)

    # Number the input symbols in order of first appearance.
    sym_map = {}
    for syms in self.sin:
        for sym in syms:
            sym_map.setdefault(sym, len(sym_map))

    # dims[0] holds the loop count, so symbol k is read from dims[k + 1].
    sym_dim = {}
    for sym, pos in sym_map.items():
        dim_index = self.context.get_constant(types.intp, pos + 1)
        sym_dim[sym] = builder.load(builder.gep(dims_ptr, [dim_index]))

    # Prepare inputs.
    arrays = []
    step_offset = len(self.sin) + len(self.sout)
    typed_syms = zip(self.signature.args, self.sin + self.sout)
    for pos, (argty, syms) in enumerate(typed_syms):
        guarg = GUArrayArg(self.context, builder, args_ptr, dims_ptr,
                           steps_ptr, pos, step_offset, argty, syms,
                           sym_dim)
        # Arguments flagged ``as_scalar`` do not advance the step offset.
        if not guarg.as_scalar:
            step_offset += guarg.ndim
        arrays.append(guarg)

    bbreturn = cgutils.get_function(builder).append_basic_block('.return')

    # Prologue
    self.gen_prologue(builder)

    # Loop
    with cgutils.for_range(builder, loopcount, intp=intp_t) as ind:
        call_args = [guarg.array_value for guarg in arrays]
        _innercall, error = self.gen_loop_body(builder, kernel, call_args)
        # If error, escape
        cgutils.cbranch_or_continue(builder, error, bbreturn)

        for guarg in arrays:
            guarg.next(ind)

    builder.branch(bbreturn)
    builder.position_at_end(bbreturn)

    # Epilogue
    self.gen_epilogue(builder)
    builder.ret_void()

    # Add the module to the library and fetch the wrapper back by name.
    self.library.add_ir_module(wrapper_module)
    finished = self.library.get_function(wrapper.name)

    # Set core function to internal so that it is not generated
    self.func.linkage = LINKAGE_INTERNAL

    return finished, self.env
def _build_wrapper(self, library, name):
    """
    Emit the gufunc wrapper named *name* into *library*.

    *library* is the CodeLibrary that receives the wrapper: a new IR
    module is created from it, filled in, and added back to it, after
    which ``self.library`` is registered with it through
    ``add_linking_library``.

    *name* is the name given to the wrapper function, whose type is
    ``void(i8** args, intp* dims, intp* steps, i8* data)``.  The kernel is
    added to the same module under ``self.fndesc.llvm_func_name`` and
    marked ``alwaysinline``.

    The loop count is loaded from the first element of ``dims``.  Each
    distinct symbol found in ``self.sin`` is numbered in order of first
    appearance, and its extent is loaded from ``dims[number + 1]``.  Each
    iteration calls ``self.gen_loop_body`` on the arrays taken at the
    current loop index and jumps to the ``.return`` block through
    ``cgutils.cbranch_or_continue`` on the returned error value.

    Nothing is returned.
    """
    # LLVM types that make up the wrapper's signature.
    i8_t = Type.int(8)
    i8_ptr_t = Type.pointer(i8_t)
    i8_ptr_ptr_t = Type.pointer(i8_ptr_t)
    intp_t = self.context.get_value_type(types.intp)
    intp_ptr_t = Type.pointer(intp_t)
    wrapper_fnty = Type.function(
        Type.void(), [i8_ptr_ptr_t, intp_ptr_t, intp_ptr_t, i8_ptr_t])

    wrapper_module = library.create_ir_module('')
    kernel_fnty = self.call_conv.get_function_type(self.fndesc.restype,
                                                   self.fndesc.argtypes)
    kernel_name = self.fndesc.llvm_func_name
    kernel = wrapper_module.add_function(kernel_fnty, name=kernel_name)
    kernel.attributes.add("alwaysinline")

    wrapper = wrapper_module.add_function(wrapper_fnty, name)
    args_ptr, dims_ptr, steps_ptr, data_ptr = wrapper.args
    labelled = zip((args_ptr, dims_ptr, steps_ptr, data_ptr),
                   ("args", "dims", "steps", "data"))
    for llarg, label in labelled:
        llarg.name = label

    builder = Builder(wrapper.append_basic_block("entry"))
    loopcount = builder.load(dims_ptr, name="loopcount")
    pyapi = self.context.get_python_api(builder)

    # Unpack shapes.
    # Every symbol of the inputs and outputs; this set is not read again
    # in this function.
    unique_syms = set()
    for group in (self.sin, self.sout):
        for syms in group:
            unique_syms.update(syms)

    # Number the input symbols in order of first appearance.
    sym_map = {}
    for syms in self.sin:
        for sym in syms:
            sym_map.setdefault(sym, len(sym_map))

    # dims[0] holds the loop count, so symbol k is read from dims[k + 1].
    sym_dim = {}
    for sym, pos in sym_map.items():
        dim_index = self.context.get_constant(types.intp, pos + 1)
        sym_dim[sym] = builder.load(builder.gep(dims_ptr, [dim_index]))

    # Prepare inputs.
    arrays = []
    step_offset = len(self.sin) + len(self.sout)
    typed_syms = zip(self.signature.args, self.sin + self.sout)
    for pos, (argty, syms) in enumerate(typed_syms):
        guarg = GUArrayArg(self.context, builder, args_ptr, steps_ptr,
                           pos, step_offset, argty, syms, sym_dim)
        # The step offset advances by the number of symbols of this
        # argument.
        step_offset += len(syms)
        arrays.append(guarg)

    bbreturn = builder.append_basic_block('.return')

    # Prologue
    self.gen_prologue(builder, pyapi)

    # Loop
    with cgutils.for_range(builder, loopcount, intp=intp_t) as loop:
        call_args = [guarg.get_array_at_offset(loop.index)
                     for guarg in arrays]
        _innercall, error = self.gen_loop_body(builder, pyapi, kernel,
                                               call_args)
        # If error, escape
        cgutils.cbranch_or_continue(builder, error, bbreturn)

    builder.branch(bbreturn)
    builder.position_at_end(bbreturn)

    # Epilogue
    self.gen_epilogue(builder, pyapi)
    builder.ret_void()

    # Link
    library.add_ir_module(wrapper_module)
    library.add_linking_library(self.library)
def build_ufunc_wrapper(context, func, signature):
    """
    Add a ``__ufunc__.<name>`` loop wrapper for *func* to *func*'s module.

    The wrapper has the type
    ``void(i8** args, intp* dims, intp* steps, i8* data)``.  It loads the
    loop count from the first element of ``dims`` and emits two loops,
    chosen by the conjunction of every input's ``is_unit_strided`` flag:
    one built with ``build_fast_loop_body`` and one built with
    ``build_slow_loop_body``.

    context   -- code generation context (types, constants, arguments,
                 optimizer).
    func      -- the scalar kernel; the wrapper is added to its ``module``
                 and its ``linkage`` is set to ``LINKAGE_INTERNAL``.
    signature -- supplies ``args`` and ``return_type`` of the kernel.

    After emission, the values returned by the two loop-body builders are
    passed to ``inline_function`` and the module is passed to
    ``context.optimize``.  The module is printed when
    ``config.DUMP_OPTIMIZED`` is set.

    Returns the wrapper function.
    """
    module = func.module

    # LLVM types that make up the wrapper's signature.
    i8_t = Type.int(8)
    i8_ptr_t = Type.pointer(i8_t)
    i8_ptr_ptr_t = Type.pointer(i8_ptr_t)
    intp_t = context.get_value_type(types.intp)
    intp_ptr_t = Type.pointer(intp_t)
    wrapper_fnty = Type.function(
        Type.void(), [i8_ptr_ptr_t, intp_ptr_t, intp_ptr_t, i8_ptr_t])

    wrapper = module.add_function(wrapper_fnty, "__ufunc__." + func.name)
    args_ptr, dims_ptr, steps_ptr, data_ptr = wrapper.args
    labelled = zip((args_ptr, dims_ptr, steps_ptr, data_ptr),
                   ("args", "dims", "steps", "data"))
    for llarg, label in labelled:
        llarg.name = label

    builder = Builder.new(wrapper.append_basic_block("entry"))
    loopcount = builder.load(dims_ptr, name="loopcount")

    actual_args = context.get_arguments(func)

    # One UArrayArg per input, indexed by its position in the signature.
    arrays = [UArrayArg(context, builder, args_ptr, steps_ptr, pos,
                        context.get_argument_type(argty))
              for pos, argty in enumerate(signature.args)]

    # The output array is indexed by the number of kernel arguments.
    out_valty = context.get_data_type(signature.return_type)
    out = UArrayArg(context, builder, args_ptr, steps_ptr,
                    len(actual_args), out_valty)

    # One zero-initialised intp slot per input, plus one for the output.
    zero = context.get_constant(types.intp, 0)
    offsets = []
    for _ in arrays:
        slot = cgutils.alloca_once(builder, intp_t)
        offsets.append(slot)
        builder.store(zero, slot)

    store_offset = cgutils.alloca_once(builder, intp_t)
    builder.store(zero, store_offset)

    # AND together the unit-stride flag of every input.
    all_unit_strided = cgutils.true_bit
    for arr in arrays:
        all_unit_strided = builder.and_(all_unit_strided,
                                        arr.is_unit_strided)

    with cgutils.ifelse(builder, all_unit_strided) as (unit_branch,
                                                       strided_branch):
        with unit_branch:
            with cgutils.for_range(builder, loopcount, intp=intp_t) as ind:
                fast_call = build_fast_loop_body(context, func, builder,
                                                 arrays, out, offsets,
                                                 store_offset, signature,
                                                 ind)
            builder.ret_void()

        with strided_branch:
            # General loop
            with cgutils.for_range(builder, loopcount, intp=intp_t):
                slow_call = build_slow_loop_body(context, func, builder,
                                                 arrays, out, offsets,
                                                 store_offset, signature)
            builder.ret_void()

    # Also emit a return at the position left after the if/else.
    builder.ret_void()
    del builder

    # Set core function to internal so that it is not generated
    func.linkage = LINKAGE_INTERNAL

    # Force inline of code function (slow loop first, then fast loop).
    inline_function(slow_call)
    inline_function(fast_call)

    # Run optimizer
    context.optimize(module)

    if config.DUMP_OPTIMIZED:
        print(module)

    return wrapper