def timedelta_mod_timedelta(context, builder, sig, args):
    """
    Emit IR for ``timedelta % timedelta`` with Python-style modulus.

    *args* holds the two operand values and ``sig.args`` their types.
    The result slot is initialised to ``NAT`` and is only overwritten when
    both guards below pass, so a NaT operand or a zero divisor yields NaT.
    The loaded result is returned through ``impl_ret_untracked``.
    """
    # inspired by https://github.com/numpy/numpy/blob/fe8072a12d65e43bd2e0b0f9ad67ab0108cc54b3/numpy/core/src/umath/loops.c.src#L1424
    # alg is basically as `a % b`:
    # if a or b is NaT return NaT
    # elseif b is 0 return NaT
    # else pretend a and b are int and do pythonic int modulus
    [va, vb] = args
    [ta, tb] = sig.args
    not_nan = are_not_nat(builder, [va, vb])
    ll_ret_type = context.get_value_type(sig.return_type)
    ret = alloc_timedelta_result(builder)
    builder.store(NAT, ret)
    zero = Constant.int(ll_ret_type, 0)
    with cgutils.if_likely(builder, not_nan):
        va, vb = normalize_timedeltas(context, builder, va, vb, ta, tb)
        # is the denominator zero or NaT?
        denom_ok = builder.not_(builder.icmp_signed('==', vb, zero))
        with cgutils.if_likely(builder, denom_ok):
            # `srem` takes the sign of the dividend.  That already matches
            # Python when both operands lie on the same side of zero, i.e.
            # `(a > 0) == (b > 0)`, or when the remainder is zero.
            # Testing only "both positive" would wrongly shift the result
            # for two negative operands (e.g. -7 % -3 must be -1, not -4).
            vapos = builder.icmp_signed('>', va, zero)
            vbpos = builder.icmp_signed('>', vb, zero)
            same_sign = builder.not_(builder.xor(vapos, vbpos))
            rem = builder.srem(va, vb)
            cond = builder.or_(same_sign,
                               builder.icmp_signed('==', rem, zero))
            with builder.if_else(cond) as (then, otherwise):
                with then:
                    builder.store(rem, ret)
                with otherwise:
                    # Mixed signs with a non-zero remainder: move the
                    # remainder onto the divisor's side of zero.
                    builder.store(builder.add(rem, vb), ret)
    res = builder.load(ret)
    return impl_ret_untracked(context, builder, sig.return_type, res)
def timedelta_floor_div_timedelta(context, builder, sig, args):
    """
    Emit IR for ``timedelta // timedelta`` with floor (Python-style) rounding.

    *args* holds the two operand values and ``sig.args`` their types.
    The result slot is initialised to zero and is only overwritten when the
    operands pass the ``are_not_nat`` guard and the normalized divisor is
    non-zero.  The loaded result is returned through ``impl_ret_untracked``.
    """
    [va, vb] = args
    [ta, tb] = sig.args
    ll_ret_type = context.get_value_type(sig.return_type)
    not_nan = are_not_nat(builder, [va, vb])
    ret = cgutils.alloca_once(builder, ll_ret_type, name='ret')
    zero = Constant.int(ll_ret_type, 0)
    one = Constant.int(ll_ret_type, 1)
    builder.store(zero, ret)
    with cgutils.if_likely(builder, not_nan):
        va, vb = normalize_timedeltas(context, builder, va, vb, ta, tb)
        # is the denominator zero or NaT?
        denom_ok = builder.not_(builder.icmp_signed('==', vb, zero))
        with cgutils.if_likely(builder, denom_ok):
            # `sdiv` truncates toward zero.  Floor division differs from
            # truncation only when the exact quotient is negative and not
            # an integer, i.e. the operand signs differ and the remainder
            # is non-zero; in that case the floor is one less.
            quot = builder.sdiv(va, vb)
            rem = builder.srem(va, vb)
            vaneg = builder.icmp_signed('<', va, zero)
            vbneg = builder.icmp_signed('<', vb, zero)
            signs_differ = builder.xor(vaneg, vbneg)
            inexact = builder.icmp_signed('!=', rem, zero)
            round_down = builder.and_(signs_differ, inexact)
            floored = builder.select(round_down,
                                     builder.sub(quot, one),
                                     quot)
            builder.store(floored, ret)
    res = builder.load(ret)
    return impl_ret_untracked(context, builder, sig.return_type, res)
def unbox_buffer(typ, obj, c):
    """
    Build a native array structure from an object that provides a Py_buffer.

    The returned NativeValue carries an error flag derived from the
    ``get_buffer`` result and a cleanup callback that releases the buffer.
    """
    builder = c.builder

    py_buffer = c.pyapi.alloca_buffer()
    status = c.pyapi.get_buffer(obj, py_buffer)
    failed = cgutils.is_not_null(builder, status)

    # Instantiate the array structure for this type and grab its address.
    array_struct = c.context.make_array(typ)(c.context, builder)
    struct_ptr = array_struct._getpointer()

    succeeded = builder.not_(failed)
    with cgutils.if_likely(builder, succeeded):
        raw_ptr = builder.bitcast(struct_ptr, c.pyapi.voidptr)
        # Pick the adaptor according to whether NRT is enabled.
        if c.context.enable_nrt:
            adapt = c.pyapi.nrt_adapt_buffer_from_python
        else:
            adapt = c.pyapi.numba_buffer_adaptor
        adapt(py_buffer, raw_ptr)

    def cleanup():
        c.pyapi.release_buffer(py_buffer)

    native = builder.load(struct_ptr)
    return NativeValue(native, is_error=failed, cleanup=cleanup)
def unbox_unicodecharseq(typ, obj, c):
    """
    Unbox a Python string into a fixed-capacity unicode charseq value.

    A string longer than ``typ.count`` is truncated to ``typ.count``.
    The error flag of the returned NativeValue is the negation of the
    ``ok`` status reported by the string conversion.
    """
    builder = c.builder
    charseq_ty = c.context.get_value_type(typ)

    (ok, buffer, size,
     _kind, _is_ascii, _hashv) = c.pyapi.string_as_string_size_and_kind(obj)

    # Only copy when the conversion succeeded.
    with cgutils.if_likely(builder, ok):
        # Clamp the copy length to the charseq capacity (truncation allowed).
        capacity = ir.Constant(size.type, typ.count)
        fits = builder.icmp_unsigned("<=", size, capacity)
        copy_len = builder.select(fits, size, capacity)

        # Start from zero-filled storage, so no explicit NULL terminator
        # has to be written after the copy.
        zero_filled = ir.Constant(charseq_ty, None)
        outspace = cgutils.alloca_once_value(builder, zero_filled)

        dest = builder.bitcast(outspace, buffer.type)
        cgutils.memcpy(builder, dest, buffer, copy_len)

    ret = builder.load(outspace)
    return NativeValue(ret, is_error=builder.not_(ok))
def raise_error(self, builder, api, status):
    """
    Set the Python exception matching a non-ok *status*.

    Each recognised status kind branches to a common exit block once its
    exception has been set; anything unrecognised becomes a SystemError.
    """
    exit_block = builder.function.append_basic_block()

    # User exception: rebuild it from its serialized form.
    with builder.if_then(status.is_user_exc):
        # Clear any pending error first so it cannot interfere.
        api.err_clear()
        exc_obj = api.unserialize(status.excinfoptr)
        have_exc = cgutils.is_not_null(builder, exc_obj)
        with cgutils.if_likely(builder, have_exc):
            api.raise_object(exc_obj)  # steals ref
        builder.branch(exit_block)

    # StopIteration.
    with builder.if_then(status.is_stop_iteration):
        api.err_set_none("PyExc_StopIteration")
        builder.branch(exit_block)

    # A Python exception is already set: nothing more to do.
    with builder.if_then(status.is_python_exc):
        builder.branch(exit_block)

    # Fallback for any other status.
    api.err_set_string("PyExc_SystemError",
                       "unknown error when calling native function")
    builder.branch(exit_block)

    builder.position_at_end(exit_block)
def unbox_funcptr(typ, obj, c):
    """
    Unbox *obj* into a native function pointer by calling the type's
    ``get_pointer`` callable on it at runtime.

    Raises NotImplementedError when the type provides no ``get_pointer``.
    The pointer slot starts out as a null constant and is only overwritten
    when both the callable and its result are non-null.
    """
    if typ.get_pointer is None:
        raise NotImplementedError(typ)

    builder = c.builder
    pyapi = c.pyapi

    fnptr_ty = c.context.get_function_pointer_type(typ)
    null_fnptr = ir.Constant(fnptr_ty, None)
    slot = cgutils.alloca_once_value(builder, null_fnptr, name="fnptr")

    # Make the get_pointer callable available to the generated code.
    serialized = pyapi.serialize_object(typ.get_pointer)
    getter = pyapi.unserialize(serialized)

    getter_ok = cgutils.is_not_null(builder, getter)
    with cgutils.if_likely(builder, getter_ok):
        int_obj = pyapi.call_function_objargs(getter, (obj,))
        pyapi.decref(getter)

        call_ok = cgutils.is_not_null(builder, int_obj)
        with cgutils.if_likely(builder, call_ok):
            raw_ptr = pyapi.long_as_voidptr(int_obj)
            pyapi.decref(int_obj)
            typed_ptr = builder.bitcast(raw_ptr, fnptr_ty)
            builder.store(typed_ptr, slot)

    loaded = builder.load(slot)
    return NativeValue(loaded, is_error=pyapi.c_api_error())
def timedelta_sub_impl(context, builder, sig, args):
    """
    Emit IR for ``timedelta - timedelta``.

    Both operands are scaled to the return type before subtracting.  The
    subtraction is only stored when the ``are_not_nat`` guard passes;
    otherwise the freshly allocated result slot is returned as is.
    """
    lhs, rhs = args
    lhs_type, rhs_type = sig.args
    out_type = sig.return_type

    slot = alloc_timedelta_result(builder)
    both_valid = are_not_nat(builder, [lhs, rhs])
    with cgutils.if_likely(builder, both_valid):
        lhs = scale_timedelta(context, builder, lhs, lhs_type, out_type)
        rhs = scale_timedelta(context, builder, rhs, rhs_type, out_type)
        difference = builder.sub(lhs, rhs)
        builder.store(difference, slot)

    loaded = builder.load(slot)
    return impl_ret_untracked(context, builder, out_type, loaded)
def impl(context, builder, dt_arg, dt_unit, td_arg, td_unit, ret_unit):
    """
    Combine a datetime and a timedelta using the builder method named by
    the enclosing scope's ``ll_op_name``.

    Both operands are converted to *ret_unit* first.  The combined value is
    only stored when the ``are_not_nat`` guard passes; the content of the
    result slot is returned either way.
    """
    slot = alloc_timedelta_result(builder)
    both_valid = are_not_nat(builder, [dt_arg, td_arg])
    with cgutils.if_likely(builder, both_valid):
        dt_scaled = convert_datetime_for_arith(builder, dt_arg,
                                               dt_unit, ret_unit)
        factor = npdatetime_helpers.get_timedelta_conversion_factor(
            td_unit, ret_unit
        )
        td_scaled = scale_by_constant(builder, td_arg, factor)
        # Look up the LLVM operation (a builder method) by name.
        combine = getattr(builder, ll_op_name)
        builder.store(combine(dt_scaled, td_scaled), slot)
    return builder.load(slot)
def datetime_minus_datetime(context, builder, sig, args):
    """
    Emit IR for ``datetime - datetime``.

    Each operand is converted from its own unit to the return type's unit
    before subtracting.  The difference is only stored when the
    ``are_not_nat`` guard passes.
    """
    lhs, rhs = args
    lhs_type, rhs_type = sig.args
    lhs_unit = lhs_type.unit
    rhs_unit = rhs_type.unit
    out_unit = sig.return_type.unit

    slot = alloc_timedelta_result(builder)
    both_valid = are_not_nat(builder, [lhs, rhs])
    with cgutils.if_likely(builder, both_valid):
        lhs = convert_datetime_for_arith(builder, lhs, lhs_unit, out_unit)
        rhs = convert_datetime_for_arith(builder, rhs, rhs_unit, out_unit)
        builder.store(builder.sub(lhs, rhs), slot)

    loaded = builder.load(slot)
    return impl_ret_untracked(context, builder, sig.return_type, loaded)
def timedelta_over_timedelta(context, builder, sig, args):
    """
    Emit IR for ``timedelta / timedelta`` producing a floating-point value.

    The result slot is pre-filled with NaN and only overwritten with the
    quotient when the ``are_not_nat`` guard passes.
    """
    lhs, rhs = args
    lhs_type, rhs_type = sig.args

    both_valid = are_not_nat(builder, [lhs, rhs])
    float_ty = context.get_value_type(sig.return_type)

    slot = cgutils.alloca_once(builder, float_ty, name='ret')
    nan_value = Constant(float_ty, float('nan'))
    builder.store(nan_value, slot)

    with cgutils.if_likely(builder, both_valid):
        lhs, rhs = normalize_timedeltas(context, builder,
                                        lhs, rhs, lhs_type, rhs_type)
        # Convert the integer operands to the float return type, then divide.
        numerator = builder.sitofp(lhs, float_ty)
        denominator = builder.sitofp(rhs, float_ty)
        quotient = builder.fdiv(numerator, denominator)
        builder.store(quotient, slot)

    loaded = builder.load(slot)
    return impl_ret_untracked(context, builder, sig.return_type, loaded)
def _timedelta_times_number(context, builder, td_arg, td_type,
                            number_arg, number_type, return_type):
    """
    Emit IR multiplying a timedelta by a number and return the loaded
    result.

    For a float multiplier the product is computed in floating point and
    cast back to a timedelta; otherwise an integer multiply is used.  The
    product is only stored when ``is_not_nat`` holds for *td_arg*.
    """
    slot = alloc_timedelta_result(builder)
    td_valid = is_not_nat(builder, td_arg)
    with cgutils.if_likely(builder, td_valid):
        if not isinstance(number_type, types.Float):
            product = builder.mul(td_arg, number_arg)
        else:
            as_float = builder.sitofp(td_arg, number_arg.type)
            float_product = builder.fmul(as_float, number_arg)
            product = _cast_to_timedelta(context, builder, float_product)
        # The scaling is required for ufunc np.multiply() with an explicit
        # output in a different unit.
        scaled = scale_timedelta(context, builder, product,
                                 td_type, return_type)
        builder.store(scaled, slot)
    return builder.load(slot)
def iternext_zip(context, builder, sig, args, result):
    """
    Advance a generator once and report the outcome through *result*.

    An ok status yields the produced value, a stop-iteration status marks
    the result exhausted, and any other error status is propagated to the
    caller through the calling convention.
    """
    (gen_type,) = sig.args
    (_gen,) = args  # enforces that exactly one argument was passed

    next_impl = context.get_generator_impl(gen_type)
    status, value = next_impl(context, builder, sig, args)
    context.add_linking_libs(getattr(next_impl, 'libs', ()))

    with cgutils.if_likely(builder, status.is_ok):
        result.set_valid(True)
        result.yield_(value)

    with cgutils.if_unlikely(builder, status.is_stop_iteration):
        result.set_exhausted()

    # An error that is not a plain StopIteration.
    real_error = builder.and_(status.is_error,
                              builder.not_(status.is_stop_iteration))
    with cgutils.if_unlikely(builder, real_error):
        context.call_conv.return_status_propagate(builder, status)
def timedelta_over_number(context, builder, sig, args):
    """
    Emit IR for ``timedelta / number``.

    The quotient is only stored when ``is_not_nat`` holds for the timedelta
    and the divisor is neither zero nor NaN; otherwise the freshly
    allocated result slot is returned as is.
    """
    td_arg, number_arg = args
    td_type = sig.args[0]
    number_type = sig.args[1]

    slot = alloc_timedelta_result(builder)
    td_valid = is_not_nat(builder, td_arg)
    bad_divisor = cgutils.is_scalar_zero_or_nan(builder, number_arg)
    can_divide = builder.and_(td_valid, builder.not_(bad_divisor))

    with cgutils.if_likely(builder, can_divide):
        # Denominator is non-zero, non-NaN
        if not isinstance(number_type, types.Float):
            quotient = builder.sdiv(td_arg, number_arg)
        else:
            as_float = builder.sitofp(td_arg, number_arg.type)
            float_quotient = builder.fdiv(as_float, number_arg)
            quotient = _cast_to_timedelta(context, builder, float_quotient)
        # The scaling is required for ufuncs np.*divide() with an explicit
        # output in a different unit.
        scaled = scale_timedelta(context, builder, quotient,
                                 td_type, sig.return_type)
        builder.store(scaled, slot)

    loaded = builder.load(slot)
    return impl_ret_untracked(context, builder, sig.return_type, loaded)
def generate_kernel_wrapper(self, library, fname, argtypes, debug):
    """
    Generate the kernel wrapper in the given ``library``.
    The function being wrapped have the name ``fname`` and argument types
    ``argtypes``.  The wrapper function is returned.

    When ``debug`` is true, the wrapper additionally checks the status
    returned by the wrapped function and records the first non-ok status
    code, together with the thread and block indices, in module-level
    global variables.
    """
    arginfo = self.get_arg_packer(argtypes)
    argtys = list(arginfo.argument_types)
    # The wrapper itself returns void and takes the packed argument types.
    wrapfnty = ir.FunctionType(ir.VoidType(), argtys)
    wrapper_module = self.create_module("cuda.kernel.wrapper")
    # Declaration of the wrapped function: it returns an i32 and takes an
    # extra leading parameter obtained from the calling convention.
    fnty = ir.FunctionType(
        ir.IntType(32),
        [self.call_conv.get_return_type(types.pyobject)] + argtys)
    func = ir.Function(wrapper_module, fnty, fname)

    # The wrapper is named after the wrapped function, prefixed with the
    # 'cudapy' namespace.
    prefixed = itanium_mangler.prepend_namespace(func.name, ns='cudapy')
    wrapfn = ir.Function(wrapper_module, wrapfnty, prefixed)
    builder = ir.IRBuilder(wrapfn.append_basic_block(''))

    # Define error handling variables
    def define_error_gv(postfix):
        # One i32 global per error field, named "<wrapper name><postfix>".
        name = wrapfn.name + postfix
        gv = cgutils.add_global_variable(wrapper_module, ir.IntType(32),
                                         name)
        gv.initializer = ir.Constant(gv.type.pointee, None)
        return gv

    gv_exc = define_error_gv("__errcode__")
    gv_tid = []
    gv_ctaid = []
    # One thread-index and one block-index global per dimension.
    for i in 'xyz':
        gv_tid.append(define_error_gv("__tid%s__" % i))
        gv_ctaid.append(define_error_gv("__ctaid%s__" % i))

    # Unpack the wrapper's arguments and call the wrapped function.
    callargs = arginfo.from_arguments(builder, wrapfn.args)
    status, _ = self.call_conv.call_function(builder, func, types.void,
                                             argtypes, callargs)

    if debug:
        # Check error status
        with cgutils.if_likely(builder, status.is_ok):
            builder.ret_void()

        with builder.if_then(builder.not_(status.is_python_exc)):
            # User exception raised
            old = ir.Constant(gv_exc.type.pointee, None)

            # Use atomic cmpxchg to prevent rewriting the error status
            # Only the first error is recorded
            if nvvm.NVVM().is_nvvm70:
                xchg = builder.cmpxchg(gv_exc, old, status.code,
                                       'monotonic', 'monotonic')
                # Element 1 of the cmpxchg result is used as the
                # "exchange happened" flag.
                changed = builder.extract_value(xchg, 1)
            else:
                # Otherwise call an externally provided compare-and-swap
                # function declared here by name.
                casfnty = ir.FunctionType(
                    old.type, [gv_exc.type, old.type, old.type])
                cas_hack = "___numba_atomic_i32_cas_hack"
                casfn = ir.Function(wrapper_module, casfnty, name=cas_hack)
                xchg = builder.call(casfn, [gv_exc, old, status.code])
                # The swap is treated as successful when the call returns
                # the expected old value.
                changed = builder.icmp_unsigned('==', xchg, old)

            # If the xchange is successful, save the thread ID.
            sreg = nvvmutils.SRegBuilder(builder)
            with builder.if_then(changed):
                for dim, ptr, in zip("xyz", gv_tid):
                    val = sreg.tid(dim)
                    builder.store(val, ptr)

                for dim, ptr, in zip("xyz", gv_ctaid):
                    val = sreg.ctaid(dim)
                    builder.store(val, ptr)

    builder.ret_void()

    # Mark the wrapper as a CUDA kernel, add the module to the library and
    # finalize it, then look the wrapper up again by name in the library.
    nvvm.set_cuda_kernel(wrapfn)
    library.add_ir_module(wrapper_module)
    library.finalize()
    wrapfn = library.get_function(wrapfn.name)
    return wrapfn