示例#1
0
def grid_expand(ndim):
    """grid(ndim)

    Give the calling thread's absolute position within the whole grid of
    blocks.

    *ndim* must match the number of dimensions the kernel was instantiated
    with.  For *ndim* equal to 1 the result is a single integer; for 2 or 3
    it is a tuple holding that many integers.

    The first component is computed as::

        cuda.threadIdx.x + cuda.blockIdx.x * cuda.blockDim.x

    The remaining components follow the same formula using the ``y`` and
    ``z`` attributes respectively.

    Raises ``ValueError`` when *ndim* is not 1, 2 or 3.
    """
    # Pick the supported dimensionality that compares equal to the request;
    # candidates are tried in ascending order and the first match wins.
    dims = next((d for d in (1, 2, 3) if ndim == d), None)
    if dims is None:
        raise ValueError('argument can only be 1, 2, 3')

    intrinsic_name = "ptx.grid.%dd" % dims
    if dims == 1:
        # One-dimensional grids yield a scalar rather than a 1-tuple.
        result_type = types.int32
    else:
        result_type = types.UniTuple(types.int32, dims)

    call_sig = typing.signature(result_type, types.intp)
    # The caller's original argument is forwarded unchanged.
    return ir.Intrinsic(intrinsic_name, call_sig, args=[ndim])
示例#2
0
def local_array(shape, dtype):
    """Build the ``ptx.lmem.alloc`` intrinsic for an array of *dtype*.

    *shape* is first passed through ``_legalize_shape``; the length of the
    value it returns is used as the array's number of dimensions.  The
    intrinsic's signature returns a ``types.Array`` of *dtype* with that
    many dimensions and layout ``'C'``, and takes a uniform tuple of
    ``types.intp`` (one entry per dimension) plus ``types.Any``.
    """
    # NOTE(review): "lmem" presumably stands for local memory -- confirm.
    legal_shape = _legalize_shape(shape)
    rank = len(legal_shape)

    array_type = types.Array(dtype, rank, 'C')
    shape_type = types.UniTuple(types.intp, rank)
    alloc_sig = typing.signature(array_type, shape_type, types.Any)

    return ir.Intrinsic("ptx.lmem.alloc", alloc_sig, args=(legal_shape, dtype))
示例#3
0
def gridsize_expand(ndim):
    """
    Give the absolute size (or shape), in threads, of the whole grid of
    blocks.

    *ndim* must match the number of dimensions the kernel was instantiated
    with.  For *ndim* equal to 1 the intrinsic's result type is a single
    integer; for 2 or 3 it is a tuple holding that many integers.

    The first component is computed as::

        cuda.blockDim.x * cuda.gridDim.x

    The remaining components follow the same formula using the ``y`` and
    ``z`` attributes respectively.

    Raises ``ValueError`` when *ndim* is not 1, 2 or 3.
    """
    # Search the supported dimensionalities in ascending order; falling off
    # the end of the loop means the request matched none of them.
    for dims in (1, 2, 3):
        if ndim == dims:
            break
    else:
        raise ValueError('argument can only be 1, 2 or 3')

    intrinsic_name = "ptx.gridsize.%dd" % dims
    if dims == 1:
        # One-dimensional grids yield a scalar rather than a 1-tuple.
        result_type = types.int32
    else:
        result_type = types.UniTuple(types.int32, dims)

    call_sig = typing.signature(result_type, types.intp)
    # The caller's original argument is forwarded unchanged.
    return ir.Intrinsic(intrinsic_name, call_sig, args=[ndim])
示例#4
0
 def _expand_non_callable_macro(self, macro, loc):
     """
     Build the IR expression that a non-callable macro expands to.

     The macro's ``name`` and ``func`` are wrapped in an ``ir.Intrinsic``
     with no arguments, and the result is a zero-argument call expression
     on that intrinsic located at *loc*.
     """
     intrinsic = ir.Intrinsic(macro.name, macro.func, args=())
     # The macro takes no arguments, so positional and keyword args are empty.
     return ir.Expr.call(func=intrinsic, args=(), kws=(), loc=loc)
示例#5
0
def shared_array(shape, dtype):
    """Build the ``hsail.smem.alloc`` intrinsic for an array of *dtype*.

    *shape* is first passed through ``_legalize_shape``; the length of the
    value it returns is used as the array's number of dimensions.  The
    intrinsic's signature returns a ``types.Array`` of *dtype* with that
    many dimensions and layout ``"C"``, and takes a uniform tuple of
    ``types.intp`` (one entry per dimension) plus ``types.Any``.
    """
    # NOTE(review): "smem" presumably stands for shared memory -- confirm.
    legal_shape = _legalize_shape(shape)
    rank = len(legal_shape)

    array_type = types.Array(dtype, rank, "C")
    shape_type = types.UniTuple(types.intp, rank)
    alloc_sig = typing.signature(array_type, shape_type, types.Any)

    return ir.Intrinsic("hsail.smem.alloc", alloc_sig,
                        args=(legal_shape, dtype))
示例#6
0
def const_array_like(ndarray):
    """Build the ``ptx.cmem.arylike`` intrinsic for *ndarray*.

    The type of *ndarray* is resolved through the CUDA target's typing
    context; the intrinsic's signature both accepts and returns that type,
    and *ndarray* itself is passed as the sole argument.
    """
    # Imported at call time rather than at module import time.
    # NOTE(review): presumably to avoid a circular import -- confirm.
    from .descriptor import CUDATargetDesc

    array_type = CUDATargetDesc.typingctx.resolve_argument_type(ndarray)
    like_sig = typing.signature(array_type, array_type)

    return ir.Intrinsic("ptx.cmem.arylike", like_sig, args=[ndarray])
示例#7
0
文件: test_ir.py 项目: zsoltc89/numba
 def test_intrinsic(self):
     """
     Compare ``ir.Intrinsic`` instances that vary one constructor argument.

     Instances built with the same first three arguments are passed to
     ``self.check`` as ``same`` even when their location differs; changing
     the first, second or third argument puts the instance in ``different``.
     """
     reference = ir.Intrinsic('foo', 'bar', (0, ), self.loc1)
     twin_same_loc = ir.Intrinsic('foo', 'bar', (0, ), self.loc1)
     twin_other_loc = ir.Intrinsic('foo', 'bar', (0, ), self.loc2)
     first_arg_changed = ir.Intrinsic('baz', 'bar', (0, ), self.loc1)
     second_arg_changed = ir.Intrinsic('foo', 'baz', (0, ), self.loc1)
     third_arg_changed = ir.Intrinsic('foo', 'bar', (1, ), self.loc1)
     self.check(
         reference,
         same=[twin_same_loc, twin_other_loc],
         different=[first_arg_changed, second_arg_changed, third_arg_changed],
     )
示例#8
0
 def test_intrinsic(self):
     """
     Compare ``ir.Intrinsic`` instances that vary one constructor argument.

     ``self.check`` receives a baseline instance, a list of instances
     expected to be the same as it (including one that differs only in
     location) and a list expected to be different (each changing exactly
     one of the first three arguments).
     """
     baseline = ir.Intrinsic("foo", "bar", (0,), self.loc1)
     # Same first three arguments; the second entry only changes location.
     expected_same = [
         ir.Intrinsic("foo", "bar", (0,), self.loc1),
         ir.Intrinsic("foo", "bar", (0,), self.loc2),
     ]
     # Each entry changes one of the first three arguments, in order.
     expected_different = [
         ir.Intrinsic("baz", "bar", (0,), self.loc1),
         ir.Intrinsic("foo", "baz", (0,), self.loc1),
         ir.Intrinsic("foo", "bar", (1,), self.loc1),
     ]
     self.check(baseline, same=expected_same, different=expected_different)