示例#1
0
    def __init__(self):
        """Record the bitcode directory and target agent; load librocmlite once.

        The loaded library handle is stored on the class
        (``type(self).hlc``), so the loading branch only runs while that
        attribute is still ``None``.

        Raises
        ------
        AssertionError
            if ``<sys.prefix>/share/rocmtools`` is not an existing directory.
        ImportError
            if ``<sys.prefix>/lib/librocmlite.so`` cannot be loaded.
        """
        prefix = sys.prefix
        rocm_share = os.path.join(prefix, 'share', 'rocmtools')
        # isdir() is False for a missing path, so it also covers existence.
        assert os.path.isdir(rocm_share)
        self.bitcode_path = rocm_share
        self.target_cpu = devices.get_context().agent.name

        if self.hlc is not None:
            # The library handle is already cached on the class.
            return

        lib_file = os.path.join(prefix, 'lib', 'librocmlite.so')
        try:
            lib = CDLL(lib_file)
        except OSError:
            raise ImportError("librocmlite.so cannot be found.  Please "
                              "install the roctools package by: "
                              "conda install -c numba roctools")

        # Declare the non-default return types before the library is used.
        lib.ROC_ParseModule.restype = moduleref_ptr
        lib.ROC_ParseBitcode.restype = moduleref_ptr
        lib.ROC_ModuleEmitBRIG.restype = c_size_t

        lib.ROC_Initialize()
        # Pair the initialization with ROC_Finalize through weakref.finalize.
        weakref.finalize(lib, lib.ROC_Finalize)

        lib.ROC_SetCommandLineOption.argtypes = [c_int, c_void_p]

        # Publish on the class so later instances skip the loading branch.
        type(self).hlc = lib
示例#2
0
    def _initialize(self):
        """Build the command-line templates stored in the ``CMD_*`` attributes.

        When ``self.initialized`` is false, the agent name of the current
        device context is decoded as UTF-8 and stored in ``self.target_cpu``
        as a ``-mcpu <name>`` flag.  The templates are then rebuilt from
        ``self.opt``, ``self.llc``, ``self.llvm_link``, ``self.ld_lld``,
        ``self.triple_flag`` and ``self.target_cpu``.  Each template keeps
        ``{fout}`` / ``{fin}`` (and ``{lib}`` for ``CMD_LINK_BUILTINS``)
        placeholders to be filled in with ``str.format``.

        This method does not itself change ``self.initialized``.
        """
        if not self.initialized:
            agent_name = devices.get_context().agent.name
            self.target_cpu = "-mcpu %s" % agent_name.decode('UTF-8')

        # Argument runs shared by several of the templates below.
        opt_tool = self.opt
        target_flags = [self.triple_flag, self.target_cpu]
        out_in = ["-o {fout}", "{fin}"]
        text_out_in = ["-S"] + out_in

        self.CMD_OPT = ' '.join(
            [opt_tool, "-O3"] + target_flags
            + ["-disable-simplify-libcalls", "-verify"] + text_out_in)

        self.CMD_VERIFY = ' '.join(
            [opt_tool, "-verify"] + target_flags + text_out_in)

        # The two llc templates differ only in the requested file type.
        codegen = [self.llc, "-O2"] + target_flags
        self.CMD_GEN_HSAIL = ' '.join(codegen + ["-filetype=asm"] + out_in)
        self.CMD_GEN_BRIG = ' '.join(codegen + ["-filetype=obj"] + out_in)

        link_tool = self.llvm_link
        self.CMD_LINK_BUILTINS = ' '.join(
            [link_tool] + text_out_in + ["{lib}"])
        self.CMD_LINK_LIBS = ' '.join([link_tool] + text_out_in)

        self.CMD_LINK_BRIG = ' '.join([self.ld_lld, "-shared"] + out_in)
示例#3
0
def to_device(obj, stream=None, context=None, copy=True, to=None):
    """to_device(obj, stream=None, context=None, copy=True, to=None)

    Allocate and transfer a numpy ndarray or structured scalar to the device.

    If ``to`` is not given, a destination is created with
    ``devicearray.from_array_like(obj)``.  If ``context`` is not given (or
    is falsy), ``get_context()`` supplies it.  When ``copy`` is true,
    ``obj`` is copied into the destination; the destination is returned
    either way.

    To copy host->device a numpy array::

        ary = numpy.arange(10)
        d_ary = roc.to_device(ary)

    The resulting ``d_ary`` is a ``DeviceNDArray``.

    To copy device->host::

        hary = d_ary.copy_to_host()

    To copy device->host to an existing array::

        ary = numpy.empty(shape=d_ary.shape, dtype=d_ary.dtype)
        d_ary.copy_to_host(ary)

    """
    if not context:
        context = get_context()

    if to is None:
        destination = devicearray.from_array_like(obj)
    else:
        destination = to

    if not copy:
        return destination

    destination.copy_to_device(obj, stream=stream, context=context)
    return destination
示例#4
0
文件: api.py 项目: cpcloud/numba
def to_device(obj, stream=None, context=None, copy=True, to=None):
    """to_device(obj, stream=None, context=None, copy=True, to=None)

    Allocate and transfer a numpy ndarray or structured scalar to the device.

    The destination is ``to`` when provided, otherwise the result of
    ``devicearray.from_array_like(obj)``.  A missing (or falsy) ``context``
    is replaced by ``get_context()``.  The copy into the destination only
    happens when ``copy`` is true; the destination is always returned.

    To copy host->device a numpy array::

        ary = numpy.arange(10)
        d_ary = roc.to_device(ary)

    The resulting ``d_ary`` is a ``DeviceNDArray``.

    To copy device->host::

        hary = d_ary.copy_to_host()

    To copy device->host to an existing array::

        ary = numpy.empty(shape=d_ary.shape, dtype=d_ary.dtype)
        d_ary.copy_to_host(ary)

    """
    # Resolve the context first, then the destination array.
    ctx = context if context else get_context()
    device_ary = devicearray.from_array_like(obj) if to is None else to

    if copy:
        device_ary.copy_to_device(obj, stream=stream, context=ctx)

    return device_ary
示例#5
0
文件: hlc.py 项目: cpcloud/numba
    def _initialize(self):
        """Fill in the ``CMD_*`` command-line templates for the external tools.

        If ``self.initialized`` is false, ``self.target_cpu`` is first set to
        ``-mcpu <name>``, where ``<name>`` is the UTF-8 decoded agent name of
        the current device context.  Every template is a space-separated
        string containing ``{fout}`` and ``{fin}`` placeholders
        (``CMD_LINK_BUILTINS`` additionally has ``{lib}``), intended to be
        completed with ``str.format``.

        ``self.initialized`` is not modified here.
        """
        if not self.initialized:
            raw_name = devices.get_context().agent.name
            self.target_cpu = "-mcpu %s" % raw_name.decode('UTF-8')

        def command(*pieces):
            # Every template is its pieces separated by single spaces.
            return ' '.join(pieces)

        # opt: optimize, or only verify, emitting textual output (-S).
        self.CMD_OPT = command(
            self.opt, "-O3", self.triple_flag, self.target_cpu,
            "-disable-simplify-libcalls", "-verify", "-S",
            "-o {fout}", "{fin}",
        )
        self.CMD_VERIFY = command(
            self.opt, "-verify", self.triple_flag, self.target_cpu, "-S",
            "-o {fout}", "{fin}",
        )

        # llc: same invocation twice, differing only in -filetype.
        self.CMD_GEN_HSAIL = command(
            self.llc, "-O2", self.triple_flag, self.target_cpu,
            "-filetype=asm", "-o {fout}", "{fin}",
        )
        self.CMD_GEN_BRIG = command(
            self.llc, "-O2", self.triple_flag, self.target_cpu,
            "-filetype=obj", "-o {fout}", "{fin}",
        )

        # llvm-link: with and without an extra {lib} input.
        self.CMD_LINK_BUILTINS = command(
            self.llvm_link, "-S", "-o {fout}", "{fin}", "{lib}",
        )
        self.CMD_LINK_LIBS = command(
            self.llvm_link, "-S", "-o {fout}", "{fin}",
        )

        # ld.lld: produce a shared object.
        self.CMD_LINK_BRIG = command(
            self.ld_lld, "-shared", "-o {fout}", "{fin}",
        )
示例#6
0
    def __init__(self, shape, strides, dtype, dgpu_data=None):
        """
        Args
        ----

        shape
            array shape; a bare integer is treated as a 1-tuple.
        strides
            array strides; a bare integer is treated as a 1-tuple.
        dtype
            data type as numpy.dtype, or anything ``numpy.dtype()`` accepts.
        dgpu_data
            user provided device memory for the ndarray data buffer.  It
            must expose a ``_hsa_memsize_`` attribute, which is recorded as
            ``alloc_size``.  When omitted and the array is non-empty, memory
            is requested from the current device context.  Discarded (set to
            ``None``) when the array has zero elements.

        Raises
        ------
        ValueError
            if ``strides`` and ``shape`` differ in length, or if a provided
            ``dgpu_data`` has no ``_hsa_memsize_`` attribute.
        """
        if isinstance(shape, (int, long)):
            shape = (shape,)
        if isinstance(strides, (int, long)):
            strides = (strides,)
        self.ndim = len(shape)
        if len(strides) != self.ndim:
            raise ValueError("strides not match ndim")
        # Normalize before reading ``itemsize``: a dtype-like such as
        # "float32" or ``np.float32`` has no usable ``itemsize`` of its own.
        dtype = np.dtype(dtype)
        self._dummy = dummyarray.Array.from_desc(0, shape, strides,
                                                 dtype.itemsize)
        self.shape = tuple(shape)
        self.strides = tuple(strides)
        self.dtype = dtype
        self.size = int(np.prod(self.shape))
        # prepare dgpu memory
        if self.size > 0:
            if dgpu_data is None:
                # Imported inside the method rather than at module level;
                # presumably to avoid an import cycle with numba.roc.api.
                from numba.roc.api import _memory_size_from_info

                self.alloc_size = _memory_size_from_info(
                    self.shape, self.strides, self.dtype.itemsize
                )
                # find a coarse region on the dGPU
                dgpu_data = devices.get_context().mempoolalloc(self.alloc_size)
            else:  # we have some preallocated dgpu_memory
                sz = getattr(dgpu_data, "_hsa_memsize_", None)
                if sz is None:
                    raise ValueError("dgpu_data as no _hsa_memsize_ attribute")
                assert sz >= 0
                self.alloc_size = sz
        else:
            # Zero-element arrays own no device memory.
            dgpu_data = None
            self.alloc_size = 0

        self.dgpu_data = dgpu_data