示例#1
0
    def __call__(self, input, u, v):
        """Run the kernel on ``input``, ``u`` and ``v`` and wrap the result.

        The ``data`` arrays of the three operands and a freshly created
        output array are each copied to a device buffer (non-blocking) and
        bound, in that order, to kernel arguments 0 through 3.  Once all
        transfers have completed the kernel is enqueued over
        ``self.global_size`` and the output buffer is read back.

        :param input: operand bound to kernel argument 0; its ``data``
            array is also the template passed to ``zeros_like`` for the
            output.
        :param u: operand bound to kernel argument 1.
        :param v: operand bound to kernel argument 2.
        :returns: ``Array(unique_name(), output)`` wrapping the host
            output array.
        """
        output = zeros_like(input.data)
        pending = []

        def upload(position, host_array):
            # Copy one host array to the device and bind it as a kernel
            # argument; the transfer event is collected for the joint wait.
            dev_buf, copy_evt = buffer_from_ndarray(self.queue, host_array,
                                                    blocking=False)
            pending.append(copy_evt)
            self.kernel.setarg(position, dev_buf, sizeof(cl_mem))
            return dev_buf

        # Keep the operand buffers referenced until the call has finished.
        operand_bufs = [upload(position, operand.data)
                        for position, operand in enumerate((input, u, v))]
        out_buf = upload(len(operand_bufs), output)

        clWaitForEvents(*pending)
        run_evt = clEnqueueNDRangeKernel(self.queue, self.kernel,
                                         self.global_size)
        run_evt.wait()
        _, read_evt = buffer_to_ndarray(self.queue, out_buf, output)
        read_evt.wait()
        return Array(unique_name(), output)
示例#2
0
文件: core.py 项目: lowks/hindemith
 def process_args(self, *args):
     """Convert call arguments into the values passed to the kernel.

     Each ``np.ndarray`` is copied to a device buffer (non-blocking) and
     replaces the current output with ``buf.empty_like_this()``.  Ints are
     passed through unchanged.  A float is passed through only while the
     output is still the initial ``c_int`` placeholder, which is then
     swapped for a ``c_float``.  If ``self.output`` is set it is uploaded
     and used as the output instead.  The output (or a reference to the
     scalar placeholder) is appended as the last processed argument.

     :param *args: ndarrays, ints or floats.
     :returns: ``(processed, output, out_like)`` where ``processed`` is
         the argument list including the trailing output, ``output`` is
         the output buffer or ctypes scalar, and ``out_like`` is the host
         array the output was modelled on (``None`` if there was none).
     :raises NotImplementedError: for an argument of any other type, or
         for a float seen when the output is no longer the ``c_int``
         placeholder.
     """
     processed = []
     events = []
     output = ct.c_int()
     out_like = None
     for arg in args:
         if isinstance(arg, np.ndarray):
             buf, evt = cl.buffer_from_ndarray(self.queue, arg,
                                               blocking=False)
             processed.append(buf)
             events.append(evt)
             output = buf.empty_like_this()
             out_like = arg
         elif isinstance(arg, int):
             processed.append(arg)
         elif isinstance(arg, float) and isinstance(output, ct.c_int):
             processed.append(arg)
             output = ct.c_float()
         else:
             raise NotImplementedError(
                 "UnsupportedType: %s" % type(arg)
             )
     if self.output is not None:
         output, evt = cl.buffer_from_ndarray(self.queue, self.output,
                                              blocking=False)
         out_like = self.output
         evt.wait()
     if isinstance(output, cl.cl_mem):
         processed.append(output)
     else:
         # ctypes instances have no ``byref`` attribute; the reference has
         # to be created with the module-level ``byref`` function.
         # NOTE(review): assumes ``ct`` is the ``ctypes`` module - confirm.
         processed.append(ct.byref(output))
     if events:
         # Only wait when at least one transfer was actually enqueued.
         cl.clWaitForEvents(*events)
     return processed, output, out_like
示例#3
0
    def __call__(self, *args):
        """Execute the compiled stencil on ``args`` and return the output.

        The output is created from the first argument: ``empty_like`` when
        it is an ``hmarray``, ``np.zeros_like`` otherwise.  ``hmarray``
        operands contribute their existing ``ocl_buf``; every other operand
        is copied to a new device buffer.  The compiled function is called
        with the queue, the kernel (or the 2, 3 or 4 kernels when
        ``self.kernel`` is a list) and all buffers.

        :param *args: input arrays; the first one is the template for the
            output.
        :returns: the ``hmarray`` output as is, or otherwise the first
            value returned by ``buffer_to_ndarray`` after reading the last
            buffer back into the host output.
        :raises StencilException: if the compiled function returns a
            status other than ``CL_SUCCESS``.
        """
        template = args[0]
        if isinstance(template, hmarray):
            output = empty_like(template)
        else:
            output = np.zeros_like(template)

        buffers = []
        events = []
        for operand in args + (output, ):
            if isinstance(operand, hmarray):
                buffers.append(operand.ocl_buf)
                continue
            dev_buf, copy_evt = buffer_from_ndarray(self.queue, operand,
                                                    blocking=True)
            events.append(copy_evt)
            buffers.append(dev_buf)
        cl.clWaitForEvents(*events)

        cl_error = 0
        if not isinstance(self.kernel, list):
            cl_error = self._c_function(self.queue, self.kernel, *buffers)
        elif len(self.kernel) in (2, 3, 4):
            # Kernels come first, followed by the buffers; lists of any
            # other length do not invoke the function at all.
            call_args = list(self.kernel) + buffers
            cl_error = self._c_function(self.queue, *call_args)

        status = cl.cl_errnum(cl_error)
        if status != cl.cl_errnum.CL_SUCCESS:
            raise StencilException(
                "Error executing stencil kernel: opencl {} {}".format(
                    cl_error, status
                )
            )

        if isinstance(output, hmarray):
            return output
        result, read_evt = buffer_to_ndarray(self.queue, buffers[-1], output)
        read_evt.wait()
        return result
示例#4
0
 def process_inputs(self, *args):
     """Upload every argument and bind it to the kernel by position.

     The kernel's ``argtypes`` is set to one ``cl_mem`` per argument.
     ``types.common.Array`` arguments are unwrapped to their ``data``
     attribute; each value is then copied to a device buffer
     (non-blocking) and set as the kernel argument with the same index.
     The method returns once all transfers have completed.

     :param *args: values to upload, in kernel-argument order.
     :returns: list of the device buffers, in argument order.
     """
     self.kernel.argtypes = (cl_mem, ) * len(args)
     device_bufs = []
     pending = []
     for position, value in enumerate(args):
         host = value.data if isinstance(value, types.common.Array) else value
         dev_buf, copy_evt = buffer_from_ndarray(self.queue, host,
                                                 blocking=False)
         device_bufs.append(dev_buf)
         pending.append(copy_evt)
         self.kernel.setarg(position, dev_buf, sizeof(cl_mem))
     clWaitForEvents(*pending)
     return device_bufs
示例#5
0
        def fn(*args, **kwargs):
            """Execute the block on ``args`` and return its sink values.

            Positional arguments are bound to ``filtered_sources`` in
            order by storing them in ``self.symbol_table``.  On the first
            call (while ``kernels`` is still empty) the operations in
            ``block`` are turned into kernels or launchers, compiled and
            recorded on ``self.kernels``; later calls reuse them.  Each
            kernel is launched with the events of earlier kernels that
            wrote any of its sources.

            :param *args: values for ``filtered_sources``, in order.
            :param **kwargs: accepted but not used.
            :returns: the symbol-table value of the only filtered sink, or
                a tuple of values when their number is not exactly one.
            """
            for source, arg in zip(filtered_sources, args):
                self.symbol_table[source.name] = arg
            if not kernels:
                for op, params in zip(block, block_params):
                    _sinks, _sources = params
                    if self.is_not_device_level(op):
                        launch_params = self.get_launch_params(
                            op, _sources, _sinks)
                        # Start a new kernel unless the op can be appended
                        # to the previous one: there must be a previous
                        # Kernel whose first launch parameter matches and
                        # the op's second launch parameter, when present,
                        # must be falsy.
                        if not kernels or \
                                not isinstance(kernels[-1], Kernel) or \
                                kernels[-1].launch_parameters[0] != \
                                launch_params[0] or \
                                (len(launch_params) > 1 and launch_params[1]):
                            kernels.append(Kernel(launch_params))
                        kernels[-1].append_body(
                            self.get_emit(op, _sources, _sinks))
                        # Only hmarray-valued symbols are tracked as kernel
                        # sources and sinks.
                        for source in _sources:
                            if isinstance(self.symbol_table[source.name],
                                          hmarray):
                                kernels[-1].sources.add(source)
                        for sink in _sinks:
                            if isinstance(self.symbol_table[sink.name],
                                          hmarray):
                                kernels[-1].sinks.add(sink)
                    else:
                        kernels.append(self.get_launcher(op, _sources, _sinks))
                for kernel in kernels:
                    kernel.compile()
                    self.kernels.append(kernel)
            # Maps a sink name to the events of the kernel that last wrote it.
            kernel_map = {}
            # Bound up front so the final wait cannot hit an unbound name
            # when there are no kernels to launch.
            evts = []
            for kernel in kernels:
                wait_for = []
                for source in kernel.sources:
                    if source.name in kernel_map:
                        wait_for.extend(kernel_map[source.name])
                evts = kernel.launch(self.symbol_table, wait_for)
                for sink in kernel.sinks:
                    kernel_map[sink.name] = evts

            # Only the events of the last launched kernel are waited on.
            if evts and backend in {"ocl", "opencl", "OCL"}:
                cl.clWaitForEvents(*evts)
            ret = tuple(self.symbol_table[sink.name]
                        for sink in filtered_sinks)
            if len(ret) == 1:
                return ret[0]
            return ret
示例#6
0
        def fn(*args, **kwargs):
            """Run the block with ``args`` bound to its sources.

            The first call builds and compiles the kernels for ``block``
            (they are appended to both ``kernels`` and ``self.kernels``);
            subsequent calls go straight to launching.  Kernels are
            launched in order, each receiving the events of earlier
            kernels that produced one of its sources.

            :param *args: values stored in ``self.symbol_table`` under the
                names of ``filtered_sources``, pairwise in order.
            :param **kwargs: accepted but not used.
            :returns: the value of the single filtered sink, or a tuple of
                sink values when their number is not exactly one.
            """
            # Bind the positional arguments to the block's sources.
            for src, value in zip(filtered_sources, args):
                self.symbol_table[src.name] = value

            if len(kernels) == 0:
                for op, (_sinks, _sources) in zip(block, block_params):
                    if not self.is_not_device_level(op):
                        # Device-level ops get their own launcher.
                        kernels.append(self.get_launcher(op, _sources, _sinks))
                        continue

                    launch_params = self.get_launch_params(
                        op, _sources, _sinks)
                    # A new kernel is needed when there is no previous
                    # Kernel to extend, its first launch parameter differs,
                    # or the op's second launch parameter is truthy.
                    start_new = (
                        len(kernels) == 0
                        or not isinstance(kernels[-1], Kernel)
                        or kernels[-1].launch_parameters[0] != launch_params[0]
                        or (len(launch_params) > 1 and launch_params[1])
                    )
                    if start_new:
                        kernels.append(Kernel(launch_params))

                    current = kernels[-1]
                    current.append_body(self.get_emit(op, _sources, _sinks))
                    for src in _sources:
                        if isinstance(self.symbol_table[src.name], hmarray):
                            current.sources.add(src)
                    for snk in _sinks:
                        if isinstance(self.symbol_table[snk.name], hmarray):
                            current.sinks.add(snk)

                for built in kernels:
                    built.compile()
                    self.kernels.append(built)

            # Sink name -> events of the kernel that most recently wrote it.
            produced_by = {}
            for kernel in kernels:
                dependencies = []
                for src in kernel.sources:
                    if src.name in produced_by:
                        dependencies.extend(produced_by[src.name])
                evts = kernel.launch(self.symbol_table, dependencies)
                for snk in kernel.sinks:
                    produced_by[snk.name] = evts

            if backend in {"ocl", "opencl", "OCL"}:
                cl.clWaitForEvents(*evts)

            results = tuple(self.symbol_table[snk.name]
                            for snk in filtered_sinks)
            return results[0] if len(results) == 1 else results
示例#7
0
    def __call__(self, *args):
        """Run the kernel over ``args`` and return the output array.

        ``self.output`` is used as the output when set (and is reset to
        ``None``); otherwise the output is ``np.zeros_like(args[0])``.
        All inputs and the output are copied to device buffers and bound
        to kernel arguments ``0 .. len(args)``; a local-memory argument
        follows at index ``len(args) + 1``.  The work-group edge is 8 on
        GPU devices and 1 otherwise.

        :param *args: input arrays; ``args[0]`` supplies ``ndim`` and the
            template for a default output.
        :returns: the first value returned by ``buffer_to_ndarray`` after
            reading the output buffer back into the host output
            (presumably the filled host array - confirm against pycl).
        """
        if self.output is not None:
            output = self.output
            self.output = None
        else:
            output = np.zeros_like(args[0])
        operands = args + (output, )
        self.kernel.argtypes = tuple(cl_mem for _ in operands) + (localmem, )
        bufs = []
        events = []
        for index, arg in enumerate(operands):
            buf, evt = buffer_from_ndarray(self.queue, arg, blocking=False)
            events.append(evt)
            bufs.append(buf)
            self.kernel.setarg(index, buf, sizeof(cl_mem))
        cl.clWaitForEvents(*events)
        if self.device.type == cl.cl_device_type.CL_DEVICE_TYPE_GPU:
            local = 8
        else:
            local = 1
        ndim = args[0].ndim
        # Local memory holds one float per point of a tile that is the
        # work-group edge plus a ghost zone on both sides, per dimension.
        tile_edge = local + (self.ghost_depth * 2)
        localmem_size = reduce(operator.mul,
                               (tile_edge for _ in range(ndim)),
                               sizeof(c_float))
        self.kernel.setarg(
            len(args) + 1, localmem(localmem_size), localmem_size)
        evt = clEnqueueNDRangeKernel(self.queue, self.kernel, self.global_size,
                                     (local, ) * ndim)
        evt.wait()
        result, evt = buffer_to_ndarray(self.queue, bufs[-1], output)
        evt.wait()
        # No explicit cleanup loop: ``del`` on a loop variable only unbinds
        # that name, and ``bufs`` goes out of scope when this call returns.
        return result