Пример #1
0
def main(dtype):
    """Time the two-term linear-combination kernel over growing vector sizes.

    For every power-of-two length from 2**10 through 2**25 this creates two
    random input vectors, two GPU-resident scalars (5 and 7) and an output
    vector, then launches the kernel 20 times between a pair of CUDA events.
    One line is printed per size: the length, its exponent, and the value of
    ``stop.time_since(start)``.

    Relies on the module-level names ``gpuarray``, ``numpy`` and ``drv``.

    :arg dtype: element type used for the scalars, the vectors and the result.
    """
    # Resolve both imports once up front rather than re-running the
    # ``curandom`` import statement on every pass of the size loop.
    from pycuda.curandom import rand
    from pycuda.elementwise import get_linear_combination_kernel

    # One descriptor tuple per summand.  The leading ``True`` presumably
    # marks the scalar factor as GPU-resident (it is bound through a texture
    # reference below) -- confirm against the PyCUDA sources.
    lc_kernel, lc_texrefs = get_linear_combination_kernel(
        ((True, dtype, dtype), (True, dtype, dtype)), dtype)

    for size_exp in range(10, 26):
        size = 1 << size_exp

        # Scalar factors are uploaded as 0-d arrays so they can be bound to
        # the kernel's texture references.
        a = gpuarray.to_gpu(numpy.array(5, dtype=dtype))
        x = rand(size, dtype=dtype)
        b = gpuarray.to_gpu(numpy.array(7, dtype=dtype))
        y = rand(size, dtype=dtype)

        z = gpuarray.empty_like(x)

        start = drv.Event()
        stop = drv.Event()
        start.record()

        # The texref binding is deliberately kept inside the timed region,
        # so the measurement covers binding plus launch.
        for _ in range(20):
            a.bind_to_texref_ext(lc_texrefs[0], allow_double_hack=True)
            b.bind_to_texref_ext(lc_texrefs[1], allow_double_hack=True)
            lc_kernel.prepared_call(x._grid, x._block, x.gpudata, y.gpudata,
                                    z.gpudata, x.mem_size)

        stop.record()
        # Wait for the stop event before reading the elapsed time.
        stop.synchronize()

        print(size, size_exp, stop.time_since(start))
Пример #2
0
def main(dtype):
    """Time the two-term linear-combination kernel over growing vector sizes.

    For every power-of-two length from 2**10 through 2**25 this creates two
    random input vectors, two GPU-resident scalars (5 and 7) and an output
    vector, then launches the kernel 20 times between a pair of CUDA events.
    One line is printed per size: the length, its exponent, and the value of
    ``stop.time_since(start)``.

    Relies on the module-level names ``gpuarray``, ``numpy`` and ``drv``.

    :arg dtype: element type used for the scalars, the vectors and the result.
    """
    # Resolve both imports once up front rather than re-running the
    # ``curandom`` import statement on every pass of the size loop.
    from pycuda.curandom import rand
    from pycuda.elementwise import get_linear_combination_kernel

    # One descriptor tuple per summand.  The leading ``True`` presumably
    # marks the scalar factor as GPU-resident (it is bound through a texture
    # reference below) -- confirm against the PyCUDA sources.
    lc_kernel, lc_texrefs = get_linear_combination_kernel((
        (True, dtype, dtype),
        (True, dtype, dtype)
        ), dtype)

    for size_exp in range(10, 26):
        size = 1 << size_exp

        # Scalar factors are uploaded as 0-d arrays so they can be bound to
        # the kernel's texture references.
        a = gpuarray.to_gpu(numpy.array(5, dtype=dtype))
        x = rand(size, dtype=dtype)
        b = gpuarray.to_gpu(numpy.array(7, dtype=dtype))
        y = rand(size, dtype=dtype)

        z = gpuarray.empty_like(x)

        start = drv.Event()
        stop = drv.Event()
        start.record()

        # The texref binding is deliberately kept inside the timed region,
        # so the measurement covers binding plus launch.
        for _ in range(20):
            a.bind_to_texref_ext(lc_texrefs[0], allow_double_hack=True)
            b.bind_to_texref_ext(lc_texrefs[1], allow_double_hack=True)
            lc_kernel.prepared_call(x._grid, x._block,
                x.gpudata, y.gpudata, z.gpudata, x.mem_size)

        stop.record()
        # Wait for the stop event before reading the elapsed time.
        stop.synchronize()

        # Function-call form: the Python 2 print statement used here before
        # is a SyntaxError on Python 3.  The space-separated output is the
        # same as the old statement produced.
        print(size, size_exp, stop.time_since(start))
Пример #3
0
    def __init__(self, result_dtype, scalar_dtype, sample_vec, arg_count,
            pool=None):
        """Prepare a linear-combination kernel shaped after *sample_vec*.

        :arg result_dtype: dtype handed to the kernel builder for the result.
        :arg scalar_dtype: dtype of each summand's scalar factor.
        :arg sample_vec: array whose dtype, shape, launch configuration
            (``_block`` / ``_grid``) and ``mem_size`` are recorded on the
            instance.
        :arg arg_count: number of summands the kernel is built for.
        :arg pool: optional object providing ``allocate``; when it is truthy
            its ``allocate`` method is stored as ``self.allocator``,
            otherwise ``self.allocator`` is ``None``.
        """
        from pycuda.elementwise import get_linear_combination_kernel

        # Remember everything that is read off the sample vector.
        self.vector_dtype = sample_vec.dtype
        self.result_dtype = result_dtype
        self.shape = sample_vec.shape
        self.block = sample_vec._block
        self.grid = sample_vec._grid
        self.mem_size = sample_vec.mem_size

        # Every summand shares one descriptor; the leading False presumably
        # means the scalar factor is not GPU-resident -- confirm against
        # the PyCUDA sources.
        summand = (False, scalar_dtype, self.vector_dtype)
        summands = arg_count*(summand,)

        # Only the kernel is kept; the second element of the returned pair
        # is not needed here.
        self.kernel, _unused = get_linear_combination_kernel(
                summands, result_dtype)

        self.allocator = pool.allocate if pool else None
Пример #4
0
    def __init__(self, result_dtype, scalar_dtype, sample_vec, arg_count,
            pool=None):
        """Prepare a linear-combination kernel shaped after *sample_vec*.

        :arg result_dtype: dtype handed to the kernel builder for the result.
        :arg scalar_dtype: dtype of each summand's scalar factor.
        :arg sample_vec: array whose dtype, shape, launch configuration
            (``_block`` / ``_grid``) and ``mem_size`` are recorded on the
            instance.
        :arg arg_count: number of summands the kernel is built for.
        :arg pool: optional object providing ``allocate``; when it is truthy
            its ``allocate`` method is stored as ``self.allocator``,
            otherwise ``self.allocator`` is ``None``.
        """
        from pycuda.elementwise import get_linear_combination_kernel

        # Remember everything that is read off the sample vector.
        self.vector_dtype = sample_vec.dtype
        self.result_dtype = result_dtype
        self.shape = sample_vec.shape
        self.block = sample_vec._block
        self.grid = sample_vec._grid
        self.mem_size = sample_vec.mem_size

        # Every summand shares one descriptor; the leading False presumably
        # means the scalar factor is not GPU-resident -- confirm against
        # the PyCUDA sources.
        summand = (False, scalar_dtype, self.vector_dtype)
        summands = arg_count*(summand,)

        # Only the kernel is kept; the second element of the returned pair
        # is not needed here.
        self.kernel, _unused = get_linear_combination_kernel(
                summands, result_dtype)

        self.allocator = pool.allocate if pool else None
Пример #5
0
 def make_lc2_kernel(self, dtype, a_is_gpu, b_is_gpu):
     """Build the two-summand linear-combination kernel for *dtype*.

     :arg dtype: used for both entries after the flag in each summand
         descriptor and as the result dtype.
     :arg a_is_gpu: flag placed first in the first summand's descriptor.
     :arg b_is_gpu: flag placed first in the second summand's descriptor.
     :returns: whatever ``get_linear_combination_kernel`` returns, unchanged.
     """
     from pycuda.elementwise import get_linear_combination_kernel

     # Name each summand descriptor before handing the pair to PyCUDA.
     first_summand = (a_is_gpu, dtype, dtype)
     second_summand = (b_is_gpu, dtype, dtype)
     descriptors = (first_summand, second_summand)
     return get_linear_combination_kernel(descriptors, dtype)
Пример #6
0
 def make_lc2_kernel(self, dtype, a_is_gpu, b_is_gpu):
     """Build the two-summand linear-combination kernel for *dtype*.

     :arg dtype: used for both entries after the flag in each summand
         descriptor and as the result dtype.
     :arg a_is_gpu: flag placed first in the first summand's descriptor.
     :arg b_is_gpu: flag placed first in the second summand's descriptor.
     :returns: whatever ``get_linear_combination_kernel`` returns, unchanged.
     """
     from pycuda.elementwise import get_linear_combination_kernel

     # Name each summand descriptor before handing the pair to PyCUDA.
     first_summand = (a_is_gpu, dtype, dtype)
     second_summand = (b_is_gpu, dtype, dtype)
     descriptors = (first_summand, second_summand)
     return get_linear_combination_kernel(descriptors, dtype)