Example #1
import numpy

import pycuda.autoinit  # noqa: F401 -- initializes the CUDA context on import
import pycuda.driver as cuda


def main():
    from pytools import Table
    tbl = Table()
    tbl.add_row(("type", "size [MiB]", "time [ms]", "mem.bw [GB/s]"))

    for dtype_out in [numpy.float32, numpy.float64]:
        for ex in range(15, 27):
            sz = 1 << ex
            print(sz)

            from pycuda.curandom import rand as curand
            a_gpu = curand((sz, ))
            b_gpu = curand((sz, ))
            assert sz == a_gpu.shape[0]
            assert len(a_gpu.shape) == 1

            from pycuda.reduction import get_dot_kernel
            krnl = get_dot_kernel(dtype_out, a_gpu.dtype)

            # accumulated GPU time in milliseconds; a one-element list so the
            # closure below can mutate it
            elapsed = [0]

            def wrap_with_timer(f):
                def result(*args, **kwargs):
                    # bracket the kernel launch with CUDA events to measure
                    # device-side execution time
                    start = cuda.Event()
                    stop = cuda.Event()
                    start.record()
                    f(*args, **kwargs)
                    stop.record()
                    stop.synchronize()
                    elapsed[0] += stop.time_since(start)

                return result

            # warm-up: let kernel compilation and caching happen outside the timed loop
            for i in range(3):
                krnl(a_gpu, b_gpu)

            cnt = 10

            for i in range(cnt):
                krnl(a_gpu, b_gpu, kernel_wrapper=wrap_with_timer)

            # each invocation reads both input arrays once
            nbytes_moved = a_gpu.nbytes * 2 * cnt
            secs = elapsed[0] * 1e-3

            tbl.add_row((str(dtype_out), a_gpu.nbytes / (1 << 20),
                         elapsed[0] / cnt, nbytes_moved / secs / 1e9))

    print(tbl)


if __name__ == "__main__":
    main()
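
For reference, a minimal sketch of the event-based timing pattern the wrapper above relies on, stripped of the kernel_wrapper machinery (a sketch assuming pycuda, numpy, and a CUDA-capable device; the size and dtype are illustrative):

import numpy
import pycuda.autoinit  # initializes the CUDA context
import pycuda.driver as cuda
from pycuda.curandom import rand as curand
from pycuda.reduction import get_dot_kernel

a_gpu = curand((1 << 20,))  # float32 by default
b_gpu = curand((1 << 20,))
krnl = get_dot_kernel(numpy.float32, a_gpu.dtype)

start = cuda.Event()
stop = cuda.Event()
start.record()
krnl(a_gpu, b_gpu)
stop.record()
stop.synchronize()  # wait for the kernel to finish before reading the timer
print("%.3f ms" % stop.time_since(start))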
Example #2

A dot-product helper built on get_dot_kernel; when no output dtype is given, a common result dtype is promoted from the two inputs:
def dot(a, b, dtype=None, stream=None, allocator=None):
    from pycuda.reduction import get_dot_kernel
    if dtype is None:
        # pick a result dtype both inputs can be promoted to
        # (_get_common_dtype is a helper defined in the enclosing module)
        dtype = _get_common_dtype(a, b)
    krnl = get_dot_kernel(dtype, a.dtype, b.dtype)
    return krnl(a, b, stream=stream, allocator=allocator)
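
A minimal usage sketch, assuming this helper is the one exposed as pycuda.gpuarray.dot; the result is a single-element GPUArray, and .get() copies it back to the host:

import numpy
import pycuda.autoinit  # initializes the CUDA context
import pycuda.gpuarray as gpuarray
from pycuda.curandom import rand as curand

a_gpu = curand((1000,), dtype=numpy.float32)
b_gpu = curand((1000,), dtype=numpy.float32)

result = gpuarray.dot(a_gpu, b_gpu).get()
print(result)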
Example #3

A simpler variant of the same helper that has no allocator parameter and forwards the dtype argument to get_dot_kernel as-is, with no dtype promotion of its own:
def dot(a, b, dtype=None, stream=None):
    from pycuda.reduction import get_dot_kernel
    krnl = get_dot_kernel(dtype, a.dtype, b.dtype)
    return krnl(a, b, stream=stream)
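
A brief usage sketch for this variant; since it performs no dtype promotion itself, passing the output dtype explicitly is the safe choice (a_gpu and b_gpu as in the sketch above):

result = dot(a_gpu, b_gpu, dtype=numpy.float32).get()
print(result)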