Python assemble_code示例，util.assemble_code Python示例

示例#1

0

显示文件

文件： mwc.py 项目： twobombs/cuburn

def test_mwc(rounds=5000, nblocks=64, blockwidth=512):
    import pycuda.driver as cuda
    from pycuda.compiler import SourceModule
    import time

    nthreads = blockwidth * nblocks
    seeds = make_seeds(nthreads, host_seed=42)
    dseeds = cuda.to_device(seeds)

    mod = SourceModule(assemble_code(mwctestlib))

    for trial in range(2):
        print "Trial %d, on CPU: " % trial,
        sums = np.zeros(nthreads, dtype=np.uint64)
        ctime = time.time()
        mults = seeds[:, 0].astype(np.uint64)
        states = seeds[:, 1]
        carries = seeds[:, 2]
        for i in range(rounds):
            step = np.frombuffer((mults * states + carries).data,
                                 dtype=np.uint32).reshape((nthreads, 2))
            states[:] = step[:, 0]
            carries[:] = step[:, 1]
            sums += states

        ctime = time.time() - ctime
        print "Took %g seconds." % ctime

        print "Trial %d, on device: " % trial,
        dsums = cuda.mem_alloc(8 * nthreads)
        fun = mod.get_function("test_mwc")
        dtime = fun(dseeds,
                    dsums,
                    np.float32(rounds),
                    block=(blockwidth, 1, 1),
                    grid=(nblocks, 1),
                    time_kernel=True)
        print "Took %g seconds." % dtime
        dsums = cuda.from_device(dsums, nthreads, np.uint64)
        if not np.all(np.equal(sums, dsums)):
            print "Sum discrepancy!"
            print sums
            print dsums

示例#2

0

显示文件

文件： mwc.py 项目： vincentmele/cuburn

def test_mwc(rounds=5000, nblocks=64, blockwidth=512):
    import pycuda.driver as cuda
    from pycuda.compiler import SourceModule
    import time

    nthreads = blockwidth * nblocks
    seeds = make_seeds(nthreads, host_seed=42)
    dseeds = cuda.to_device(seeds)

    mod = SourceModule(assemble_code(mwctestlib))

    for trial in range(2):
        print "Trial %d, on CPU: " % trial,
        sums = np.zeros(nthreads, dtype=np.uint64)
        ctime = time.time()
        mults = seeds[0].astype(np.uint64)
        states = seeds[1]
        carries = seeds[2]
        for i in range(rounds):
            step = np.frombuffer((mults * states + carries).data,
                       dtype=np.uint32).reshape((2, nthreads), order='F')
            states[:] = step[0]
            carries[:] = step[1]
            sums += states

        ctime = time.time() - ctime
        print "Took %g seconds." % ctime

        print "Trial %d, on device: " % trial,
        dsums = cuda.mem_alloc(8*nthreads)
        fun = mod.get_function("test_mwc")
        dtime = fun(dseeds, dsums, np.float32(rounds),
                    block=(blockwidth,1,1), grid=(nblocks,1),
                    time_kernel=True)
        print "Took %g seconds." % dtime
        dsums = cuda.from_device(dsums, nthreads, np.uint64)
        if not np.all(np.equal(sums, dsums)):
            print "Sum discrepancy!"
            print sums
            print dsums

示例#3

0

显示文件

文件： interp.py 项目： twobombs/cuburn

test_cr(const float *times, const float *knots, const float *t, float *r) {
    int i = threadIdx.x + blockDim.x * blockIdx.x;
    r[i] = catmull_rom(times, knots, t[i]);
}
''')

if __name__ == "__main__":
    # Test spline evaluation. This code will probably drift pretty often.
    #
    # Builds a random 16-knot spline, evaluates it on the host with SplEval,
    # and prepares padded float32 arrays of the knot times and values.
    # NOTE(review): the padded arrays look like inputs for the compiled
    # module, but no kernel launch is visible in this excerpt -- confirm.
    import pycuda.driver as cuda
    from pycuda.compiler import SourceModule
    # Imported for its side effect: sets up a CUDA context before compiling.
    import pycuda.autoinit
    from cuburn.genome import SplEval

    # NOTE(review): the packer is finalized without adding anything to it;
    # presumably it exists only because assemble_code takes one -- confirm.
    gp = GenomePacker("unused")
    gp.finalize()
    mod = SourceModule(assemble_code(BaseCode, gp))
    # 16 sorted knot times: four fixed values plus 12 uniform samples
    # from [0, 1).
    times = np.sort(np.concatenate(([-2.0, 0.0, 1.0, 3.0], np.random.rand(12))))
    # 16 knot values drawn from a standard normal distribution.
    knots = np.random.randn(16)

    print times
    print knots

    # 1024 evenly spaced evaluation points covering [0, 1], as float32.
    evaltimes = np.float32(np.linspace(0, 1, 1024))
    # SplEval receives one flat list interleaving time and value:
    # [t0, k0, t1, k1, ...].
    sp = SplEval([x for k in zip(times, knots) for x in k])
    # Host-side spline values at every evaluation point.
    vals = np.array([sp(t) for t in evaltimes], dtype=np.float32)

    # Pad both arrays to 32 float32 entries: unused time slots hold 1e9 and
    # unused knot slots hold 0.
    # NOTE(review): 1e9 presumably acts as an "after every real knot"
    # sentinel for the device-side lookup -- confirm.
    dtimes = np.empty((32,), dtype=np.float32)
    dtimes.fill(1e9)
    dtimes[:16] = times
    dknots = np.zeros_like(dtimes)
    dknots[:16] = knots

示例#4

0

显示文件

文件： interp.py 项目： stevenrobertson/cuburn

    int i = threadIdx.x + blockDim.x * blockIdx.x;
    r[i] = catmull_rom(times, knots, t[i]);
}
"""
)

if __name__ == "__main__":
    # Test spline evaluation. This code will probably drift pretty often.
    #
    # Builds a random 16-knot spline, evaluates it on the host with SplEval,
    # and prepares padded float32 arrays of the knot times and values.
    # NOTE(review): the padded arrays look like inputs for the compiled
    # module, but no kernel launch is visible in this excerpt -- confirm.
    import pycuda.driver as cuda
    from pycuda.compiler import SourceModule
    # Imported for its side effect: sets up a CUDA context before compiling.
    import pycuda.autoinit
    from cuburn.genome import SplEval

    # NOTE(review): the packer is finalized without adding anything to it;
    # presumably it exists only because assemble_code takes one -- confirm.
    gp = GenomePacker("unused")
    gp.finalize()
    mod = SourceModule(assemble_code(BaseCode, gp))
    # 16 sorted knot times: four fixed values plus 12 uniform samples
    # from [0, 1).
    times = np.sort(np.concatenate(([-2.0, 0.0, 1.0, 3.0], np.random.rand(12))))
    # 16 knot values drawn from a standard normal distribution.
    knots = np.random.randn(16)

    print times
    print knots

    # 1024 evenly spaced evaluation points covering [0, 1], as float32.
    evaltimes = np.float32(np.linspace(0, 1, 1024))
    # SplEval receives one flat list interleaving time and value:
    # [t0, k0, t1, k1, ...].
    sp = SplEval([x for k in zip(times, knots) for x in k])
    # Host-side spline values at every evaluation point.
    vals = np.array([sp(t) for t in evaltimes], dtype=np.float32)

    # Pad both arrays to 32 float32 entries: unused time slots hold 1e9 and
    # unused knot slots hold 0.
    # NOTE(review): 1e9 presumably acts as an "after every real knot"
    # sentinel for the device-side lookup -- confirm.
    dtimes = np.empty((32,), dtype=np.float32)
    dtimes.fill(1e9)
    dtimes[:16] = times
    dknots = np.zeros_like(dtimes)
    dknots[:16] = knots