def get_target(lang, device=None, compiler=None): """ Parameters ---------- lang : str One of the supported languages, {'c', 'cuda', 'opencl'} device : :class:`pyopencl.Device` If supplied, and lang is 'opencl', passed to the :class:`loopy.PyOpenCLTarget` compiler: str If supplied, the C-compiler to use Returns ------- The correct loopy target type """ utils.check_lang(lang) # set target if lang == 'opencl': return lp.PyOpenCLTarget(device=device) elif lang == 'c': return lp.ExecutableCTarget(compiler=compiler) elif lang == 'cuda': return lp.CudaTarget() elif lang == 'ispc': return lp.ISPCTarget()
def test_cuda_short_vector(): knl = lp.make_kernel("{ [i]: 0<=i<n }", "out[i] = 2*a[i]", target=lp.CudaTarget()) knl = lp.set_options(knl, write_code=True) knl = lp.split_iname(knl, "i", 4, slabs=(0, 1), inner_tag="vec") knl = lp.split_array_axis(knl, "a,out", axis_nr=0, count=4) knl = lp.tag_array_axes(knl, "a,out", "C,vec") knl = lp.set_options(knl, write_wrapper=True) knl = lp.add_and_infer_dtypes(knl, {"a": np.float32}) print(lp.generate_code_v2(knl).device_code())
import numpy as np import loopy as lp target = lp.CudaTarget() kernel = lp.make_kernel("{ [i_node,j_node]: 0<=i_node,j_node<n_node}", """ <float32> coupling_value = params(1) <float32> speed_value = params(0) <float32> dt=0.1 <float32> M_PI_F = 2.0 <float32> rec_n = 1.0f / n_node <float32> rec_speed_dt = 1.0f / speed_value / dt <float32> omega = 10.0 * 2.0 * M_PI_F / 1e3 <float32> sig = sqrt(dt) * sqrt(2.0 * 1e-5) <float32> rand = 1.0 for i_node tavg[i_node]=0.0f {id = clear} end for i_node <float32> theta_i = state[i_node] {id = coupling1, dep=*} <float32> sum = 0.0 {id = coupling2} for j_node <float32> wij = weights[j_node] {id = coupling3, dep=coupling1:coupling2} if wij != 0.0 <int> dij = lengths[j_node] * rec_speed_dt {id = coupling4, dep=coupling3} <float32> theta_j = state[j_node] sum = sum + wij * sin(theta_j - theta_i)