示例#1
0
    def __init__(self, lst_svobjs, objIdOffset):
        self.m_size = vki.SVUInt32(len(lst_svobjs))
        self.m_buf = vki.SVObjBuffer(lst_svobjs)
        self.m_id_offset = vki.SVUInt32(objIdOffset)
        self.m_cptr = SVCombine_Create(
            {
                'size': self.m_size,
                'data': self.m_buf,
                'id_offset': self.m_id_offset
            }, '''
uint get_size(in Comb_#hash# vec)
{{
     return vec.size;
}}

{0} get_value(in Comb_#hash# vec, in uint id)
{{
    return vec.data[id].v;
}}
'''.format(self.name_elem_type()))
示例#2
0
# Sum-reduction compute kernel. Each workgroup loads up to BLOCK_SIZE
# consecutive elements of `src` into the `shared` array s_buf, folds them
# together with a halving stride (barrier() between steps), and invocation 0
# writes the workgroup's partial sum to dst[gl_WorkGroupID.x].
# The `i+s<n` guard keeps slots past the end of `src` (never written) out of
# the sum. The halving stride only visits every slot when BLOCK_SIZE is a
# power of two -- NOTE(review): confirm where BLOCK_SIZE is defined.
# `{0}` is the element type, `{1}` the block size; doubled braces are
# str.format escapes for literal GLSL braces.
kernel = vki.Computer(
    ['dst', 'src', 'n'],
    '''
shared {0} s_buf[{1}];
void main()
{{
	uint tid = gl_LocalInvocationID.x;
	uint i = gl_GlobalInvocationID.x;
	if (i<n) s_buf[tid] = get_value(src, i);
	barrier();
	for (uint s = {1}/2; s>0; s>>=1)
	{{
		if (tid < s && i+s<n)
    		s_buf[tid] += s_buf[tid + s];
    	barrier();
	}}
	if (tid==0) set_value(dst, gl_WorkGroupID.x, s_buf[tid]);	
}}
'''.format('int', str(BLOCK_SIZE)),
)

# Repeatedly reduce: every pass shrinks the vector to one partial sum per
# workgroup, until a single element (the total) is left. `darr` is the input
# device vector defined earlier in the script.
dst = darr
while dst.size() > 1:
    src = dst
    n = src.size()
    # Ceiling division in pure integer arithmetic; the previous
    # int((n + BLOCK_SIZE - 1) / BLOCK_SIZE) went through a float.
    blocks = (n + BLOCK_SIZE - 1) // BLOCK_SIZE
    dst = vki.SVVector("int", blocks)
    # One output element per workgroup; presumably launch() takes
    # (workgroup count, workgroup size, args) -- confirm against vki.
    kernel.launch(blocks, BLOCK_SIZE, [dst, src, vki.SVUInt32(n)])

print(dst.to_host()[0])