def chunkwise_kernel():
    """Run the compiled 'main' kernel over every chunk of the 'example1' array.

    The module-level ``source`` is compiled, the array stored under
    'example1' is opened, and each of its chunks is exposed through
    ``view`` and handed to the kernel.  After the kernel has run, the
    chunk is assigned back into its slot.  When all chunks are done the
    execution context is destroyed, a ``Runtime`` is created and joined,
    and the array is printed.

    ``compile`` and ``open`` are not the builtins here: ``compile`` is
    called with a single argument and unpacked into two values, and the
    object returned by ``open`` exposes ``.data.ca``.
    """
    # Only the environment is needed; the AST half of the result is unused.
    _, env = compile(source)

    array = open('example1', mode='w')
    chunked = array.data.ca

    ctx = Context(env)
    try:
        for index in range(chunked.nchunks):
            chunk = chunked.chunks[index]

            # Write access goes through a view of the chunk.
            x = view(chunk)

            # NOTE(review): the kernel's second argument is the first stride
            # of the view, not an element count -- confirm this is intended.
            size = x.strides[0]
            execute(ctx, (x, size), fname='main')

            # Assigning the chunk back does a _save() behind the scenes.
            chunked.chunks[index] = chunk
    finally:
        # Release the context even when a kernel invocation fails.
        ctx.destroy()

    rts = Runtime(1, 2, 3)
    rts.join()

    # Parenthesised single-argument form prints the same thing whether
    # ``print`` is the Python 2 statement or the function.
    print(array)
def chunked_eval(blz_expr, chunk_size=32768):
    """Evaluate a Blaze expression chunk by chunk and return the summed result.

    The expression is lowered with ``blz_expr.gen_blir()`` into a list of
    operands and BLIR code.  The code is compiled once; then, for each
    window of at most ``chunk_size`` elements, the matching slice of every
    ``blaze.Array`` operand is copied into its temporary and the compiled
    'main' function is executed on the temporaries plus the window length.
    The values returned by the individual executions are added together.

    Args:
        blz_expr: Object providing ``gen_blir()``, which returns an
            ``(operands, code)`` pair.
        chunk_size: Maximum number of elements processed per execution.
            Must be positive.

    Returns:
        The sum of the per-chunk results, starting from ``0.0``.

    Raises:
        ValueError: If ``chunk_size`` is not positive (the chunk loop
            could otherwise never advance).
    """
    if chunk_size <= 0:
        raise ValueError('chunk_size must be positive, got %r' % (chunk_size,))

    operands, code = blz_expr.gen_blir()
    # presumably the number of elements to iterate over -- verify _dimension
    total_size = _dimension(operands)

    temps = [_temp_for(operand, chunk_size) for operand in operands]
    # Only array operands are refilled per chunk; temporaries of any other
    # operand are passed to the kernel exactly as _temp_for produced them.
    array_pairs = [
        (temp, operand)
        for temp, operand in zip(temps, operands)
        if isinstance(operand, blaze.Array)
    ]

    _, env = blir.compile(code)
    ctx = blir.Context(env)

    accum = 0.0
    offset = 0
    try:
        while offset < total_size:
            # The last window may be shorter than chunk_size.
            curr_chunk_size = min(total_size - offset, chunk_size)
            stop = offset + curr_chunk_size

            for temp, operand in array_pairs:
                temp[0:curr_chunk_size] = operand[offset:stop]

            accum += blir.execute(
                ctx, args=temps + [curr_chunk_size], fname='main'
            )
            offset = stop
    finally:
        # Release the context even when a copy or a kernel call fails.
        ctx.destroy()

    return accum
def test_cgen2_add():
    """Check that a generated elementwise kernel adds two int32 vectors.

    An ``ElementwiseKernel`` with two input vectors and one output vector
    of length 300 is built from the expression
    ``_out0[i0] = _in0[i0] + _in1[i0]``, verified, compiled, and executed
    as 'kernel0'.  The output buffer must match ``a + b`` computed by NumPy.
    """
    # NOTE(review): the whole test is kept inside namesupply(); the expected
    # kernel name 'kernel0' presumably depends on it -- confirm.
    with namesupply():
        krn = ElementwiseKernel(
            [
                (IN, VectorArg((300,), 'array[int]')),
                (IN, VectorArg((300,), 'array[int]')),
                (OUT, VectorArg((300,), 'array[int]')),
            ],
            '_out0[i0] = _in0[i0] + _in1[i0]',
        )
        krn.verify()

        # Only the environment is needed; the AST half is unused.
        _, env = krn.compile()

        # arange yields the same values and dtype as array(xrange(300)).
        a = np.arange(300, dtype='int32')
        b = np.arange(300, dtype='int32')
        c = np.empty_like(b)

        ctx = Context(env)
        try:
            execute(ctx, args=(a, b, c), fname='kernel0', timing=False)
        finally:
            # Release the execution context whether or not the kernel ran.
            ctx.destroy()

        assert np.allclose(c, a + b)
def chunked_eval(blz_expr, chunk_size=32768):
    """Evaluate a Blaze expression in fixed-size chunks and sum the results.

    ``blz_expr.gen_blir()`` supplies the operands and the BLIR code.  The
    code is compiled a single time.  The operands are then walked in
    windows of at most ``chunk_size`` elements: each ``blaze.Array``
    operand has its current window copied into a temporary, and the
    compiled "main" function is run on the temporaries followed by the
    window length.  Every run's return value is added to an accumulator.

    Args:
        blz_expr: Object providing ``gen_blir()``, which returns an
            ``(operands, code)`` pair.
        chunk_size: Upper bound on the number of elements handled per
            run.  Must be positive.

    Returns:
        The accumulated per-chunk results, starting from ``0.0``.

    Raises:
        ValueError: If ``chunk_size`` is not positive, since the loop
            offset would then never reach the end.
    """
    if chunk_size <= 0:
        raise ValueError("chunk_size must be positive, got %r" % (chunk_size,))

    operands, code = blz_expr.gen_blir()
    # presumably the number of elements to iterate over -- verify _dimension
    total_size = _dimension(operands)

    temps = [_temp_for(operand, chunk_size) for operand in operands]
    # Pair each array operand with its temporary; other operands are not
    # re-copied per chunk and reach the kernel as _temp_for returned them.
    array_pairs = [
        (temp, operand)
        for temp, operand in zip(temps, operands)
        if isinstance(operand, blaze.Array)
    ]

    _, env = blir.compile(code)
    ctx = blir.Context(env)

    accum = 0.0
    offset = 0
    try:
        while offset < total_size:
            # The final window is truncated to the remaining elements.
            curr_chunk_size = min(total_size - offset, chunk_size)
            slice_chunk = slice(0, curr_chunk_size)
            slice_src = slice(offset, offset + curr_chunk_size)

            for temp, operand in array_pairs:
                temp[slice_chunk] = operand[slice_src]

            accum += blir.execute(
                ctx, args=temps + [curr_chunk_size], fname="main"
            )
            offset = slice_src.stop
    finally:
        # Destroy the context on every exit path, including errors.
        ctx.destroy()

    return accum
for i in range(n) { for j in range(n) { x[i,j] = i+j; } } } """ N = 15 ast, env = compile(source) arr = np.eye(N, dtype='int32') args = (arr, N) ctx = Context(env) execute(ctx, args, timing=True) ctx.destroy() print arr #------------------------------------------------------------------------ # Vector Dot Product #------------------------------------------------------------------------ N = 50000 A = np.arange(N, dtype='double') B = np.arange(N, dtype='double') source = open('samples/blir/dot.bl') ast, env = compile(source.read())