def gaussianKernels(gs):
    """Load the separable-convolution program and return one callable per entry of *gs*.

    Each entry ``a`` of *gs* is registered under the name ``"gauss<len(a)>"``.
    The loaded program is also stored in the module-level ``program``.

    Every returned callable gets two extra attributes:
      * ``res``   -- the program attribute named ``"gauss<len(a)>_res"``
      * ``width`` -- ``len(a)``

    NOTE(review): presumably each entry of *gs* is a 1-D sequence of Gaussian
    coefficients -- confirm against the callers.
    """
    global program

    # (name, coefficients) pairs handed to the program loader.
    convs = [("gauss%s" % len(taps), taps) for taps in gs]
    # Names of the companion "_res" entry points, in the same order.
    res_names = ["gauss%s_res" % len(taps) for taps in gs]

    program = kernels.loadProgram(interfaces.convolvesep, convs=convs)

    # Look up all primary callables first, then decorate them one by one.
    krnls = [getattr(program, name) for name, _ in convs]
    for idx, (_, taps) in enumerate(convs):
        res_callable = getattr(program, res_names[idx])
        kernel = krnls[idx]
        kernel.res = res_callable
        kernel.width = len(taps)
    return krnls
def test_gradient():
    """Time ``np.gradient`` against the rpc gradient kernel on each engine.

    A random 1000x1000 float32 array is pushed through ``np.gradient`` and
    then through ``program.gradient(a, 1)`` for the GPU and CPU engines.
    Only wall-clock seconds are printed; the results are not compared.
    """
    import time

    a = np.random.sample((1000, 1000)).astype(np.float32)

    t = time.time()
    np.gradient(a)
    # A single pre-formatted argument prints the same text under both the
    # Python 2 print statement and the Python 3 print function.
    print("Numpy seconds %s" % (time.time() - t))

    for engine in (kernels.GPU_ENGINE, kernels.CPU_ENGINE):
        # Program loading is deliberately outside the timed region.
        program = kernels.loadProgram(interfaces.gradient, engine=engine)
        t = time.time()
        program.gradient(a, 1)
        print("Engine %s seconds %s" % (engine, time.time() - t))
import numpy as np
from rpc import interfaces, kernels

# Load one program that is provisioned with four named binary operators.
# Each operator name is then read back from the program twice: once as
# `<name>` and once as `<name>_res`.
program = kernels.loadProgram(interfaces.operators,operators=[("add","+"), ("sub","-"),("mul","*"),("div","/")])
add = program.add
add_res = program.add_res
sub = program.sub
sub_res = program.sub_res
mul = program.mul
mul_res = program.mul_res
div = program.div
div_res = program.div_res

def test_operators():
    # Exercises the operator callables on 100x100 float32 arrays.
    # As used here, the plain form returns its result, while the `_res` form
    # takes the output array as a third argument and the test calls
    # program.read(d) before inspecting it.
    # NOTE(review): this definition continues past the visible source.

    # 1+0 -> 1
    a = np.ones((100,100), dtype=np.float32)
    b = np.zeros_like(a)
    c = add(a,b)
    d = np.empty_like(a)
    add_res(a,b,d)
    assert c.sum() == c.size*1.0
    program.read(d)
    assert a.sum() == c.sum()
    assert a.sum() == d.sum()
    # 1-1 == 0
    b[:,:] = 1.0
    c = sub(a,b)
    # Pre-fill d with a sentinel value before calling sub_res.
    d[:,:]=10
    sub_res(a,b,d)
    program.read(d)
# NOTE(review): the next statement is the tail of a definition that starts
# above the visible source; it is reproduced untouched.
return np.array(im2, np.float32).reshape(im.shape)

from rpc import kernels, interfaces

def splitChannels(rgb):
    '''Return independent copies of planes 0, 1 and 2 along the last axis of a 3-d array.'''
    r, g, b = rgb[:, :, 0], rgb[:, :, 1], rgb[:, :, 2]
    return r.copy(), g.copy(), b.copy()

def joinChannels(r, g, b):
    '''Pack three planes into a new array of shape ``r.shape + (3,)`` with ``r``'s dtype.'''
    shape = list(r.shape)
    shape.append(3)
    rgb = np.empty(tuple(shape), dtype=r.dtype)
    rgb[:, :, 0], rgb[:, :, 1], rgb[:, :, 2] = r.copy(), g.copy(), b.copy()
    return rgb

# Program providing the rgb2hsi / hsi2rgb callables wrapped below.
program = kernels.loadProgram(interfaces.hsi)

def rgb2hsi(r, g, b):
    '''Call ``program.rgb2hsi`` and return its first three results (h, s, i).'''
    # The program returns a fourth value, `trace`, which is discarded here.
    h, s, i, trace = program.rgb2hsi(r, g, b) #@UnusedVariable
    return h, s, i

def hsi2rgb(h, s, i):
    '''Call ``program.hsi2rgb`` and return its three results (r, g, b).'''
    r,g,b = program.hsi2rgb(h,s,i)
    return r,g,b

if __name__ == "__main__":
    # NOTE(review): this demo block continues past the visible source.
    from utils import showArray
    r = np.empty((256, 256), dtype=np.float32)
    g = np.empty_like(r)
    b = np.empty_like(r)
    for i in range(256):
        # Row i of the red plane is filled with the value i.
        r[i, :] = i
import numpy as np
from rpc import kernels, interfaces
from utils import showArray

# Load the "xy" program and run its `addr` entry point on an all-zero
# 402x798 int32 array; it returns four arrays.
xy = kernels.loadProgram(interfaces.xy)
a = np.zeros((402,798), dtype=np.int32)
addr, x, y, label = xy.addr(a)

# Show each returned array under its own title, in the original order.
for _title, _plane in (("addr", addr), ("x", x), ("y", y), ("label", label)):
    showArray(_title, _plane)
'''
Created on Jul 22, 2011

@author: seant
'''
import numpy as np
from rpc import kernels, interfaces

# Module-level program, loaded on the GPU engine, plus its two entry points.
program = kernels.loadProgram(interfaces.gradient, engine=kernels.GPU_ENGINE)
gradientcl = program.gradient
gradient_res = program.gradient_res

def gradient(image, reach=1):
    '''Run ``gradientcl(image, reach)`` and return ``(grad, theta)`` with NaNs replaced by 0.0.

    The NaN replacement is done in place on the two arrays returned by the
    kernel call.
    '''
    grad,theta = gradientcl(image, reach)
    # TODO: gradientcl should not be returning NaNs, but it currently does;
    # zero them out here as a workaround.
    theta[np.where(np.isnan(theta))] = 0.0
    grad[np.where(np.isnan(grad))] = 0.0
    return grad, theta

def test_gradient():
    # Times np.gradient and program.gradient on a random 1000x1000 float32
    # array for the GPU and CPU engines.
    # NOTE(review): this definition continues past the visible source.
    import time
    a = np.random.sample((1000,1000)).astype(np.float32)
    t = time.time()
    b = np.gradient(a)
    print "Numpy seconds", time.time()-t
    for engine in (kernels.GPU_ENGINE, kernels.CPU_ENGINE):
        # This `program` is local to the function; the module-level one is
        # not rebound.
        program = kernels.loadProgram(interfaces.gradient, engine=engine)
        t = time.time()
        c = program.gradient(a, 1)
'''
Provides 2d median filter for 2d arrays

Created on Jul 22, 2011
'''
import numpy as np
from rpc import kernels, interfaces

# Provision this program with just one median filter, fixed length of 9
program = kernels.loadProgram(interfaces.median3x3, width=9, steps=[9])
median3x3cl = program.median3x3

def median3x3slow(image, iterations=1):
    '''Apply ``median3x3cl`` *iterations* times and return a copy of the result.

    Each pass feeds the previous pass's return value back in. With
    ``iterations <= 0`` no pass runs and a copy of *image* is returned.
    '''
    while iterations > 0:
        image = median3x3cl(image)
        iterations -= 1
    return image.copy()

def median3x3fast(image, iterations=1):
    # Multi-pass variant that calls program.first / program.last directly
    # when iterations == 2, swapping the two buffers between the passes.
    # NOTE(review): this definition continues past the visible source; the
    # handling of more than two iterations is not visible here.
    # NOTE(review): the local name `input` shadows the Python builtin.
    if iterations == 1:
        # One pass through
        return median3x3cl(image)
    input = image
    output = np.zeros_like(input)
    if iterations == 2:
        # Send in data, don't retrieve
        program.first(input, output)
        input,output = output,input
        # Don't send in, retrieve data
        program.last(input, output)
        return output
    # Send in data, no retrieve
from rpc import kernels, interfaces
import numpy
import numpy.linalg as la
import time

# We don't _need_ to specify the dtype: that's done in the interface
a = numpy.random.rand(50000)
b = numpy.random.rand(50000)

t = time.time()

# We don't need to manage a compiler and linker, that can be done for us
demo = kernels.loadProgram(interfaces.demo)

# We don't need to manage buffers, that can be done for us
a_plus_b = demo.sum(a,b)

# Each print takes one pre-formatted string so the output is the same under
# the Python 2 print statement and the Python 3 print function. The original
# call-style print with two arguments printed a tuple repr on Python 2.
# First value: norm of the difference from numpy's own a+b; second: norm of
# the program's result.
print("%s %s" % (la.norm(a_plus_b - (a + b)), la.norm(a_plus_b)))
print("Elapsed: %s" % (time.time() - t))

# And there are interesting stats available
print("Stats %s" % (demo,))
# with minor changes to move to numpy from the obsolete Numeric import numpy as np import time # You can choose a calculation routine below (calc_fractal), uncomment # one of the three lines to test the three variations # Speed notes are listed in the same place # set width and height of window, more pixels take longer to calculate w = 1024 h = 1024 # Use the rpc extension to define and load the kernel as a callable. from rpc import kernels, interfaces calc_fractal_opencl = kernels.loadProgram(interfaces.mandelbrot,engine=kernels.CPU_ENGINE).mandelbrot def calc_fractal_serial(q, maxiter): # calculate z using numpy # this routine unrolls calc_fractal_numpy as an intermediate # step to the creation of calc_fractal_opencl # it runs slower than calc_fractal_numpy z = np.zeros(q.shape, np.complex64) output = np.resize(np.array(0,), q.shape) for i in range(len(q)): for iter in range(maxiter): z[i] = z[i]*z[i] + q[i] if abs(z[i]) > 2.0: q[i] = 0+0j