def tune_variance_zero_mean():
    with open(get_kernel_path() + 'wienerfilter.cu', 'r') as f:
        kernel_string = f.read()

    image = imread(get_testdata_path() + "test.jpg", mode="F")
    height = np.int32(image.shape[0])
    width = np.int32(image.shape[1])
    size = np.int32(height * width)

    tune_params = OrderedDict()
    tune_params["block_size_x"] = [2**i for i in range(5, 11)]
    tune_params["num_blocks"] = [2**i for i in range(5, 11)]

    max_blocks = max(tune_params["num_blocks"])
    output = np.zeros(max_blocks, dtype=np.float32)
    args = [size, output, image]

    problem_size = ("num_blocks", 1)

    tune_kernel("computeVarianceZeroMean", kernel_string, problem_size, args,
                tune_params, grid_div_x=[], verbose=True)

def test_wiener():
    with open(get_kernel_path() + 'wienerfilter.cu', 'r') as f:
        kernel_string = f.read()

    image = imread(get_testdata_path() + "test.jpg", mode="F")
    height = np.int32(image.shape[0])
    width = np.int32(image.shape[1])
    problem_size = (width, height)

    output = np.zeros(problem_size, dtype=np.float32)
    args = [height, width, output, image]

    params = OrderedDict()
    params["block_size_x"] = 32
    params["block_size_y"] = 8
    params["reuse_computation"] = 1

    answer = run_kernel("computeVarianceEstimates", kernel_string, problem_size,
                        args, params, grid_div_y=["block_size_y"])
    reference = run_kernel("computeVarianceEstimates_naive", kernel_string, problem_size,
                           args, params, grid_div_y=["block_size_y"])

    assert np.allclose(answer[2], reference[2], atol=1e-6)

def tune_wiener():
    with open(get_kernel_path() + 'wienerfilter.cu', 'r') as f:
        kernel_string = f.read()

    image = imread(get_testdata_path() + "test.jpg", mode="F")
    height = np.int32(image.shape[0])
    width = np.int32(image.shape[1])
    problem_size = (width, height)

    output = np.zeros(problem_size, dtype=np.float32)
    args = [height, width, output, image]

    tune_params = OrderedDict()
    tune_params["block_size_x"] = [32 * i for i in range(1, 33)]
    tune_params["block_size_y"] = [2**i for i in range(6)]

    #first the naive kernel
    #tune_kernel("computeVarianceEstimates_naive", kernel_string, problem_size,
    #            args, tune_params, grid_div_y=["block_size_y"])

    #more sophisticated kernel
    tune_params["reuse_computation"] = [0, 1]
    tune_kernel("computeVarianceEstimates", kernel_string, problem_size, args,
                tune_params, grid_div_y=["block_size_y"])

def tune_fastnoise():
    with open(get_kernel_path() + 'fastnoisefilter.cu', 'r') as f:
        kernel_string = f.read()

    image = imread(get_testdata_path() + "test.jpg", mode="F")
    height = np.int32(image.shape[0])
    width = np.int32(image.shape[1])
    problem_size = (width, height)

    output = np.zeros(problem_size, dtype=np.float32)
    args = [height, width, output, image]

    tune_params = OrderedDict()
    tune_params["block_size_x"] = [32 * i for i in range(1, 33)]
    tune_params["block_size_y"] = [2**i for i in range(6)]

    kernels = ["normalized_gradient", "gradient", "convolveHorizontally",
               "convolveVertically", "normalize"]
    for k in kernels:
        tune_kernel(k, kernel_string, problem_size, args, tune_params)

def test_find_peak():
    with open(get_kernel_path() + 'peaktocorrelationenergy.cu', 'r') as f:
        kernel_string = f.read()

    image = imread(get_testdata_path() + "test_small.jpg", mode="F")
    height = np.int32(image.shape[0])
    width = np.int32(image.shape[1])
    problem_size = (width, height)

    #generate some bogus crosscorr data
    crosscorr = np.random.randn(height, width, 2).astype(np.float32)

    #compute reference in Python
    peak_index = np.argmax(np.absolute(crosscorr[:, :, 0]))
    peak_value = np.absolute(crosscorr[:, :, 0].flatten()[peak_index])

    params = {"block_size_x": 512, "num_blocks": 64}
    problem_size = ("num_blocks", 1)
    num_blocks = np.int32(params["num_blocks"])

    peakval = np.zeros((1), dtype=np.float32)
    peakvals = np.zeros((num_blocks), dtype=np.float32)
    peakindx = np.zeros((num_blocks), dtype=np.int32)
    loc = np.zeros((1), dtype=np.int32)
    val = np.zeros((1), dtype=np.float32)

    args = [height, width, peakval, peakvals, peakindx, crosscorr]
    output1 = run_kernel("findPeak", kernel_string, problem_size, args, params, grid_div_x=[])
    peakvals = output1[3]
    peakindx = output1[4]

    args = [loc, val, peakindx, peakvals, num_blocks]
    output2 = run_kernel("maxlocFloats", kernel_string, (1, 1), args, params, grid_div_x=[])
    loc = output2[0][0]
    val = output2[1][0]

    print("answer")
    print("loc=", loc, "val=", val)
    print("reference")
    print("loc=", peak_index, "val=", peak_value)

    assert loc == peak_index
    assert np.isclose(val, peak_value, atol=1e-6)

def tune_zeromean():
    with open(get_kernel_path() + 'zeromeantotalfilter.cu', 'r') as f:
        kernel_string = f.read()

    image = imread(get_testdata_path() + "test.jpg", mode="F")
    height = np.int32(image.shape[0])
    width = np.int32(image.shape[1])

    tune_vertical(kernel_string, image, height, width)
    tune_horizontal(kernel_string, image, height, width)
    tune_transpose(kernel_string, image, height, width)

def tune_pce():
    with open(get_kernel_path() + 'peaktocorrelationenergy.cu', 'r') as f:
        kernel_string = f.read()

    image = imread(get_testdata_path() + "Pentax_OptioA40_0_30731.JPG", mode="F")
    image = fastnoise(image)
    image2 = imread(get_testdata_path() + "Pentax_OptioA40_0_30757.JPG", mode="F")
    image2 = fastnoise(image2)

    height = np.int32(image.shape[0])
    width = np.int32(image.shape[1])

    image_freq, image2_freq = tune_complex_and_flip(kernel_string, height, width, image, image2)
    crosscorr = tune_crosscorr(kernel_string, height, width, image_freq, image2_freq)
    loc, val = tune_find_peak(kernel_string, height, width, crosscorr)
    energy = tune_energy(kernel_string, height, width, crosscorr, loc)

    pce_score = (val[0] * val[0]) / energy
    print("Finished tuning PCE, pce_score=", pce_score)

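#For reference, the pce_score computed at the end of tune_pce() follows the usual
#peak-to-correlation-energy definition: the squared peak value divided by the mean
#energy of the correlation plane away from the peak. The sketch below is a plain
#NumPy reference for that formula; the name pce_reference, the 2D crosscorr input,
#and the 11x11 exclusion square are assumptions for illustration, not taken from
#the CUDA kernels above.
def pce_reference(crosscorr, peak_index, exclude_radius=5):
    #crosscorr is assumed to be a 2D real-valued correlation plane
    peak_val = crosscorr.flatten()[peak_index]
    py, px = np.unravel_index(peak_index, crosscorr.shape)
    #mask out a square around the peak before computing the energy
    mask = np.ones(crosscorr.shape, dtype=bool)
    mask[max(0, py - exclude_radius):py + exclude_radius + 1,
         max(0, px - exclude_radius):px + exclude_radius + 1] = False
    energy = np.mean(crosscorr[mask] ** 2)
    return (peak_val * peak_val) / energy
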
def test_fastnoise():
    with open(get_kernel_path() + 'fastnoisefilter.cu', 'r') as f:
        kernel_string = f.read()

    image = imread(get_testdata_path() + "test.jpg", mode="F")
    height = np.int32(image.shape[0])
    width = np.int32(image.shape[1])
    problem_size = (width, height)

    output1 = np.zeros_like(image)
    output2 = np.zeros_like(image)
    output3 = np.zeros_like(image)
    args = [height, width, output1, output2, image]

    params = OrderedDict()
    params["block_size_x"] = 32
    params["block_size_y"] = 16

    d = np.gradient(image)
    norm = np.sqrt((d[0] * d[0]) + (d[1] * d[1]))
    scale = 1.0 / (1.0 + norm)
    dys = d[0] * scale
    dxs = d[1] * scale

    answer = run_kernel("normalized_gradient", kernel_string, problem_size, args, params)
    assert np.allclose(answer[2], dxs, atol=1e-6)
    assert np.allclose(answer[3], dys, atol=1e-6)

    args = [height, width, output3, dxs, dys]
    answer = run_kernel("gradient", kernel_string, problem_size, args, params)
    reference = np.gradient(dys, axis=0) + np.gradient(dxs, axis=1)
    assert np.allclose(answer[2], reference, atol=1e-6)

def test_complex_and_flip2():
    with open(get_kernel_path() + 'peaktocorrelationenergy.cu', 'r') as f:
        kernel_string = f.read()

    image = imread(get_testdata_path() + "test_small.jpg", mode="F")
    height = np.int32(image.shape[0])
    width = np.int32(image.shape[1])
    problem_size = (width, height)

    output = np.zeros((height, width, 2), dtype=np.float32)
    args = [height, width, output, output, image, image]

    params = OrderedDict()
    params["block_size_x"] = 32
    params["block_size_y"] = 16

    answer = run_kernel("toComplexAndFlip2", kernel_string, problem_size, args, params,
                        grid_div_y=["block_size_y"], grid_div_x=["block_size_x"])

    #first output should be the image converted to complex, with zero imaginary part
    output1 = answer[2].reshape(height, width, 2)
    output1 = output1[:, :, 0] + 1j * output1[:, :, 1]
    reference1 = image + 1j * np.zeros((height, width), dtype=np.float32)
    assert np.allclose(output1, reference1, atol=1e-6)

    #second output should be the flipped image, also with zero imaginary part
    reference2 = image.flatten()[::-1].reshape(height, width)
    output2 = answer[3].reshape(height, width, 2)
    assert np.allclose(output2[:, :, 0], reference2, atol=1e-6)
    assert np.allclose(output2[:, :, 1], np.zeros((height, width), dtype=np.float32), atol=1e-6)

def test_variance_zero_mean():
    with open(get_kernel_path() + 'wienerfilter.cu', 'r') as f:
        kernel_string = f.read()

    image = imread(get_testdata_path() + "test.jpg", mode="F")
    height = np.int32(image.shape[0])
    width = np.int32(image.shape[1])
    size = np.int32(height * width)

    params = OrderedDict()
    params["block_size_x"] = 512
    params["num_blocks"] = 64
    num_blocks = params["num_blocks"]

    output = np.zeros(num_blocks, dtype=np.float32)
    args = [size, output, image]
    problem_size = ("num_blocks", 1)

    answer = run_kernel("computeVarianceZeroMean", kernel_string, problem_size,
                        args, params, grid_div_x=[])

    print("answer:")
    ans = np.sum(answer[1])
    print(ans, answer[1])
    print("reference:")
    reference = np.sum(image * image)
    print(reference)

    assert np.isclose(ans, reference, atol=1e-6)

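#A minimal entry point for running the tuners by hand is sketched below; this is an
#assumption about how the file is driven (it may instead be invoked through pytest
#or a separate script), and the selection and order of calls is illustrative only.
if __name__ == "__main__":
    tune_zeromean()
    tune_fastnoise()
    tune_wiener()
    tune_variance_zero_mean()
    tune_pce()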