def test_full_integral_image_correctness(): ''' Test generated full integral image correctness, note that this relies on the corectness of interpreter and reference.py ''' block_size = (20, 20) size = tuple(x*3 for x in block_size) # generate random test image test_image = [[float(random.randint(0, 255)) for i in xrange(size[0])] for j in xrange(size[1])] # reference implementation integral_ref = reference.gen_integral_image(test_image) sq_integral_ref = reference.gen_integral_squared_image(test_image) # pointer config buffer_size = block_size[0]*block_size[1] src_ptr = 0 integral_ptr = buffer_size sq_integral_ptr = 2*buffer_size # set up interpreter for integral image calculation pe_dim = [s//b for s, b in zip(size, block_size)] def code_gen(code, block_size, args): return gen_code.gen_full_integral_image(code, src_ptr, integral_ptr, sq_integral_ptr, pe_dim, block_size) code = Code() code.set_generator(optimiser_wrapper(code_gen), block_size) sim = Interpreter(code, test_image, block_size) sim.run() # get result of simulator with scaling, truncation turned off and float output integral_test = sim.gen_output_image(1, False, False, True) sq_integral_test = sim.gen_output_image(2, False, False, True) # comparison of reference with blip sim integral_err = compare_images(integral_ref, integral_test) sq_integral_err = compare_images(sq_integral_ref, sq_integral_test) err_eps = 0.001 if not ((integral_err < err_eps) and (sq_integral_err < err_eps)): print 'integral comp:', integral_err print 'squared integral comp:', sq_integral_err print 'rendering instruction stream to file, can take a while' try: f = open('unoptimised_full_integral_image_trace.txt', 'w') def tag_str(instr): return ', '.join(instr.tag) if hasattr(instr, 'tag') else '' f.write('\n'.join(str(x).ljust(40) + ' tags: ' + tag_str(x) for x in code_gen(Code()))) f.close() optim_gen = optimiser_wrapper(code_gen, block_size, {}) f = open('bad_full_integral_image_trace.txt', 'w') def tag_str(instr): return ', 
'.join(instr.tag) if hasattr(instr, 'tag') else '' f.write('\n'.join(str(x).ljust(40) + ' tags: ' + tag_str(x) for x in optim_gen(Code()))) f.close() except Exception, e: print 'could render instruction stream to file' print 'err: ' + str(e) assert False
def test_map_pixel_to_pixel():
    ''' Test map_pixel_to_pixel with a threshold operation.

    A random 16x16 image (a single 16x16 block) is thresholded by the
    generated code and the result is compared with a pure python reference.
    '''
    def pixel_op(code, pixel_in, pixel_out, args, block_size):
        ''' Threshold codegen, intended to mirror run_ref: 255 if pixel > th else 0. '''
        th = args['th']
        with scoped_alloc(code, 2) as (th_r, const_255):
            yield Imm(th_r, th)
            yield Cmp(pixel_in, th_r)
            yield Imm(const_255, 255)
            yield Mov(pixel_out, const_255, cond='GT')
            yield Xor(pixel_out, pixel_out, pixel_out, cond='LE')

    def run_test(image, th):
        ''' run the generated code in the interpreter and return output buffer 1 '''
        code = Code()
        block_size = (16, 16)
        in_ptr = 0
        out_ptr = block_size[0]*block_size[1]
        args = {'th': th}
        def codegen(code, block_size, args):
            return map_pixel_to_pixel(code, in_ptr, out_ptr, pixel_op, args, block_size)
        code.set_generator(codegen, block_size, args)
        sim = Interpreter(code, image, block_size)
        sim.run()
        # second argument False: presumably disables output scaling so raw
        # values can be compared with the reference -- TODO confirm against
        # Interpreter.gen_output_image
        return sim.gen_output_image(1, False)

    def run_ref(image, th):
        ''' pure python reference implementation '''
        return [[255 if x > th else 0 for x in y] for y in image]

    th = 100
    image = [[random.randint(0, 255) for x in xrange(16)] for y in xrange(16)]
    res_test = run_test(image, th)
    # compare against the reference; comparing run_test with itself can never fail
    res_ref = run_ref(image, th)
    assert compare_images(res_test, res_ref) < 0.01
def _test_image_function_single_pe(codegen, args, ref_implementation): # setttings width, height = 9, 9 # size only effects execution speed so keep it small block_size = (width, height) def run_test(image, args, block_size): im_size = len(image[0]), len(image) bwidth, bheight = block_size assert(im_size == block_size) # only one pe code = Code() code.set_generator(codegen, block_size, args) sim = Interpreter(code, image, block_size) sim.run() output = sim.gen_output_image(1, False) return output image = [[random.randint(0, 255) for x in xrange(width)] for y in xrange(height)] ref_output = ref_implementation(image, args) test_output = run_test(image, args, block_size) if not (compare_images(ref_output, test_output) < 0.01): print 'ref' print '\n'.join(str(y) for y in ref_output) print 'test' print '\n'.join(str(y) for y in test_output) return False return True
def test_get2D_func():
    ''' Test if get2D codegen is correct.

    The kernel copies the pixel at (x-1, y-1) to (x, y); the image spans
    3x3 blocks of 5x5 pixels. Positions outside the image are expected to
    read as 0.
    '''
    import random
    block_size = (5, 5)
    out_ptr = block_size[0]*block_size[1]
    im_size = tuple(dim*3 for dim in block_size)
    src_image = [[random.randint(0, 255) for _ in xrange(im_size[0])] for _ in xrange(im_size[1])]

    kernel_params = {'bwidth':block_size[0], 'bheight':block_size[1]}
    src = '''
@kernel
def main(in_ptr, out_ptr):
    y = get_local_id(0)
    x = get_local_id(1)
    index = %(bwidth)i*y
    out_ptr[index+x] = get2D(in_ptr, x-1, y-1, %(bwidth)i, %(bheight)i)
'''%kernel_params
    kernel_args = {'in_ptr':0, 'out_ptr':out_ptr}
    test_out = run_kernel(src, src_image, block_size, kernel_args, 32)

    def shifted_pixel(row, col):
        ''' reference value: upper-left neighbour, or 0 outside of the image '''
        x, y = col - 1, row - 1
        if 0 <= x < im_size[0] and 0 <= y < im_size[1]:
            return src_image[y][x]
        return 0

    ref_out = [[shifted_pixel(i, j) for j in xrange(len(src_row))]
               for i, src_row in enumerate(src_image)]

    assert compare_images(test_out, ref_out) < 0.001
def gen_integral_image_correctness(): ''' test if generated integral image is correct, note that this relies on the corectness of interpreter and reference.py ''' # size = (120, 80) # block_size = (40, 40) size = (80, 80) block_size = size # generate random test image test_image = [[float(random.randint(0, 255)) for i in xrange(size[0])] for j in xrange(size[1])] # reference implementation integral_ref = reference.gen_integral_image(test_image) sq_integral_ref = reference.gen_integral_squared_image(test_image) # pointer config buffer_size = block_size[0]*block_size[1] src_ptr = 0 integral_ptr = buffer_size sq_integral_ptr = 2*buffer_size # set up interpreter for integral image calculation def code_gen(code, block_size, args): return gen_code.gen_integral_image(code, src_ptr, integral_ptr, sq_integral_ptr, block_size) code = Code() code.set_generator(optimiser_wrapper(code_gen), block_size) sim = Interpreter(code, test_image, block_size) sim.run() # get result of simulator with scaling, truncation turned off and float output integral_test = sim.gen_output_image(1, False, False, True) sq_integral_test = sim.gen_output_image(2, False, False, True) # comparison of reference with blip sim integral_err = compare_images(integral_ref, integral_test) sq_integral_err = compare_images(sq_integral_ref, sq_integral_test) err_eps = 0.001 if not ((integral_err < err_eps) and (sq_integral_err < err_eps)): print 'integral comp:', integral_err print 'squared integral comp:', sq_integral_err assert False
def test_full_integral_image_correctness():
    ''' Test generated full integral image correctness without the optimiser,
    note that this relies on the correctness of interpreter and reference.py

    The image consists of 3x3 blocks of 20x20 pixels.
    '''
    block_size = (20, 20)
    size = tuple(dim*3 for dim in block_size)

    # random input, stored as floats
    test_image = [[float(random.randint(0, 255)) for _ in xrange(size[0])] for _ in xrange(size[1])]

    # expected results from the reference implementation
    expected_integral = reference.gen_integral_image(test_image)
    expected_sq_integral = reference.gen_integral_squared_image(test_image)

    # buffer layout: source, integral and squared integral back to back
    buffer_size = block_size[0]*block_size[1]
    src_ptr = 0
    integral_ptr = src_ptr + buffer_size
    sq_integral_ptr = 2*buffer_size

    # number of blocks in each dimension
    pe_dim = [s//b for s, b in zip(size, block_size)]

    code = Code()
    def code_gen(code, block_size, args):
        return gen_code.gen_full_integral_image(code, src_ptr, integral_ptr, sq_integral_ptr, pe_dim, block_size)
    code.set_generator(code_gen, block_size)

    sim = Interpreter(code, test_image, block_size)
    sim.run()

    # read buffers 1 and 2 with scaling and truncation turned off, as floats
    simulated_integral = sim.gen_output_image(1, False, False, True)
    simulated_sq_integral = sim.gen_output_image(2, False, False, True)

    # compare reference with blip sim
    integral_err = compare_images(expected_integral, simulated_integral)
    sq_integral_err = compare_images(expected_sq_integral, simulated_sq_integral)

    err_eps = 0.001
    within_tolerance = (integral_err < err_eps) and (sq_integral_err < err_eps)
    assert within_tolerance
def test_conv_3x3_multiple_block():
    ''' Simple 2 dimensional convolution test with multiple blocks.

    The image spans 2x2 blocks of 8x8 pixels; neighbours are fetched with
    get2D. In the reference, neighbours outside the image contribute 0.
    '''
    import random
    block_size = (8, 8)
    out_ptr = block_size[0]*block_size[1]
    im_size = tuple(dim*2 for dim in block_size)
    src_image = [[random.randint(0, 255) for _ in xrange(im_size[0])] for _ in xrange(im_size[1])]

    kernel_params = {'bwidth':block_size[0], 'bheight':block_size[1]}
    src = '''
@kernel
def main(in_ptr, out_ptr):
    coeff = [[-1, 0, 1], [-1, 0, 1], [-1, 0, 1]]
    y = get_local_id(0)
    x = get_local_id(1)
    index = %(bwidth)i*y+x
    acc = 0
    current_y = y - 1
    for i in range(3):
        current_x = x - 1
        for j in xrange(3):
            current_coeff = coeff[i][j]
            in_v = get2D(in_ptr, current_x, current_y, %(bwidth)i, %(bheight)i)
            v = current_coeff * in_v
            acc += v
            current_x += 1
        current_y += 1
    out_ptr[index] = acc
'''%kernel_params
    kernel_args = {'in_ptr':0, 'out_ptr':out_ptr}
    test_out = run_kernel(src, src_image, block_size, kernel_args, 32)

    # python reference: same 3x3 mask, zero outside of the image
    coeffs = [[-1, 0, 1]]*3
    height = len(src_image)

    def weighted_sum(i, j, width):
        ''' mask response centered on row i, column j '''
        total = 0
        for k, coeff_row in enumerate(coeffs):
            y = i + k - 1
            for l, coeff in enumerate(coeff_row):
                x = j + l - 1
                if 0 <= x < width and 0 <= y < height:
                    total += coeff * src_image[y][x]
        return total

    ref_out = [[weighted_sum(i, j, len(src_row)) for j in xrange(len(src_row))]
               for i, src_row in enumerate(src_image)]

    assert compare_images(test_out, ref_out) < 0.001
def test_map_image_to_pixel():
    ''' Test map_image_to_pixel with a horizontal image shift.

    A random 32x32 image (2x2 blocks of 16x16) is shifted by the generated
    code and the result is compared with a pure python reference.
    '''
    def pixel_op(code, pos, in_ptr, out_ptr, args, block_size):
        ''' Simple image shift implementation. '''
        offset = args['offset']
        x, y = pos
        width, height = block_size
        # source pixel lies offset columns to the right in the same row;
        # load_mem_value is given pos and block_size, presumably to resolve
        # addresses that fall outside the current block -- TODO confirm
        c_in_ptr = in_ptr + width*y + (x + offset)
        c_out_ptr = out_ptr + width*y + x
        with scoped_alloc(code, 1) as v:
            for instr in load_mem_value(code, c_in_ptr, pos, v, block_size):
                yield instr
            yield MemWImm(c_out_ptr, v)

    def run_test(image, offset):
        ''' run the generated code in the interpreter and return output buffer 1 '''
        code = Code()
        block_size = (16, 16)
        in_ptr = 0
        out_ptr = block_size[0]*block_size[1]
        args = {'offset' : offset}
        def codegen(code, block_size, args):
            return map_image_to_pixel(code, in_ptr, out_ptr, pixel_op, args, block_size)
        code.set_generator(codegen, block_size, args)
        sim = Interpreter(code, image, block_size)
        sim.run()
        # second argument False: presumably disables output scaling so raw
        # values can be compared with the reference -- TODO confirm against
        # Interpreter.gen_output_image
        return sim.gen_output_image(1, False)

    def run_ref(image, offset):
        ''' pure python reference: res[i][j] = image[i][j + offset], 0 outside the image '''
        iwidth, iheight = len(image[0]), len(image)
        res = [[0 for x in xrange(iwidth)] for y in xrange(iheight)]
        def in_image(x, y):
            return 0 <= x < iwidth and 0 <= y < iheight
        for i, row in enumerate(image):
            for j, _ in enumerate(row):
                x, y = j + offset, i
                # write the single pixel; assigning to res itself would
                # replace the whole result image by a scalar
                res[i][j] = image[y][x] if in_image(x, y) else 0
        return res

    offset = 2
    image = [[random.randint(0, 255) for x in xrange(32)] for y in xrange(32)]
    res_test = run_test(image, offset)
    # compare against the reference; comparing run_test with itself can never fail
    # NOTE(review): the reference assumes reads beyond the image border yield 0
    res_ref = run_ref(image, offset)
    assert compare_images(res_test, res_ref) < 0.00001
def test_gray():
    ''' Basic test, generate a gray image.

    Every pixel of a 2x2 block image (8x8 blocks) is set to 128 by the kernel.
    '''
    block_size = (8, 8)
    im_width, im_height = tuple(dim*2 for dim in block_size)
    im_size = (im_width, im_height)

    # all black input, the kernel does not read it
    src_image = [[0] * im_size[0] for _ in xrange(im_size[1])]

    kernel_params = {'bwidth':block_size[0], 'bheight':block_size[1]}
    src = '''
@kernel
def main():
    y = get_local_id(0)
    x = get_local_id(1)
    index = y*%(bwidth)i + x
    out_ptr = %(bwidth)i * %(bheight)i
    out_ptr[index] = 128
'''%kernel_params
    test_out = run_kernel(src, src_image, block_size, {}, 16)

    # expected: uniform gray image
    ref_out = [[128.] * im_size[0] for _ in xrange(im_size[1])]

    assert compare_images(test_out, ref_out) < 0.001
def test_map_neighborhood_to_pixel():
    ''' Test map_neighborhood_to_pixel with a 3x3 mask.

    A random 32x32 image (2x2 blocks of 16x16) is filtered by the generated
    code and the result is compared with a pure python reference.
    '''
    def pixel_op(code, mask_val, image_val, acc, args, block_size):
        ''' Simple convolution implementation. '''
        with scoped_alloc(code, 2) as (v, mask_val_r):
            yield Imm(mask_val_r, mask_val)
            yield Mul(v, mask_val_r, image_val)
            yield Add(acc, acc, v)

    def run_test(image, coeff):
        ''' run the generated code in the interpreter and return output buffer 1 '''
        code = Code()
        block_size = (16, 16)
        in_ptr = 0
        out_ptr = block_size[0]*block_size[1]
        args = {}
        def codegen(code, block_size, args):
            return map_neighborhood_to_pixel(code, in_ptr, out_ptr, coeff, pixel_op, args, block_size)
        code.set_generator(codegen, block_size, args)
        sim = Interpreter(code, image, block_size)
        sim.run()
        # results can be negative or exceed 255, so read the raw values:
        # scaling and truncation turned off and float output (same call as
        # used by the integral image tests) -- TODO confirm argument meaning
        return sim.gen_output_image(1, False, False, True)

    def run_ref(image, coeff):
        ''' pure python reference: mask centered on each pixel, 0 outside the image '''
        # NOTE(review): assumes map_neighborhood_to_pixel centers the mask on
        # the pixel, does not flip it, and treats pixels outside the image
        # as 0 -- confirm against the codegen
        iwidth, iheight = len(image[0]), len(image)
        res = [[0 for x in xrange(iwidth)] for y in xrange(iheight)]
        half_h = len(coeff)//2
        for i, row in enumerate(image):
            for j, v in enumerate(row):
                acc = 0
                for ii, c_row in enumerate(coeff):
                    # neighbour coordinates, not the mask indices themselves
                    y = i + ii - half_h
                    if y >= 0 and y < iheight:
                        half_w = len(c_row)//2
                        for jj, c in enumerate(c_row):
                            x = j + jj - half_w
                            if x >= 0 and x < iwidth:
                                acc += c*image[y][x]
                # row index first, matching the layout of image
                res[i][j] = acc
        return res

    coeff = [[-1, 0, 1]]*3
    image = [[random.randint(0, 255) for x in xrange(32)] for y in xrange(32)]
    res_test = run_test(image, coeff)
    # compare against the reference; comparing run_test with itself can never fail
    res_ref = run_ref(image, coeff)
    assert compare_images(res_test, res_ref) < 0.00001
def test_conv_3x1():
    ''' Simple 1 dimensional convolution test.

    Uses a single 8x8 block, the kernel does not account for interblock
    communication. In the reference, neighbours outside the row contribute 0.
    '''
    import random
    block_size = (8, 8)
    out_ptr = block_size[0]*block_size[1]
    # only single block, this code doesn't account for interblock comm
    im_size = tuple(dim*1 for dim in block_size)
    src_image = [[random.randint(0, 255) for _ in xrange(im_size[0])] for _ in xrange(im_size[1])]

    kernel_params = {'bwidth':block_size[0]}
    src = '''
@kernel
def main(in_ptr, out_ptr):
    coeff = [-1, 0, 1]
    y = get_local_id(0)
    x = get_local_id(1)
    index = %(bwidth)i*y
    acc = 0
    current_x = x - 1
    for i in range(3):
        in_v = in_ptr[index+current_x]
        acc += (coeff[i] * in_v if current_x < %(bwidth)i else 0) if current_x >= 0 else 0
        current_x += 1
    out_ptr[index+x] = acc
'''%kernel_params
    kernel_args = {'in_ptr':0, 'out_ptr':out_ptr}
    test_out = run_kernel(src, src_image, block_size, kernel_args, 16)

    # python reference: horizontal 3 tap mask, zero outside of the row
    coeffs = [-1, 0, 1]

    def row_response(src_row, j):
        ''' mask response centered on column j of src_row '''
        total = 0
        for k, coeff in enumerate(coeffs):
            x = j + k - 1
            if 0 <= x < len(src_row):
                total += coeff * src_row[x]
        return total

    ref_out = [[row_response(src_row, j) for j in xrange(len(src_row))]
               for src_row in src_image]

    assert compare_images(test_out, ref_out) < 0.001
def test_treshold():
    ''' Threshold kernel test: 255 where the pixel is above th, 0 elsewhere.

    The image spans 2x2 blocks of 8x8 pixels.
    '''
    import random
    block_size = (8, 8)
    th = 100
    out_ptr = block_size[0]*block_size[1]
    im_size = tuple(dim*2 for dim in block_size)
    src_image = [[random.randint(0, 255) for _ in xrange(im_size[0])] for _ in xrange(im_size[1])]

    kernel_params = {'bwidth':block_size[0]}
    src = '''
@kernel
def main(in_ptr, out_ptr, th):
    y = get_local_id(0)
    x = get_local_id(1)
    index = %(bwidth)i*y + x
    in_v = in_ptr[index]
    out_v = 255 if in_v > th else 0
    out_ptr[index] = out_v
'''%kernel_params
    kernel_args = {'in_ptr':0, 'out_ptr':out_ptr, 'th':th}
    test_out = run_kernel(src, src_image, block_size, kernel_args, 16)

    # python reference
    ref_out = []
    for src_row in src_image:
        ref_row = []
        for px in src_row:
            if px > th:
                ref_row.append(255)
            else:
                ref_row.append(0)
        ref_out.append(ref_row)

    assert compare_images(test_out, ref_out) < 0.001