def test_th_code():
    ''' Compile a thresholding kernel through the driver and check the emitted code. '''
    expected = '''
imm r2,0
lid r4,r2
imm r2,1
lid r5,r2
imm r2,16
mul r6,r2,r4
add r2,r6,r5
add r4,r3,r2
memr r3,r4
imm r4,255
imm r5,0
cmp r3,r0
mov r0,r5
mov {GT} r0,r4
add r3,r1,r2
memw r3,r0
'''.strip()
    kernel_src = '''
@kernel
def main(in_ptr, out_ptr, th):
    y = get_local_id(0)
    x = get_local_id(1)
    index = 16*y + x
    in_v = in_ptr[index]
    out_v = 255 if in_v > th else 0
    out_ptr[index] = out_v
'''
    driver = CompilerDriver(8)
    kernel = driver.run(kernel_src)
    emitted = [InstrAdapter(instr) for instr in kernel.code]
    assert match_code(emitted, expected)
def test_simple_loop_emit():
    ''' Compile a two-iteration for loop with the default Compiler and check the emitted code. '''
    kernel_src = '''
@kernel
def main():
    b = 5
    for i in range(2):
        b = i
'''
    expected = '''
imm b@0, 5
ximm tmp_0@0, 2
xlabel loop_intro_1
ximm i@0, 0
ximm inc_1@0, 1
xlabel for_2
ximm tmp_3@0, 0
xemit tmp_5@0, i@1
imm tmp_6@0, 0
xcmp i@1, tmp_3@0
xphi {EQ} tmp_8@0, i@0, i@1
xmov b@1, tmp_8@0
xadd i@1, tmp_8@0, inc_1@0
xcmp i@1, tmp_0@0
xjmp {LT} for_2
'''.strip()
    # Only the first compiled kernel object is inspected.
    kernel = Compiler().compile(kernel_src)[0]
    emitted = [InstrAdapter(instr) for instr in kernel.code]
    assert match_code(emitted, expected)
def test_peephole_addorsub_zero():
    ''' Check that the peephole pass turns add/sub/or with a zero operand into a mov.

    The kernel also contains `0 - a`, which the expected code keeps as a sub,
    and two operations on constants, which the expected code folds to imm.
    '''
    kernel_src = '''
@kernel
def main(a):
    b = a | 0
    c = a + 0
    d = a - 0
    e = 0 - a # unoptimised case
    f = 4
    g = 3
    h = f + g
    i = f - g
'''
    expected = '''
imm tmp_0@0,0
mov b@0,a@0
imm tmp_1@0,0
mov c@0,a@0
imm tmp_2@0,0
mov d@0,a@0
imm tmp_3@0,0
sub e@0,tmp_3@0,a@0
imm f@0,4
imm g@0,3
imm h@0,7
imm i@0,1
'''.strip()
    compiler = Compiler()
    kernel = compiler.opt_peephole(compiler.compile(kernel_src)[0])
    emitted = [InstrAdapter(instr) for instr in kernel.code]
    assert match_code(emitted, expected)
def test_builtin_transfer():
    ''' Check the code emitted for the four transferFrom* builtin functions. '''
    kernel_src = '''
@kernel
def main():
    a = 3
    b = transferFromNorth(a)
    c = transferFromEast(b)
    d = transferFromSouth(c)
    e = transferFromWest(d)
'''
    # NOTE(review): the final expected line targets d@0 although the kernel
    # assigns the west transfer to e -- confirm this is intended.
    expected = '''
imm a@0, 3
mov out, a@0
mov b@0, north
mov out, b@0
mov c@0, east
mov out, c@0
mov d@0, south
mov out, d@0
mov d@0, west
'''.strip()
    kernel = Compiler().compile(kernel_src)[0]
    emitted = [InstrAdapter(instr) for instr in kernel.code]
    assert match_code(emitted, expected)
def test_peephole_twoconstant_sources():
    ''' Check that the peephole pass folds operations whose two sources are constants. '''
    kernel_src = '''
@kernel
def main():
    a = 1
    b = 12
    c = a + b
    d = b - a
    e = a | b
    a = -2
    b = 3
    f = b & a
    g = a * b
'''
    expected = '''
imm a@0,1
imm b@0,12
imm c@0,13
imm d@0,11
imm e@0,13
imm a@1,-2
imm b@1,3
imm f@0,2
imm g@0,-6
'''.strip()
    compiler = Compiler()
    kernel = compiler.opt_peephole(compiler.compile(kernel_src)[0])
    emitted = [InstrAdapter(instr) for instr in kernel.code]
    assert match_code(emitted, expected)
def test_convert_to_ssa_instr_phi():
    ''' SSA conversion where a conditional Inv and a conditional Mov write the same register. '''
    expect = '''
imm r0~0, 1
imm r1~0, 2
cmp r0~0, r1~0
inv tmp_1, r0~0
imm r0~1, 3
mov tmp_0, r1~0
phi{LE} r2~1, tmp_1, tmp_0
mov r3~1, r2~1
'''
    block_size = (4, 4)
    args = {}

    def codegen(code, block_size, args):
        with scoped_alloc(code, 4) as (a, b, c, d):
            yield Imm(a, 1)
            yield Imm(b, 2)
            yield Cmp(a, b)
            yield Inv(c, a, cond='LE')
            # Reassign a so the test sees whether the earlier value was captured.
            yield Imm(a, 3)
            yield Mov(c, b, cond='GT')
            yield Mov(d, c)

    convertor = CodegenToSSAConvertor(100)
    generated = list(codegen(Code(), block_size, args))
    ref_code = convert_code_to_compact_repr(generated)
    # Only the first element of the conversion result is compared.
    ssa_code = convertor._convert_code_to_ssa(ref_code)[0]
    test_code = convert_compact_repr_to_obj(ssa_code)
    assert match_code(test_code, expect)
def test_logic_variable2():
    ''' A stored comparison must keep using the value the variable had when it was compared.

    The kernel stores `b > 0`, then modifies b, then uses the stored
    condition again.
    '''
    expected = '''
imm r1,0
imm r2,4
cmp r0,r1
mov r3,r0
mov {GT} r3,r2
imm r2,1
add r4,r0,r2
imm r2,3
imm r5,1
cmp r0,r1
mov r0,r5
mov {GT} r0,r2
'''.strip()
    kernel_src = '''
@kernel
def main(b):
    b_gt = b > 0
    d = 4 if b_gt else b
    b += 1
    c = 3 if b_gt else 1
'''
    driver = CompilerDriver(8)
    kernel = driver.run(kernel_src)
    emitted = [InstrAdapter(instr) for instr in kernel.code]
    assert match_code(emitted, expected)
def test_convert_to_ssa_mov_phi_long():
    ''' SSA conversion of a simple phi case, with the code converted fragment by fragment. '''
    expect = '''
imm r0~0, 1
imm r1~0, 2
cmp r0~0, r1~0
imm r0~1, 3
phi{GT} r2~1, r1~0, r0~0
mov r3~1, r2~1
imm r0~2, 1
imm r1~1, 2
cmp r0~2, r1~1
imm r0~3, 3
phi{GT} r2~2, r1~1, r0~2
mov r3~2, r2~2
'''
    block_size = (4, 4)
    args = {}

    def codegen(code, block_size, args):
        # The same instruction sequence is generated twice.
        for _ in xrange(2):
            with scoped_alloc(code, 4) as (a, b, c, d):
                yield Imm(a, 1)
                yield Imm(b, 2)
                yield Cmp(a, b)
                yield Mov(c, a)
                # Reassign a so the test sees whether the earlier value was captured.
                yield Imm(a, 3)
                yield Mov(c, b, cond='GT')
                yield Mov(d, c)

    block_len = 4
    convertor = CodegenToSSAConvertor(block_len)
    fragments = convertor.gen_ssa_fragments(codegen, Code(), block_size, args)
    # Concatenate the instructions of all fragments before matching.
    test_code_total = []
    for fragment in fragments:
        test_code_total.extend(convert_compact_repr_to_obj(fragment.instructions))
    assert match_code(test_code_total, expect)
def test_get2D_copy_propagation():
    ''' Compile get2D_simple_src and check the code after copy propagation.

    Same kernel source as test_get2D, but the compiled object is additionally
    run through compiler.opt_copy_propagation before matching.
    '''
    expected = '''
imm in_ptr@0, 0
imm bwidth@0, 8
imm bheight@0, 8
imm tmp_0@0, 0
lid y@0, tmp_0@0
imm tmp_1@0, 1
lid x@0, tmp_1@0
imm cg_tmp_1@0, 0
imm cg_tmp_2@0, 0
add cg_tmp_3@0, x@0, bwidth@0
cmp x@0, cg_tmp_1@0
phi {LT} cg_tmp_6@0, cg_tmp_3@0, x@0
sub cg_tmp_7@0, cg_tmp_6@0, bwidth@0
cmp x@0, bwidth@0
phi {GE} cg_tmp_8@0, cg_tmp_7@0, cg_tmp_6@0
add cg_tmp_9@0, y@0, bheight@0
cmp y@0, cg_tmp_2@0
phi {LT} cg_tmp_12@0, cg_tmp_9@0, y@0
sub cg_tmp_13@0, cg_tmp_12@0, bheight@0
cmp y@0, bheight@0
phi {GE} cg_tmp_14@0, cg_tmp_13@0, cg_tmp_12@0
mul cg_tmp_15@0, cg_tmp_14@0, bwidth@0
add cg_tmp_16@0, cg_tmp_15@0, cg_tmp_8@0
add cg_tmp_17@0, in_ptr@0, cg_tmp_16@0
memr cg_tmp_19@0, cg_tmp_17@0
mov out, cg_tmp_19@0
mov cg_tmp_20@0, west
cmp x@0, cg_tmp_1@0
phi {LT} cg_tmp_21@0, cg_tmp_20@0, cg_tmp_19@0
mov out, cg_tmp_21@0
mov cg_tmp_22@0, east
cmp x@0, bwidth@0
phi {GE} cg_tmp_23@0, cg_tmp_22@0, cg_tmp_21@0
mov out, cg_tmp_23@0
mov cg_tmp_24@0, north
cmp y@0, cg_tmp_2@0
phi {LT} cg_tmp_25@0, cg_tmp_24@0, cg_tmp_23@0
mov out, cg_tmp_25@0
mov cg_tmp_26@0, south
cmp y@0, bheight@0
phi {GE} cg_tmp_27@0, cg_tmp_26@0, cg_tmp_25@0
'''
    compiler = Compiler()
    kernel = compiler.compile(get2D_simple_src)[0]
    # Dump the unoptimised code, one instruction per line.
    print('\n'.join(str(InstrAdapter(instr)) for instr in kernel.code))
    kernel = compiler.opt_copy_propagation(kernel)
    emitted = [InstrAdapter(instr) for instr in kernel.code]
    assert match_code(emitted, expected)
def test_2D_const_list():
    ''' Code generation for a kernel that indexes a 2D list of constants.

    The kernel argument q is patched in after compilation via
    Compiler.patch_arguments_before_run, so it is only known right
    before execution.
    '''
    from blip.code.BlipCompiler import CompilerDriver, Compiler
    expected = '''
imm r0,41
imm r1,0
imm r2,0
imm r3,0
imm r4,1
mul r5,r0,r4
add r4,r1,r5
imm r1,1
imm r5,2
mul r6,r0,r5
add r5,r4,r6
imm r4,2
imm r6,3
mul r7,r0,r6
add r6,r5,r7
imm r5,1
imm r7,0
imm r8,4
mul r9,r0,r8
add r8,r6,r9
imm r6,1
imm r9,5
mul r10,r0,r9
add r9,r8,r10
imm r8,2
imm r10,6
mul r11,r0,r10
add r0,r9,r11
mov r9,r0
'''.strip()
    kernel_src = '''
@kernel
def main(q):
    a = [[1, 2, 3], [4, 5, 6]]
    acc = 0
    for i in range(2):
        for j in range(3):
            acc += q*a[i][j]
    return acc
'''
    driver = CompilerDriver(16, no_sequencer=True)
    kernel = driver.run(kernel_src)
    patched = Compiler.patch_arguments_before_run(kernel, [41])
    emitted = [InstrAdapter(instr) for instr in patched.code]
    assert match_code(emitted, expected)
def test_const_array_access():
    ''' Compile a kernel that reads one element of a constant list and check the emitted code. '''
    expected = '''
imm r0,1
imm r1,2
'''.strip()
    kernel_src = '''
@kernel
def main():
    r = [1, 2, 3]
    b = r[1]
'''
    driver = CompilerDriver(8)
    kernel = driver.run(kernel_src)
    emitted = [InstrAdapter(instr) for instr in kernel.code]
    assert match_code(emitted, expected)
def test_phi_codegen():
    ''' Generate code for a hand-built Comparison followed by a PhiValue. '''
    from blip.code.BlipCompiler import NamedValue, PhiValue, Codegenerator, Comparison
    expected = '''
cmp x, y
phi {GT} z, x, y
'''
    comparison = (NamedValue('cmp'),
                  Comparison(NamedValue('x'), 'Gt', NamedValue('y')))
    phi = (NamedValue('z'),
           PhiValue(NamedValue('cmp'), NamedValue('x'), NamedValue('y')))
    kernelObject = KernelObject('test')
    kernelObject.values = [comparison, phi]
    generator = Codegenerator(Compiler())
    kernelObject = generator.gen_code(kernelObject)
    emitted = [InstrAdapter(instr) for instr in kernelObject.code]
    assert match_code(emitted, expected)
def test_jmp_codegen():
    ''' Generate code for a hand-built seq_value Comparison followed by a JmpValue. '''
    from blip.code.BlipCompiler import NamedValue, JmpValue, Codegenerator, Comparison
    expected = '''
xcmp x, y
xjmp {GT} for_3
'''
    comparison = (NamedValue('cmp', seq_value=True),
                  Comparison(NamedValue('x', seq_value=True), 'Gt',
                             NamedValue('y', seq_value=True)))
    jump = (NamedValue('tmp', seq_value=True),
            JmpValue(NamedValue('cmp'), NamedValue('for_3')))
    kernelObject = KernelObject('test')
    kernelObject.values = [comparison, jump]
    generator = Codegenerator(Compiler())
    kernelObject = generator.gen_code(kernelObject)
    emitted = [InstrAdapter(instr) for instr in kernelObject.code]
    assert match_code(emitted, expected)
def test_builtin_loadwest():
    ''' Check the code emitted for the loadWest builtin after phi-node replacement. '''
    kernel_src = '''
@kernel
def main():
    b = loadWest()
'''
    expected = '''
mov b@0, west
'''.strip()
    compiler = Compiler()
    kernel = compiler.replace_phi_nodes(compiler.compile(kernel_src)[0])
    emitted = [InstrAdapter(instr) for instr in kernel.code]
    assert match_code(emitted, expected)
def test_inplace_operator():
    ''' Check the code emitted for an in-place `+=` on a local variable. '''
    kernel_src = '''
@kernel
def main():
    b = 5
    b += 1
'''
    expected = '''
imm b@0,5
imm tmp_0@0,1
add b@1,b@0,tmp_0@0
'''.strip()
    kernel = Compiler().compile(kernel_src)[0]
    emitted = [InstrAdapter(instr) for instr in kernel.code]
    assert match_code(emitted, expected)
def test_compile_add():
    ''' Check the code emitted for adding a literal to a local variable. '''
    kernel_src = '''
@kernel
def main():
    b = 4
    a = b + 2
'''
    expected = '''
imm b@0,4
imm tmp_0@0,2
add a@0,b@0,tmp_0@0
'''.strip()
    kernel = Compiler().compile(kernel_src)[0]
    emitted = [InstrAdapter(instr) for instr in kernel.code]
    assert match_code(emitted, expected)
def test_convert_to_ssa_easy():
    ''' SSA conversion of straight-line code without conditional instructions. '''
    expect = '''
imm r0~0, 1
imm r1~0, 2
add r0~1, r0~0, r1~0
'''
    block_size = (4, 4)
    args = {}

    def codegen(code, block_size, args):
        with scoped_alloc(code, 2) as (a, b):
            yield Imm(a, 1)
            yield Imm(b, 2)
            yield Add(a, a, b)

    convertor = CodegenToSSAConvertor(100)
    generated = list(codegen(Code(), block_size, args))
    ref_code = convert_code_to_compact_repr(generated)
    # Only the first element of the conversion result is compared.
    ssa_code = convertor._convert_code_to_ssa(ref_code)[0]
    test_code = convert_compact_repr_to_obj(ssa_code)
    assert match_code(test_code, expect)
def test_patch_arguments():
    ''' Patch a constant kernel argument into driver-compiled code and check the result. '''
    from blip.code.BlipCompiler import CompilerDriver, Compiler
    expected = '''
imm r0,41
imm r1,1
add r2,r0,r1
mov _, r2
'''.strip()
    kernel_src = '''
@kernel
def main(q):
    return q + 1
'''
    driver = CompilerDriver(8)
    kernel = driver.run(kernel_src)
    patched = Compiler.patch_arguments_before_run(kernel, [41])
    emitted = [InstrAdapter(instr) for instr in patched.code]
    assert match_code(emitted, expected)
def test_builtin_sendout():
    ''' Check the code emitted for the sendOut builtin after phi-node replacement. '''
    kernel_src = '''
@kernel
def main():
    a = 3
    sendOut(a)
'''
    expected = '''
imm a@0, 3
mov out, a@0
'''.strip()
    compiler = Compiler()
    kernel = compiler.replace_phi_nodes(compiler.compile(kernel_src)[0])
    emitted = [InstrAdapter(instr) for instr in kernel.code]
    assert match_code(emitted, expected)
def test_delayed_memderef():
    ''' Check the code for a memory read whose index variable is reassigned before the read value is used. '''
    expected = '''
imm r0,3
add r2,r1,r0
memr r0,r2
imm r1,5
'''.strip()
    kernel_src = '''
@kernel
def main(q):
    b = 3
    a = q[b]
    b = 5
    c = a
'''
    driver = CompilerDriver(8)
    kernel = driver.run(kernel_src)
    emitted = [InstrAdapter(instr) for instr in kernel.code]
    assert match_code(emitted, expected)
def test_logic_variable():
    ''' Check the code for a comparison stored in a variable and then used as a condition. '''
    expected = '''
imm r1,0
imm r2,3
imm r3,1
cmp r0,r1
mov r0,r3
mov {GT} r0,r2
'''.strip()
    kernel_src = '''
@kernel
def main(b):
    a = b > 0
    c = 3 if a else 1
'''
    driver = CompilerDriver(8)
    kernel = driver.run(kernel_src)
    emitted = [InstrAdapter(instr) for instr in kernel.code]
    assert match_code(emitted, expected)
def test_convert_to_ssa_ports():
    ''' SSA conversion of straight-line code that writes code.out and reads code.east. '''
    expect = '''
imm r0~0, 1
mov out, r0~0
mov r1~0, east
add r0~1, r0~0, r1~0
'''
    block_size = (4, 4)
    args = {}

    def codegen(code, block_size, args):
        with scoped_alloc(code, 2) as (a, b):
            yield Imm(a, 1)
            yield Mov(code.out, a)
            yield Mov(b, code.east)
            yield Add(a, a, b)

    convertor = CodegenToSSAConvertor(100)
    generated = list(codegen(Code(), block_size, args))
    ref_code = convert_code_to_compact_repr(generated)
    # Only the first element of the conversion result is compared.
    ssa_code = convertor._convert_code_to_ssa(ref_code)[0]
    test_code = convert_compact_repr_to_obj(ssa_code)
    assert match_code(test_code, expect)
def test_simple_loop_emit_noseq():
    ''' Compile a two-iteration for loop with no_sequencer=True and check the emitted code. '''
    kernel_src = '''
@kernel
def main():
    b = 5
    for i in range(2):
        b = i
'''
    expected = '''
imm b@0,5
imm i@0,0
mov b@1,i@0
imm i@1,1
mov b@2,i@1
'''.strip()
    compiler = Compiler(no_sequencer=True)
    kernel = compiler.compile(kernel_src)[0]
    emitted = [InstrAdapter(instr) for instr in kernel.code]
    assert match_code(emitted, expected)
def test_replace_phi_nodes():
    ''' Check the code for a conditional assignment after compiler.replace_phi_nodes. '''
    kernel_src = '''
@kernel
def main(a):
    b = 3 if a > 0 else 1
'''
    expected = '''
imm tmp_0@0,0
imm tmp_2@0,3
imm tmp_3@0,1
cmp a@0,tmp_0@0
mov b@0,tmp_3@0
mov {GT} b@0,tmp_2@0
'''.strip()
    compiler = Compiler()
    kernel = compiler.replace_phi_nodes(compiler.compile(kernel_src)[0])
    emitted = [InstrAdapter(instr) for instr in kernel.code]
    assert match_code(emitted, expected)
def test_compile_conditional():
    ''' Check the code emitted by Compiler.compile for a conditional assignment followed by a return. '''
    kernel_src = '''
@kernel
def main(a):
    b = 3 if a > 0 else 1
    return b
'''
    expected = '''
imm tmp_0@0,0
imm tmp_2@0,3
imm tmp_3@0,1
cmp a@0,tmp_0@0
phi {GT} b@0,tmp_2@0,tmp_3@0
mov main___return@0,b@0
'''.strip()
    kernel = Compiler().compile(kernel_src)[0]
    emitted = [InstrAdapter(instr) for instr in kernel.code]
    assert match_code(emitted, expected)
def test_patch_reg_arguments():
    ''' Patch kernel arguments before run, with a NamedValue and a constant as arguments. '''
    from blip.code.BlipCompiler import Compiler, NamedValue
    kernel_src = '''
@kernel
def main(p, q):
    b = p - 2
    return q + b
'''
    expected = '''
mov p@0,test_value@0
imm q@0,41
imm tmp_0@0,2
sub b@0,p@0,tmp_0@0
add tmp_1@0,q@0,b@0
mov main___return@0,tmp_1@0
'''.strip()
    kernel = Compiler().compile(kernel_src)[0]
    call_args = [NamedValue('test_value@0'), 41]
    patched = Compiler.patch_arguments_before_run(kernel, call_args)
    emitted = [InstrAdapter(instr) for instr in patched.code]
    assert match_code(emitted, expected)
def test_memderef_inside_condassign():
    ''' Check the code for a subscript read used as one branch of a conditional assignment. '''
    expected = '''
imm r0,3
imm r1,1
imm r2,0
imm r3,4
memr_imm r4,5
imm r5,0
cmp r1,r2
mov r1,r5
mov {GT} r1,r4
'''.strip()
    kernel_src = '''
@kernel
def main():
    ind = 3
    z = 1
    b = z[4] if z > 0 else 0
'''
    driver = CompilerDriver(8)
    kernel = driver.run(kernel_src)
    emitted = [InstrAdapter(instr) for instr in kernel.code]
    assert match_code(emitted, expected)
def test_conv_code_noseq():
    ''' Check the code for a simple 3x1 convolution kernel compiled with no_sequencer=True. '''
    expected = '''
imm r0,0
lid r3,r0
imm r0,1
lid r4,r0
imm r0,16
mul r5,r0,r3
imm r0,0
imm r3,1
sub r6,r4,r3
imm r3,0
add r7,r5,r6
add r8,r2,r7
memr r7,r8
imm r8,0
imm r9,16
imm r10,-1
mul r11,r10,r7
imm r7,0
cmp r6,r9
mov r9,r7
mov {LT} r9,r11
imm r7,0
cmp r6,r8
mov r8,r7
mov {GE} r8,r9
add r7,r0,r8
imm r0,1
add r8,r6,r0
imm r0,1
add r6,r5,r8
add r9,r2,r6
memr r6,r9
imm r9,0
imm r10,16
imm r11,0
mul r12,r11,r6
imm r6,0
cmp r8,r10
mov r10,r6
mov {LT} r10,r12
imm r6,0
cmp r8,r9
mov r9,r6
mov {GE} r9,r10
add r6,r7,r9
imm r7,1
add r9,r8,r7
imm r7,2
add r8,r5,r9
add r10,r2,r8
memr r2,r10
imm r8,0
imm r10,16
imm r11,1
mul r12,r11,r2
imm r2,0
cmp r9,r10
mov r10,r2
mov {LT} r10,r12
imm r2,0
cmp r9,r8
mov r8,r2
mov {GE} r8,r10
add r2,r6,r8
imm r6,1
add r8,r9,r6
add r6,r5,r4
add r4,r1,r6
memw r4,r2
'''.strip()
    kernel_src = '''
@kernel
def main(in_ptr, out_ptr, th):
    coeff = [-1, 0, 1]
    y = get_local_id(0)
    x = get_local_id(1)
    index = 16*y
    acc = 0
    current_x = x - 1
    for i in range(3):
        in_v = in_ptr[index+current_x]
        acc += (coeff[i] * in_v if current_x < 16 else 0) if current_x >= 0 else 0
        current_x += 1
    out_ptr[index+x] = acc
'''
    driver = CompilerDriver(16, no_sequencer=True)
    kernel = driver.run(kernel_src)
    emitted = [InstrAdapter(instr) for instr in kernel.code]
    assert match_code(emitted, expected)
def test_get2D():
    ''' Compile get2D_simple_src and check the unoptimised code.

    The kernel uses the get2D builtin, which fetches a value from a 2D
    buffer with boundary handling.
    '''
    # NOTE(review): the in-pattern comment on the `phi {GE} cg_tmp_8@0` line
    # names cg_tmp_4 as the result -- looks like a stale comment, confirm.
    expected = '''
# initiate parameters
imm in_ptr@0, 0
imm bwidth@0, 8
imm bheight@0, 8
imm tmp_0@0, 0
lid y@0, tmp_0@0
imm tmp_1@0, 1
lid x@0, tmp_1@0
# copy parameters
mov cg_tmp_18@0, in_ptr@0
mov cg_tmp_4@0, x@0
mov cg_tmp_10@0, y@0
mov cg_tmp_5@0, bwidth@0
mov cg_tmp_11@0, bheight@0
# start of get2D
imm cg_tmp_1@0, 0
imm cg_tmp_2@0, 0
add cg_tmp_3@0, cg_tmp_4@0, cg_tmp_5@0 # cg_tmp_3 = x + bwidth
cmp cg_tmp_4@0, cg_tmp_1@0 # comp x, 0
phi {LT} cg_tmp_6@0, cg_tmp_3@0, cg_tmp_4@0 # cg_tmp_6 = phi(x < 0, cg_tmp_3, x)
sub cg_tmp_7@0, cg_tmp_6@0, cg_tmp_5@0 # cg_tmp_7 = cg_tmp_6 - bwidth
cmp cg_tmp_4@0, cg_tmp_5@0 # comp x, bwidth
phi {GE} cg_tmp_8@0, cg_tmp_7@0, cg_tmp_6@0 # cg_tmp_4 = phi(x >= bwidth, cg_tmp_7, cg_tmp_6)
add cg_tmp_9@0, cg_tmp_10@0, cg_tmp_11@0 # cg_tmp_9 = y + bheight
cmp cg_tmp_10@0, cg_tmp_2@0
phi {LT} cg_tmp_12@0, cg_tmp_9@0, cg_tmp_10@0
sub cg_tmp_13@0, cg_tmp_12@0, cg_tmp_11@0
cmp cg_tmp_10@0, cg_tmp_11@0
phi {GE} cg_tmp_14@0, cg_tmp_13@0, cg_tmp_12@0
mul cg_tmp_15@0, cg_tmp_14@0, cg_tmp_5@0
add cg_tmp_16@0, cg_tmp_15@0, cg_tmp_8@0
add cg_tmp_17@0, cg_tmp_18@0, cg_tmp_16@0
memr cg_tmp_19@0, cg_tmp_17@0
mov out, cg_tmp_19@0
mov cg_tmp_20@0, west
cmp cg_tmp_4@0, cg_tmp_1@0
phi {LT} cg_tmp_21@0, cg_tmp_20@0, cg_tmp_19@0
mov out, cg_tmp_21@0
mov cg_tmp_22@0, east
cmp cg_tmp_4@0, cg_tmp_5@0
phi {GE} cg_tmp_23@0, cg_tmp_22@0, cg_tmp_21@0
mov out, cg_tmp_23@0
mov cg_tmp_24@0, north
cmp cg_tmp_10@0, cg_tmp_2@0
phi {LT} cg_tmp_25@0, cg_tmp_24@0, cg_tmp_23@0
mov out, cg_tmp_25@0
mov cg_tmp_26@0, south
cmp cg_tmp_10@0, cg_tmp_11@0
phi {GE} cg_tmp_27@0, cg_tmp_26@0, cg_tmp_25@0
mov cg_tmp_28@0, cg_tmp_27@0
mov v@0, cg_tmp_28@0
'''.strip()
    kernel = Compiler().compile(get2D_simple_src)[0]
    emitted = [InstrAdapter(instr) for instr in kernel.code]
    assert match_code(emitted, expected)