def _BblRegAllocOrSpill(bbl: ir.Bbl, fun: ir.Fun) -> int:
    """Allocate CPU registers to the intra-Bbl live ranges, spilling if needed.

    Note: this runs after global register allocation has occurred, so regs
    that already carry a CPU register are treated as pre-allocated.
    """
    # print("\n".join(serialize.BblRenderToAsm(bbl)))
    live_ranges = liveness.BblGetLiveRanges(bbl, fun, bbl.live_out)
    live_ranges.sort()
    for lr in live_ranges:
        assert liveness.LiveRangeFlag.IGNORE not in lr.flags
        # Since we are operating on a BBL we cannot change LiveRanges
        # extending beyond the BBL.
        # reg_kinds_fixed (e.g. Machine) regs are assumed to be
        # pre-allocated and will not change.
        if lr.reg.HasCpuReg():
            lr.flags |= liveness.LiveRangeFlag.PRE_ALLOC
            lr.cpu_reg = lr.reg.cpu_reg
        # print(repr(lr))

    if False:  # debugging aid: dump live ranges with their defining ins
        print("@@@@@@@@@@@@@@@@@@@")
        for lr in live_ranges:
            ins = ""
            if lr.def_pos >= 0 and not lr.is_use_lr():
                ins = "\t# " + serialize.InsRenderToAsm(bbl.inss[lr.def_pos])
            print(str(lr) + ins)

    # The four register pools handed to the scanner: GPR/FLT split into
    # link-aware-call (callee-save) and not.
    gpr_lac = GPR_REGS_MASK & GPR_LAC_REGS_MASK
    gpr_not_lac = GPR_REGS_MASK & ~GPR_LAC_REGS_MASK
    flt_lac = FLT_REGS_MASK & FLT_LAC_REGS_MASK
    flt_not_lac = FLT_REGS_MASK & ~FLT_LAC_REGS_MASK

    # First pass: spilling allowed, to discover which regs do not fit.
    _RunLinearScan(bbl, fun, live_ranges, True,
                   gpr_lac, gpr_not_lac, flt_lac, flt_not_lac)

    spilled_regs = _AssignAllocatedRegsAndReturnSpilledRegs(live_ranges)
    if spilled_regs:
        # print(f"@@ adjusted spill count: {len(spilled_regs)} {spilled_regs}")
        # Materialize the spills as stack loads/stores, recompute the live
        # ranges and run a second pass which must now succeed.
        reg_alloc.BblSpillRegs(bbl, fun, spilled_regs, o.DK.U32, "$spill")
        live_ranges = liveness.BblGetLiveRanges(bbl, fun, bbl.live_out)
        live_ranges.sort()
        for lr in live_ranges:
            # Same reasoning as above: regs with a CPU reg are pre-allocated.
            if lr.reg.HasCpuReg():
                lr.flags |= liveness.LiveRangeFlag.PRE_ALLOC
                lr.cpu_reg = lr.reg.cpu_reg
        _RunLinearScan(bbl, fun, live_ranges, False,
                       gpr_lac, gpr_not_lac, flt_lac, flt_not_lac)
        spilled_regs = _AssignAllocatedRegsAndReturnSpilledRegs(live_ranges)
        assert not spilled_regs
    return 0
def testBacktrack(self):
    # This is a good example of linear scan producing pretty bad assignments.
    code = io.StringIO(r"""
.fun main NORMAL [U32 U32 U32 U32] = [U32 U32 U32 U32]
.bbl start
    poparg w:U32
    poparg x:U32
    poparg y:U32
    poparg z:U32
    mov a:U32 1
    mov b:U32 2
    mov c:U32 3
    mov d:U32 4
    cmpeq e:U32 a b c d
    pusharg z
    pusharg y
    pusharg x
    pusharg w
    ret
""")
    unit = serialize.UnitParseFromAsm(code, False)
    fun = unit.fun_syms["main"]
    bbl = fun.bbls[0]
    _DumpBblWithLineNumbers(bbl)

    # Plain linear scan with only 4 regs: the first four def-only ranges get
    # regs 0..3, everything after that must be marked spilled.
    live_ranges = liveness.BblGetLiveRanges(bbl, fun, set(), True)
    live_ranges.sort()
    pool = TestRegPool(MakeGenericCpuRegs(4))
    reg_alloc.RegisterAssignerLinearScan(live_ranges, pool)
    for n, lr in enumerate(live_ranges):
        # print(lr)
        if not lr.uses:
            if n <= 3:
                assert lr.cpu_reg.no == n
            else:
                assert lr.cpu_reg is ir.CPU_REG_SPILL, f"unexpected reg {lr}"

    # The "fancy" assigner with 8 regs must not spill anything.
    live_ranges = liveness.BblGetLiveRanges(bbl, fun, set(), True)
    live_ranges.sort()
    pool = TestRegPool(MakeGenericCpuRegs(8))
    reg_alloc.RegisterAssignerLinearScanFancy(live_ranges, pool, False)
    for n, lr in enumerate(live_ranges):
        # print(lr)
        assert lr.cpu_reg != ir.CPU_REG_SPILL, f"unexpected reg {lr}"
def _BblRegAllocOrSpill(bbl: ir.Bbl, fun: ir.Fun) -> int:
    """Allocate CPU registers to the intra-Bbl live ranges, spilling if needed.

    Note: this runs after global register allocation has occurred.
    """
    # print("\n".join(serialize.BblRenderToAsm(bbl)))
    live_ranges = liveness.BblGetLiveRanges(bbl, fun, bbl.live_out)
    live_ranges.sort()
    for lr in live_ranges:
        assert liveness.LiveRangeFlag.IGNORE not in lr.flags
        # Since we are operating on a BBL we cannot change LiveRanges
        # extending beyond the BBL.
        # reg_kinds_fixed (e.g. Machine) regs are assumed to be
        # pre-allocated and will not change.
        if lr.reg.HasCpuReg():
            lr.flags |= liveness.LiveRangeFlag.PRE_ALLOC
            lr.cpu_reg = lr.reg.cpu_reg
        # print(repr(lr))

    # First reg-alloc pass to determine if spilling is needed.
    # Note, global and fixed registers have already been assigned and will
    # be respected by the allocator.
    _RunLinearScan(bbl, fun, live_ranges, True,
                   GPR_REGS_MASK & GPR_LAC_REGS_MASK,
                   GPR_REGS_MASK & ~GPR_LAC_REGS_MASK,
                   FLT_REGS_MASK & FLT_LAC_REGS_MASK,
                   FLT_REGS_MASK & ~FLT_LAC_REGS_MASK)

    spilled_regs = _AssignAllocatedRegsAndReturnSpilledRegs(live_ranges)
    if spilled_regs:
        # print(f"@@ adjusted spill count: {len(spilled_regs)} {spilled_regs}")
        # Convert all register spills to loads/stores from/to the stack.
        # This introduces new temporaries so we run another register
        # allocation pass afterwards.
        reg_alloc.BblSpillRegs(bbl, fun, spilled_regs, o.DK.U32, "$spill")
        live_ranges = liveness.BblGetLiveRanges(bbl, fun, bbl.live_out)
        live_ranges.sort()
        for lr in live_ranges:
            # Same reasoning as above: regs with a CPU reg are pre-allocated
            # and must not be re-assigned by the scanner.
            if lr.reg.HasCpuReg():
                lr.flags |= liveness.LiveRangeFlag.PRE_ALLOC
                lr.cpu_reg = lr.reg.cpu_reg
        _RunLinearScan(bbl, fun, live_ranges, False,
                       GPR_REGS_MASK & GPR_LAC_REGS_MASK,
                       GPR_REGS_MASK & ~GPR_LAC_REGS_MASK,
                       FLT_REGS_MASK & FLT_LAC_REGS_MASK,
                       FLT_REGS_MASK & ~FLT_LAC_REGS_MASK)
        spilled_regs = _AssignAllocatedRegsAndReturnSpilledRegs(live_ranges)
        assert not spilled_regs
    return 0
def testSimple(self):
    code = io.StringIO(r"""
.fun main NORMAL [U32 U32 U32 U32] = [U32 U32 U32 U32]
.bbl start
    poparg w:U32
    poparg x:U32
    poparg y:U32
    poparg z:U32
    pusharg z
    pusharg y
    pusharg x
    pusharg w
    ret
""")
    unit = serialize.UnitParseFromAsm(code, False)
    fun = unit.fun_syms["main"]
    bbl = fun.bbls[0]
    _DumpBblWithLineNumbers(bbl)

    # With 4 registers every def-only range n gets register n.
    live_ranges = liveness.BblGetLiveRanges(bbl, fun, set(), True)
    live_ranges.sort()
    pool = TestRegPool(MakeGenericCpuRegs(4))
    reg_alloc.RegisterAssignerLinearScan(live_ranges, pool)
    for n, lr in enumerate(live_ranges):
        # print(lr)
        if not lr.uses:
            assert lr.cpu_reg.no == n, f"unexpected reg {lr}"

    # With only 3 registers the fourth range must be spilled.
    live_ranges = liveness.BblGetLiveRanges(bbl, fun, set(), True)
    live_ranges.sort()
    pool = TestRegPool(MakeGenericCpuRegs(3))
    reg_alloc.RegisterAssignerLinearScan(live_ranges, pool)
    for n, lr in enumerate(live_ranges):
        # print(lr)
        if not lr.uses:
            if n <= 2:
                assert lr.cpu_reg.no == n
            else:
                assert lr.cpu_reg == ir.CPU_REG_SPILL, f"unexpected reg {lr}"
def testD(self):
    code = io.StringIO(r"""
.fun arm_syscall_write SIGNATURE [S32] = [S32 A32 U32]
.fun putchar NORMAL [] = [U32]

.fun writeln NORMAL [] = [A32 U32]
# live_out: ['r0', 'r1']
.reg S32 [$r0_S32 dummy]
.reg U32 [$r0_U32 $r1_U32 $r2_U32 len]
.reg A32 [$r0_A32 $r1_A32 buf]
.bbl start
    mov buf $r0_A32@r0                # 0
    mov len $r1_U32@r1                # 1
    mov $r2_U32@r2 len                # 2
    mov $r1_A32@r1 buf                # 3
    mov $r0_S32@r0 1                  # 4
    syscall arm_syscall_write 4:U32   # 5
    mov dummy $r0_S32@r0              # 6
    mov $r0_U32@r0 10                 # 7
    bsr putchar                       # 8
    ret                               # 9
""")
    cpu_regs = {"r0": ir.CpuReg("r0", 0),
                "r1": ir.CpuReg("r1", 1),
                "r2": ir.CpuReg("r2", 2)}
    unit = serialize.UnitParseFromAsm(code, cpu_regs=cpu_regs)

    # Give the callees their cpu-level live-in/out sets so liveness of the
    # fixed regs in `writeln` is computed correctly.
    fun = unit.fun_syms["arm_syscall_write"]
    fun.cpu_live_out = {cpu_regs["r0"]}
    fun.cpu_live_in = {cpu_regs["r0"], cpu_regs["r1"], cpu_regs["r2"]}
    fun = unit.fun_syms["putchar"]
    fun.cpu_live_in = {cpu_regs["r0"]}

    fun = unit.fun_syms["writeln"]
    cfg.FunSplitBbls(fun)
    cfg.FunInitCFG(fun)
    cfg.FunRemoveUnconditionalBranches(fun)
    cfg.FunRemoveEmptyBbls(fun)
    liveness.FunComputeLivenessInfo(fun)
    ranges = liveness.BblGetLiveRanges(fun.bbls[0], fun,
                                       fun.bbls[0].live_out, False)
    ranges.sort()
    print("TestD")
    for lr in ranges:
        print(lr)
    self.assertEqual(ranges, [
        liveness.LiveRange(liveness.BEFORE_BBL, 0, fun.reg_syms["$r0_A32"], 1),
        liveness.LiveRange(liveness.BEFORE_BBL, 1, fun.reg_syms["$r1_U32"], 1),
        liveness.LiveRange(0, 3, fun.reg_syms["buf"], 1),
        liveness.LiveRange(1, 2, fun.reg_syms["len"], 1),
        liveness.LiveRange(2, 5, fun.reg_syms["$r2_U32"], 0),
        liveness.LiveRange(3, 5, fun.reg_syms["$r1_A32"], 0),
        liveness.LiveRange(4, 5, fun.reg_syms["$r0_S32"], 0),
        liveness.LiveRange(5, 6, fun.reg_syms["$r0_S32"], 1),
        liveness.LiveRange(6, liveness.NO_USE, fun.reg_syms["dummy"], 0),
        liveness.LiveRange(7, 8, fun.reg_syms["$r0_U32"], 0),
    ])
def _BblRegAllocOrSpill(bbl: ir.Bbl, fun: ir.Fun) -> int:
    """Allocate CPU registers to the intra-Bbl live ranges, spilling if needed.

    Note: this runs after global register allocation has occurred.
    """
    # print("\n".join(serialize.BblRenderToAsm(bbl)))
    live_ranges = liveness.BblGetLiveRanges(bbl, fun, bbl.live_out, True)
    live_ranges.sort()
    for lr in live_ranges:
        # Since we are operating on a BBL we cannot change LiveRanges
        # extending beyond the BBL.
        # reg_kinds_fixed (e.g. Machine) regs are assumed to be
        # pre-allocated and will not change.
        if lr.reg.HasCpuReg():
            lr.flags |= liveness.LiveRangeFlag.PRE_ALLOC
            lr.cpu_reg = lr.reg.cpu_reg
        # print(repr(lr))

    # First pass: spilling allowed, to discover which regs do not fit.
    _RunLinearScan(bbl, fun, live_ranges, True,
                   _GPR_CALLEE_SAVE_REGS_MASK, _GPR_NOT_LAC_REGS_MASK,
                   _FLT_CALLEE_SAVE_REGS_MASK, _FLT_PARAMETER_REGS_MASK)

    spilled_regs = _AssignAllocatedRegsAndReturnSpilledRegs(live_ranges)
    if spilled_regs:
        # print(f"@@ adjusted spill count: {len(spilled_regs)} {spilled_regs}")
        # Materialize spills as stack traffic and re-run the scanner; the
        # second pass must succeed without further spilling.
        reg_alloc.BblSpillRegs(bbl, fun, spilled_regs, o.DK.U32)
        live_ranges = liveness.BblGetLiveRanges(bbl, fun, bbl.live_out, True)
        live_ranges.sort()
        for lr in live_ranges:
            # Same reasoning as above: regs with a CPU reg are pre-allocated.
            if lr.reg.HasCpuReg():
                lr.flags |= liveness.LiveRangeFlag.PRE_ALLOC
                lr.cpu_reg = lr.reg.cpu_reg
        _RunLinearScan(bbl, fun, live_ranges, False,
                       _GPR_CALLEE_SAVE_REGS_MASK, _GPR_NOT_LAC_REGS_MASK,
                       _FLT_CALLEE_SAVE_REGS_MASK, _FLT_PARAMETER_REGS_MASK)
        spilled_regs = _AssignAllocatedRegsAndReturnSpilledRegs(live_ranges)
        assert not spilled_regs
    return 0
def FunComputeBblRegUsageStats(
        fun: ir.Fun,
        reg_kind_map: Dict[o.DK, o.DK]) -> Dict[REG_KIND_LAC, int]:
    """Computes the maximum number of registers needed for locals across all Bbls.

    Requires liveness.
    """
    pool = BblRegUsageStatsRegPool(reg_kind_map)
    for bbl in fun.bbls:
        live_ranges = liveness.BblGetLiveRanges(bbl, fun, bbl.live_out, True)
        # NOTE(review): unlike other variants of this function, the ranges are
        # not sorted here — confirm BblGetLiveRanges(..., True) returns them
        # in the order the linear-scan assigner expects.
        # We do not want re-use of regs that are not coming from the pool.
        for lr in live_ranges:
            if LiveRangeShouldBeIgnored(lr, reg_kind_map):
                lr.flags |= liveness.LiveRangeFlag.IGNORE
        reg_alloc.RegisterAssignerLinearScan(live_ranges, pool)
    return pool.usage()
def testNoChange(self):
    # A single reg that is live-out of the bbl: exactly one live range which
    # crosses the bbl boundary and is not live-across-call.
    x = ir.Reg("x", o.DK.S32)
    target = ir.Bbl("target")
    bbl = ir.Bbl("bbl")
    bbl.live_out.add(x)
    bbl.AddIns(ir.Ins(O("poparg"), [x]))
    bbl.AddIns(ir.Ins(O("blt"), [target, ir.OffsetConst(1), x]))
    DumpBbl(bbl)
    live_ranges = liveness.BblGetLiveRanges(bbl, None, bbl.live_out, False)
    live_ranges.sort()
    lr_cross_bbl = [lr for lr in live_ranges if lr.is_cross_bbl()]
    lr_lac = [lr for lr in live_ranges
              if liveness.LiveRangeFlag.LAC in lr.flags]
    assert len(live_ranges) == 1
    assert len(lr_cross_bbl) == 1
    assert len(lr_lac) == 0, f"{lr_lac}"
def _BblRegAllocOrSpill(bbl: ir.Bbl, fun: ir.Fun) -> int:
    """Allocate CPU registers to the intra-Bbl live ranges.

    Note: this runs after global register allocation has occurred.
    Spilled live ranges are only marked here (not rewritten).
    """
    VERBOSE = False
    if VERBOSE:
        _DumpBblWithLineNumbers(bbl)

    live_ranges = liveness.BblGetLiveRanges(bbl, fun, bbl.live_out)
    live_ranges.sort()
    for lr in live_ranges:
        assert liveness.LiveRangeFlag.IGNORE not in lr.flags
        # Since we are operating on a BBL we cannot change LiveRanges
        # extending beyond the BBL.
        # reg_kinds_fixed (e.g. Machine) regs are assumed to be
        # pre-allocated and will not change.
        assert not lr.reg.IsSpilled()
        if lr.reg.HasCpuReg():
            lr.flags |= liveness.LiveRangeFlag.PRE_ALLOC
            lr.cpu_reg = lr.reg.cpu_reg
        if VERBOSE:
            print(repr(lr))

    # First reg-alloc pass to determine if spilling is needed.
    # Note, global and fixed registers have already been assigned and will
    # be respected by the allocator.
    _RunLinearScan(bbl, fun, live_ranges, True,
                   GPR_REGS_MASK & GPR_LAC_REGS_MASK,
                   GPR_REGS_MASK & ~GPR_LAC_REGS_MASK,
                   FLT_REGS_MASK & FLT_LAC_REGS_MASK,
                   FLT_REGS_MASK & ~FLT_LAC_REGS_MASK)

    if VERBOSE:
        print("@@@ AFTER")
        for lr in live_ranges:
            print(repr(lr))

    return _AssignAllocatedRegsAndMarkSpilledRegs(live_ranges)
def testE(self):
    """Live ranges for a bbl whose inputs/outputs are fixed FLT registers."""
    code = io.StringIO(r"""
.fun test NORMAL [F32 F32 F32 F32] = [F32 F32]
.reg F32 [a b add sub mul div $s0_F32 $s1_F32 $s2_F32 $s3_F32]
.bbl start
    mov a $s0_F32@s0
    mov b $s1_F32@s1
    add add a b
    sub sub a b
    mul mul a b
    div div a b
    mov $s3_F32@s3 div
    mov $s2_F32@s2 mul
    mov $s1_F32@s1 sub
    mov $s0_F32@s0 add
    ret
""")
    cpu_regs = {
        "s0": ir.CpuReg("s0", 0),
        "s1": ir.CpuReg("s1", 1),
        "s2": ir.CpuReg("s2", 2),
        # BUGFIX: was CpuReg("s3", 2), which gave s3 the same register
        # number as s2.
        "s3": ir.CpuReg("s3", 3),
    }
    unit = serialize.UnitParseFromAsm(code, cpu_regs=cpu_regs)
    fun = unit.fun_syms["test"]
    fun.cpu_live_out = {
        cpu_regs["s0"], cpu_regs["s1"], cpu_regs["s2"], cpu_regs["s3"]
    }
    fun.cpu_live_in = {cpu_regs["s0"], cpu_regs["s1"]}
    cfg.FunSplitBblsAtTerminators(fun)
    cfg.FunInitCFG(fun)
    cfg.FunRemoveUnconditionalBranches(fun)
    cfg.FunRemoveEmptyBbls(fun)
    liveness.FunComputeLivenessInfo(fun)
    ranges = liveness.BblGetLiveRanges(fun.bbls[0], fun, fun.bbls[0].live_out)
    ranges.sort()
    print("TestE")
    for lr in ranges:
        print(lr)
def FunComputeBblRegUsageStats(
        fun: ir.Fun,
        reg_kind_map: Dict[o.DK, int]) -> Dict[REG_KIND_LAC, int]:
    """Computes the maximum number of registers needed for locals across all Bbls.

    Requires liveness.
    """
    pool = BblRegUsageStatsRegPool(reg_kind_map)
    for bbl in fun.bbls:
        live_ranges = liveness.BblGetLiveRanges(bbl, fun, bbl.live_out)
        live_ranges.sort()
        if TRACE_REG_ALLOC:
            print("@" * 60)
            print("\n".join(serialize.BblRenderToAsm(bbl)))
            for lr in live_ranges:
                print(lr)
        # We do not want re-use of regs that are not coming from the pool.
        for lr in live_ranges:
            if LiveRangeShouldBeIgnored(lr, reg_kind_map):
                lr.flags |= liveness.LiveRangeFlag.IGNORE
        reg_alloc.RegisterAssignerLinearScan(live_ranges, pool)
    return pool.usage()
def testA(self):
    """Checks LAC flags on the live ranges of a multi-call bbl."""
    code = io.StringIO(r"""
.fun printf_u BUILTIN [] = [A32 U32]
.fun multi BUILTIN [U32 U32 U32 U32 U32] = [U32 U32]

.mem fmt 4 RO
.data 1 "%d\n\0"

.fun main NORMAL [S32] = []
.reg U32 [a s m d M x y out]
.reg A64 [f]
.bbl start
    mov x = 70
    mov y = 6
    pusharg y
    pusharg x
    bsr multi
    poparg a
    poparg s
    poparg m
    poparg d
    poparg M
    lea.mem f = fmt 0
    pusharg a
    pusharg f
    bsr printf_u
    pusharg s
    pusharg f
    bsr printf_u
    pusharg m
    pusharg f
    bsr printf_u
    pusharg d
    pusharg f
    bsr printf_u
    pusharg M
    pusharg f
    bsr printf_u
    mov out = 0
    pusharg out
    ret
""")
    unit = serialize.UnitParseFromAsm(code, False)
    fun = unit.fun_syms["main"]
    bbl = fun.bbls[0]
    # Only the regs whose LAC status is checked below are needed here
    # (removed unused bindings for x, y, a, out).
    s = fun.reg_syms["s"]
    m = fun.reg_syms["m"]
    d = fun.reg_syms["d"]
    M = fun.reg_syms["M"]
    f = fun.reg_syms["f"]
    DumpBbl(bbl)
    live_ranges = liveness.BblGetLiveRanges(bbl, fun, bbl.live_out, False)
    live_ranges.sort()
    lr_cross_bbl = [lr for lr in live_ranges if lr.is_cross_bbl()]
    lr_lac = [lr for lr in live_ranges
              if liveness.LiveRangeFlag.LAC in lr.flags]
    assert len(live_ranges) == 9, f"{live_ranges}"
    assert len(lr_cross_bbl) == 0, f"{lr_cross_bbl}"
    assert len(lr_lac) == 5, f"{lr_lac}"
    for lr in live_ranges:
        print("checking LR lac:", lr)
        # assert lr.lac == lr.reg in {M, d, f, m, s}, f"LR {lr}"
        self.assertNotEqual(lr.def_pos, lr.last_use_pos)
        # Exactly the regs that live across a call carry the LAC flag.
        self.assertEqual(liveness.LiveRangeFlag.LAC in lr.flags,
                         lr.reg in {M, d, f, m, s})
def testD(self):
    """Checks the full live-range list (including use-LRs) for `writeln`."""
    code = io.StringIO(r"""
.fun arm_syscall_write SIGNATURE [S32] = [S32 A32 U32]
.fun putchar NORMAL [] = [U32]

.fun writeln NORMAL [] = [A32 U32]
# live_out: ['r0', 'r1']
.reg S32 [$r0_S32 dummy]
.reg U32 [$r0_U32 $r1_U32 $r2_U32 len]
.reg A32 [$r0_A32 $r1_A32 buf]
.bbl start
    mov buf $r0_A32@r0                # 0
    mov len $r1_U32@r1                # 1
    mov $r2_U32@r2 len                # 2
    mov $r1_A32@r1 buf                # 3
    mov $r0_S32@r0 1                  # 4
    syscall arm_syscall_write 4:U32   # 5
    mov dummy $r0_S32@r0              # 6
    mov $r0_U32@r0 10                 # 7
    bsr putchar                       # 8
    ret                               # 9
""")
    cpu_regs = {
        "r0": ir.CpuReg("r0", 0),
        "r1": ir.CpuReg("r1", 1),
        "r2": ir.CpuReg("r2", 2)
    }
    unit = serialize.UnitParseFromAsm(code, cpu_regs=cpu_regs)
    fun = unit.fun_syms["arm_syscall_write"]
    fun.cpu_live_out = {cpu_regs["r0"]}
    fun.cpu_live_in = {cpu_regs["r0"], cpu_regs["r1"], cpu_regs["r2"]}
    fun = unit.fun_syms["putchar"]
    fun.cpu_live_in = {cpu_regs["r0"]}
    fun = unit.fun_syms["writeln"]
    cfg.FunSplitBblsAtTerminators(fun)
    cfg.FunInitCFG(fun)
    cfg.FunRemoveUnconditionalBranches(fun)
    cfg.FunRemoveEmptyBbls(fun)
    liveness.FunComputeLivenessInfo(fun)
    ranges = liveness.BblGetLiveRanges(fun.bbls[0], fun, fun.bbls[0].live_out)
    ranges.sort()
    print("TestD")
    for lr in ranges:
        print(lr)
    # Named LRs so the use-LRs below can reference them.
    lr_r0 = liveness.LiveRange(liveness.BEFORE_BBL, 0,
                               fun.reg_syms["$r0_A32"], 1)
    lr_r1 = liveness.LiveRange(liveness.BEFORE_BBL, 1,
                               fun.reg_syms["$r1_U32"], 1)
    lr_buf = liveness.LiveRange(0, 3, fun.reg_syms["buf"], 1)
    lr_len = liveness.LiveRange(1, 2, fun.reg_syms["len"], 1)
    lr_r0_2 = liveness.LiveRange(5, 6, fun.reg_syms["$r0_S32"], 1)
    expected = [
        lr_r0,
        lr_r1,
        liveness.LiveRange(0, 0, reg=ir.REG_INVALID, num_uses=1,
                           uses=[lr_r0]),
        lr_buf,
        liveness.LiveRange(1, 1, reg=ir.REG_INVALID, num_uses=1,
                           uses=[lr_r1]),
        lr_len,
        liveness.LiveRange(2, 2, reg=ir.REG_INVALID, num_uses=1,
                           uses=[lr_len]),
        liveness.LiveRange(2, 5, fun.reg_syms["$r2_U32"], 0),
        liveness.LiveRange(3, 3, reg=ir.REG_INVALID, num_uses=1,
                           uses=[lr_buf]),
        liveness.LiveRange(3, 5, fun.reg_syms["$r1_A32"], 0),
        liveness.LiveRange(4, 5, fun.reg_syms["$r0_S32"], 0),
        lr_r0_2,
        liveness.LiveRange(6, 6, reg=ir.REG_INVALID, num_uses=1,
                           uses=[lr_r0_2]),
        liveness.LiveRange(6, liveness.NO_USE, fun.reg_syms["dummy"], 0),
        liveness.LiveRange(7, 8, fun.reg_syms["$r0_U32"], 0),
    ]
    # self.assertSequenceEqual(ranges, expected)
    # this does not work because of the uses field
    self.assertEqual(len(ranges), len(expected))
    # BUGFIX: was `for a, b in zip():` — zip with no arguments yields
    # nothing, so the element-wise comparison never executed.
    for a, b in zip(ranges, expected):
        self.assertEqual(a, b)