示例#1
0
    def testB(self):
        code = io.StringIO(r"""
.fun main NORMAL [S32] = [S32 A64]

.bbl %start
  poparg argc:S32
  poparg argv:A64
  mov b:S32 1
  add a:S32 b 1
  add x:S32 a 1
  blt argc 2 if_1_true
  bra if_1_end
  
.bbl if_1_true
   pusharg 1:S32
   ret
   
.bbl if_1_end
   pusharg 0:S32
   ret
""")
        unit = serialize.UnitParseFromAsm(code)
        fun = unit.fun_syms["main"]
        optimize.FunCfgInit(fun, unit)
        liveness.FunComputeLivenessInfo(fun)
        liveness.FunRemoveUselessInstructions(fun)
        # print ("@@@@\n", "\n".join(serialize.FunRenderToAsm(fun)))
        for bbl in fun.bbls:
            for ins in bbl.inss:
                self.assertTrue(ins.opcode in {o.POPARG, o.PUSHARG, o.RET, o.BLT}, f"bad ins {ins}")
示例#2
0
def FunOptBasic(fun: ir.Fun, opt_stats: Dict[str, int],
                allow_conv_conversion: bool):
    """Run one round of basic machine-independent optimizations on `fun`.

    Mutates `fun` in place and accumulates per-pass transformation counts
    into `opt_stats`, which callers may share across several functions or
    rounds (see UnitOptBasic).

    Args:
        fun: function to optimize (mutated in place).
        opt_stats: counters keyed by pass name; always incremented, never
            overwritten, so totals survive across calls.
        allow_conv_conversion: forwarded to FunConstantFold to permit
            folding of conv instructions.
    """
    opt_stats["canonicalized"] += canonicalize.FunCanonicalize(fun)
    opt_stats["strength_red"] += lowering.FunStrengthReduction(fun)

    reaching_defs.FunComputeReachingDefs(fun)
    reaching_defs.FunCheckReachingDefs(fun)
    # BUGFIX: was `=`, which silently discarded counts accumulated by earlier
    # calls sharing the same opt_stats dict; all sibling counters use `+=`.
    opt_stats["reg_prop"] += reaching_defs.FunPropagateRegs(fun)
    opt_stats["const_prop"] += reaching_defs.FunPropagateConsts(fun)

    opt_stats["const_fold"] += reaching_defs.FunConstantFold(
        fun, allow_conv_conversion)

    # propagation/folding may expose new canonicalization opportunities
    opt_stats["canonicalized"] += canonicalize.FunCanonicalize(fun)
    opt_stats["strength_red"] += lowering.FunStrengthReduction(fun)

    opt_stats["ls_st_simplify"] += reaching_defs.FunLoadStoreSimplify(fun)

    opt_stats["move_elim"] += lowering.FunMoveElimination(fun)

    liveness.FunComputeLivenessInfo(fun)

    # BUGFIX: was `=` (same overwrite problem as "reg_prop" above).
    opt_stats["useless"] += liveness.FunRemoveUselessInstructions(fun)
    reg_stats.FunComputeRegStatsExceptLAC(fun)
    reg_stats.FunComputeRegStatsLAC(fun)

    opt_stats["dropped_regs"] += reg_stats.FunDropUnreferencedRegs(fun)
    opt_stats["separated_regs"] += reg_stats.FunSeparateLocalRegUsage(fun)
示例#3
0
    def testD(self):
        code = io.StringIO(r"""
.fun arm_syscall_write SIGNATURE [S32] = [S32 A32 U32]

.fun putchar NORMAL [] = [U32]

.fun writeln NORMAL [] = [A32 U32]
# live_out: ['r0', 'r1']
.reg S32 [$r0_S32 dummy]
.reg U32 [$r0_U32 $r1_U32 $r2_U32 len]
.reg A32 [$r0_A32 $r1_A32 buf]
.bbl start
    mov buf $r0_A32@r0                     # 0
    mov len $r1_U32@r1                     # 1
    mov $r2_U32@r2 len                     # 2
    mov $r1_A32@r1 buf                     # 3
    mov $r0_S32@r0 1                       # 4
    syscall arm_syscall_write 4:U32        # 5
    mov dummy $r0_S32@r0                   # 6
    mov $r0_U32@r0 10                      # 7
    bsr putchar                            # 8
    ret                                    # 9
""")
        cpu_regs = {"r0": ir.CpuReg("r0", 0), "r1": ir.CpuReg("r1", 1), "r2": ir.CpuReg("r2", 2)}
        unit = serialize.UnitParseFromAsm(code, cpu_regs=cpu_regs)

        fun = unit.fun_syms["arm_syscall_write"]
        fun.cpu_live_out = {cpu_regs["r0"]}
        fun.cpu_live_in = {cpu_regs["r0"], cpu_regs["r1"], cpu_regs["r2"]}

        fun = unit.fun_syms["putchar"]
        fun.cpu_live_in = {cpu_regs["r0"]}

        fun = unit.fun_syms["writeln"]
        cfg.FunSplitBbls(fun)
        cfg.FunInitCFG(fun)
        cfg.FunRemoveUnconditionalBranches(fun)
        cfg.FunRemoveEmptyBbls(fun)
        liveness.FunComputeLivenessInfo(fun)
        ranges = liveness.BblGetLiveRanges(fun.bbls[0], fun, fun.bbls[0].live_out, False)
        ranges.sort()
        print("TestD")
        for lr in ranges:
            print(lr)
        self.assertEqual(ranges, [
            liveness.LiveRange(liveness.BEFORE_BBL, 0, fun.reg_syms["$r0_A32"], 1),
            liveness.LiveRange(liveness.BEFORE_BBL, 1, fun.reg_syms["$r1_U32"], 1),
            liveness.LiveRange(0, 3, fun.reg_syms["buf"], 1),
            liveness.LiveRange(1, 2, fun.reg_syms["len"], 1),
            liveness.LiveRange(2, 5, fun.reg_syms["$r2_U32"], 0),
            liveness.LiveRange(3, 5, fun.reg_syms["$r1_A32"], 0),
            liveness.LiveRange(4, 5, fun.reg_syms["$r0_S32"], 0),
            liveness.LiveRange(5, 6, fun.reg_syms["$r0_S32"], 1),
            liveness.LiveRange(6, liveness.NO_USE, fun.reg_syms["dummy"], 0),
            liveness.LiveRange(7, 8, fun.reg_syms["$r0_U32"], 0),
        ])
示例#4
0
    def testBaseRegPropagation2(self):
        code = io.StringIO(r"""
.fun foo NORMAL [] = []
    .reg S32 [x]
    .reg U32 [y]
    .reg A32 [a counter] 

.bbl start
    poparg counter
    poparg y

    lea a counter 666
    ld x = a 0
    mul x = x 777
    st a 334 = x

    lea a counter y
    ld x = a 0
    mul x = x 777
    st a 0 = x

    lea a counter y
    ld x = a 0
    mul x = x 777
    st a 0 = x

    mov a counter
    ld x = a 0
    mul x = x 777
    st a 334 = x

    ret
         """)

        unit = serialize.UnitParseFromAsm(code, False)
        fun = unit.fun_syms["foo"]
        bbl = fun.bbls[0]

        cfg.FunInitCFG(fun)
        liveness.FunComputeLivenessInfo(fun)
        reaching_defs.FunComputeReachingDefs(fun)
        reaching_defs.FunPropagateConsts(fun)
        reaching_defs.FunLoadStoreSimplify(fun)
        liveness.FunRemoveUselessInstructions(fun)
        print("\n".join(serialize.FunRenderToAsm(fun)))
        # all ld/st were re-written
        for ins in bbl.inss:
            self.assertIn(ins.opcode.name, {
                "ret",
                "mul",
                "poparg",
                "ld",
                "ld",
                "st",
                "st",
            })
示例#5
0
def UnitOptBasic(unit: ir.Unit, dump_reg_stats) -> Dict[str, int]:
    """Runs FunOptBasic over every NORMAL function of `unit`.

    Optionally prints per-function register statistics when
    `dump_reg_stats` is truthy. Returns the accumulated optimization
    counters.
    """
    opt_stats: Dict[str, int] = collections.defaultdict(int)
    for fun in unit.funs:
        if fun.kind is not o.FUN_KIND.NORMAL:
            continue
        FunOptBasic(fun, opt_stats, allow_conv_conversion=True)
        if not dump_reg_stats:
            continue
        # recompute stats so the dump reflects the post-optimization state
        reg_stats.FunComputeRegStatsExceptLAC(fun)
        liveness.FunComputeLivenessInfo(fun)
        reg_stats.FunComputeRegStatsLAC(fun)
        rs = reg_stats.FunCalculateRegStats(fun)
        print(f"# {fun.name:30} RegStats: {rs}")
    return opt_stats
示例#6
0
    def testC(self):
        code = io.StringIO(r"""
.fun main NORMAL [S32] = []
.bbl %start
    mov %out:S32 3
    bra next
.bbl next
    pusharg %out
    ret
""")
        unit = serialize.UnitParseFromAsm(code)
        fun = unit.fun_syms["main"]
        optimize.FunCfgInit(fun, unit)
        liveness.FunComputeLivenessInfo(fun)
        # print ("@@@@\n", "\n".join(serialize.FunRenderToAsm(fun)))
        liveness.FunRemoveUselessInstructions(fun)
        # print ("@@@@\n", "\n".join(serialize.FunRenderToAsm(fun)))
        self.assertEqual(1, len(fun.bbls[0].inss))
        self.assertEqual(2, len(fun.bbls[1].inss))
示例#7
0
    def testE(self):
        code = io.StringIO(r"""


.fun test NORMAL [F32 F32 F32 F32] = [F32 F32]
.reg F32 [a b add sub mul div  $s0_F32  $s1_F32  $s2_F32  $s3_F32]
.bbl start
    mov a $s0_F32@s0
    mov b $s1_F32@s1
    add add a b
    sub sub a b
    mul mul a b
    div div a b
    mov $s3_F32@s3 div
    mov $s2_F32@s2 mul
    mov $s1_F32@s1 sub
    mov $s0_F32@s0 add
    ret
""")
        cpu_regs = {
            "s0": ir.CpuReg("s0", 0),
            "s1": ir.CpuReg("s1", 1),
            "s2": ir.CpuReg("s2", 2),
            "s3": ir.CpuReg("s3", 2)
        }
        unit = serialize.UnitParseFromAsm(code, cpu_regs=cpu_regs)
        fun = unit.fun_syms["test"]
        fun.cpu_live_out = {
            cpu_regs["s0"], cpu_regs["s1"], cpu_regs["s2"], cpu_regs["s3"]
        }
        fun.cpu_live_in = {cpu_regs["s0"], cpu_regs["s1"]}
        cfg.FunSplitBblsAtTerminators(fun)
        cfg.FunInitCFG(fun)
        cfg.FunRemoveUnconditionalBranches(fun)
        cfg.FunRemoveEmptyBbls(fun)
        liveness.FunComputeLivenessInfo(fun)
        ranges = liveness.BblGetLiveRanges(fun.bbls[0], fun,
                                           fun.bbls[0].live_out)
        ranges.sort()
        print("TestE")
        for lr in ranges:
            print(lr)
示例#8
0
def PhaseGlobalRegAlloc(fun: ir.Fun, _opt_stats: Dict[str, int], fout):
    """
    This phase introduces CpuRegs for globals and for situations where we have
    no choice which register to use, e.g. function parameters and results
    ("pre-allocated" regs).

    After this function has been run all globals will have a valid cpu_reg and
    we have to be careful to not introduce new globals subsequently.
    If not enough cpu_regs are available for all globals, some of them will be
    spilled. We err on the side of spilling more: the biggest danger is to
    over-allocate and then lack registers for intra-bbl register allocation.

    The whole global allocator is terrible and the decision which globals to
    spill is extremely simplistic at this time.

    We separate global from local register allocation so that we can use a
    straightforward linear scan allocator for the locals. This allocator
    assumes that each register is defined exactly once and hence does not
    work for globals.

    Args:
        fun: function to allocate registers for (mutated in place).
        _opt_stats: unused here (kept for a uniform phase signature).
        fout: optional file object; when set, a banner and reg stats are dumped.
    """
    debug = None  # set to a file object to enable the "@@" diagnostic dumps below
    if fout:
        print("#" * 60, file=fout)
        print(f"# GlobalRegAlloc {fun.name}", file=fout)
        print("#" * 60, file=fout)

    # print ("@@@@@@\n", "\n".join(serialize.FunRenderToAsm(fun)))

    # Gather use counts, liveness and lac (live-across-call) info needed below.
    reg_stats.FunComputeRegStatsExceptLAC(fun)
    reg_stats.FunDropUnreferencedRegs(fun)
    liveness.FunComputeLivenessInfo(fun)
    reg_stats.FunComputeRegStatsLAC(fun)

    # NOTE(review): comment previously referenced REG_KIND_MAP_ARM; here the
    # mapping used is REG_KIND_TO_CPU_REG_FAMILY — presumably it collapses reg
    # kinds into CPU register families (GPR/FLT). TODO confirm.
    local_reg_stats = reg_stats.FunComputeBblRegUsageStats(fun,
                                                           regs.REG_KIND_TO_CPU_REG_FAMILY)
    # we have introduced some cpu regs in previous phases - do not treat them as globals
    global_reg_stats = _FunGlobalRegStats(fun, regs.REG_KIND_TO_CPU_REG_FAMILY)
    DumpRegStats(fun, local_reg_stats, fout)

    # Bitmask of GPRs already claimed by pre-allocated regs.
    pre_allocated_mask_gpr = 0
    for reg in fun.regs:
        if reg.HasCpuReg() and reg.cpu_reg.kind == regs.CpuRegKind.GPR:
            pre_allocated_mask_gpr |= 1 << reg.cpu_reg.no

    # Handle GPR regs: first compute how many regs would be needed given
    # unlimited registers, split by (lac, not-lac).
    needed_gpr = RegsNeeded(len(global_reg_stats[(regs.CpuRegKind.GPR, True)]),
                            len(global_reg_stats[(regs.CpuRegKind.GPR, False)]),
                            local_reg_stats.get((regs.CpuRegKind.GPR, True), 0),
                            local_reg_stats.get((regs.CpuRegKind.GPR, False), 0))
    if debug:
        print(f"@@ GPR NEEDED {needed_gpr.global_lac} {needed_gpr.global_not_lac} "
              f"{needed_gpr.local_lac} {needed_gpr.local_not_lac}", file=debug)

    # Earmark register pools (lac and not-lac masks) for the globals.
    gpr_global_lac, gpr_global_not_lac = _GetRegPoolsForGlobals(
        needed_gpr, regs.GPR_REGS_MASK & regs.GPR_LAC_REGS_MASK,
                    regs.GPR_REGS_MASK & ~regs.GPR_LAC_REGS_MASK, pre_allocated_mask_gpr)
    if debug:
        print(f"@@ GPR POOL {gpr_global_lac:x} {gpr_global_not_lac:x}", file=debug)

    # Assign the earmarked regs to globals; whatever does not fit gets spilled.
    to_be_spilled: List[ir.Reg] = []
    to_be_spilled += regs.AssignCpuRegOrMarkForSpilling(
        global_reg_stats[(regs.CpuRegKind.GPR, True)], gpr_global_lac, 0)

    to_be_spilled += regs.AssignCpuRegOrMarkForSpilling(
        global_reg_stats[(regs.CpuRegKind.GPR, False)],
        gpr_global_not_lac & ~regs.GPR_LAC_REGS_MASK,
        gpr_global_not_lac & regs.GPR_LAC_REGS_MASK)

    # Handle Float regs — same process as for the GPRs above.
    pre_allocated_mask_flt = 0
    for reg in fun.regs:
        if reg.HasCpuReg() and reg.cpu_reg.kind == regs.CpuRegKind.FLT:
            pre_allocated_mask_flt |= 1 << reg.cpu_reg.no

    needed_flt = RegsNeeded(len(global_reg_stats[(regs.CpuRegKind.FLT, True)]),
                            len(global_reg_stats[(regs.CpuRegKind.FLT, False)]),
                            local_reg_stats.get((regs.CpuRegKind.FLT, True), 0),
                            local_reg_stats.get((regs.CpuRegKind.FLT, False), 0))
    if debug:
        print(f"@@ FLT NEEDED {needed_flt.global_lac} {needed_flt.global_not_lac} "
              f"{needed_flt.local_lac} {needed_flt.local_not_lac}", file=debug)

    flt_global_lac, flt_global_not_lac = _GetRegPoolsForGlobals(
        needed_flt, regs.FLT_REGS_MASK & regs.FLT_LAC_REGS_MASK,
                    regs.FLT_REGS_MASK & ~regs.FLT_LAC_REGS_MASK, pre_allocated_mask_flt)
    if debug:
        print(f"@@ FLT POOL {flt_global_lac:x} {flt_global_not_lac:x}", file=debug)

    to_be_spilled += regs.AssignCpuRegOrMarkForSpilling(
        global_reg_stats[(regs.CpuRegKind.FLT, True)], flt_global_lac, 0)
    to_be_spilled += regs.AssignCpuRegOrMarkForSpilling(
        global_reg_stats[(regs.CpuRegKind.FLT, False)],
        flt_global_not_lac & ~regs.FLT_LAC_REGS_MASK,
        flt_global_not_lac & regs.FLT_LAC_REGS_MASK)

    reg_alloc.FunSpillRegs(fun, o.DK.U32, to_be_spilled, prefix="$gspill")

    # Recompute Everything (TODO: make this more selective to reduce work)
    reg_stats.FunComputeRegStatsExceptLAC(fun)
    reg_stats.FunDropUnreferencedRegs(fun)
    liveness.FunComputeLivenessInfo(fun)
    reg_stats.FunComputeRegStatsLAC(fun)
    reg_stats.FunSeparateLocalRegUsage(fun)
示例#9
0
    def testBaseRegPropagation1(self):
        code = io.StringIO(r"""
 .mem COUNTER 4 RW
 .data 4 [0]

.fun foo NORMAL [] = []
    .stk array 4 4000
    .reg S32 [x]
    .reg U32 [y] 
    .reg A32 [counter] 

.bbl start
    lea.mem counter = COUNTER 0
    ld x = counter 0
    add x = x 1
    st counter 0 = x

    lea.mem counter = COUNTER 100
    ld x = counter 100
    add x = x 1
    st counter 300 = x

    mov y 666
    lea.mem counter = COUNTER 0
    ld x = counter y
    add x = x 1
    st counter y = x

    lea.stk counter = array 0
    ld x = counter 0
    add x = x 1
    st counter 0 = x

    lea.stk counter = array 100
    ld x = counter 100
    add x = x 1
    st counter 300 = x

    mov y 666
    lea.stk counter = array 0
    ld x = counter y
    add x = x 1
    st counter y = x

    ret
         """)

        unit = serialize.UnitParseFromAsm(code, False)
        fun = unit.fun_syms["foo"]
        bbl = fun.bbls[0]

        cfg.FunInitCFG(fun)
        liveness.FunComputeLivenessInfo(fun)
        reaching_defs.FunComputeReachingDefs(fun)
        reaching_defs.FunPropagateConsts(fun)
        # reaching_defs.FunConstantFold(fun, True)
        reaching_defs.FunLoadStoreSimplify(fun)

        liveness.FunRemoveUselessInstructions(fun)
        print("\n".join(serialize.FunRenderToAsm(fun)))
        # all ld/st were re-written
        for ins in bbl.inss:
            self.assertIn(
                ins.opcode.name,
                {"ret", "add", "ld.mem", "st.mem", "ld.stk", "st.stk"})
示例#10
0
    def testD(self):
        code = io.StringIO(r"""
.fun arm_syscall_write SIGNATURE [S32] = [S32 A32 U32]

.fun putchar NORMAL [] = [U32]

.fun writeln NORMAL [] = [A32 U32]
# live_out: ['r0', 'r1']
.reg S32 [$r0_S32 dummy]
.reg U32 [$r0_U32 $r1_U32 $r2_U32 len]
.reg A32 [$r0_A32 $r1_A32 buf]
.bbl start
    mov buf $r0_A32@r0                     # 0
    mov len $r1_U32@r1                     # 1
    mov $r2_U32@r2 len                     # 2
    mov $r1_A32@r1 buf                     # 3
    mov $r0_S32@r0 1                       # 4
    syscall arm_syscall_write 4:U32        # 5
    mov dummy $r0_S32@r0                   # 6
    mov $r0_U32@r0 10                      # 7
    bsr putchar                            # 8
    ret                                    # 9
""")
        cpu_regs = {
            "r0": ir.CpuReg("r0", 0),
            "r1": ir.CpuReg("r1", 1),
            "r2": ir.CpuReg("r2", 2)
        }
        unit = serialize.UnitParseFromAsm(code, cpu_regs=cpu_regs)

        fun = unit.fun_syms["arm_syscall_write"]
        fun.cpu_live_out = {cpu_regs["r0"]}
        fun.cpu_live_in = {cpu_regs["r0"], cpu_regs["r1"], cpu_regs["r2"]}

        fun = unit.fun_syms["putchar"]
        fun.cpu_live_in = {cpu_regs["r0"]}

        fun = unit.fun_syms["writeln"]
        cfg.FunSplitBblsAtTerminators(fun)
        cfg.FunInitCFG(fun)
        cfg.FunRemoveUnconditionalBranches(fun)
        cfg.FunRemoveEmptyBbls(fun)
        liveness.FunComputeLivenessInfo(fun)
        ranges = liveness.BblGetLiveRanges(fun.bbls[0], fun,
                                           fun.bbls[0].live_out)
        ranges.sort()
        print("TestD")
        for lr in ranges:
            print(lr)

        lr_r0 = liveness.LiveRange(liveness.BEFORE_BBL, 0,
                                   fun.reg_syms["$r0_A32"], 1)
        lr_r1 = liveness.LiveRange(liveness.BEFORE_BBL, 1,
                                   fun.reg_syms["$r1_U32"], 1)
        lr_buf = liveness.LiveRange(0, 3, fun.reg_syms["buf"], 1)
        lr_len = liveness.LiveRange(1, 2, fun.reg_syms["len"], 1)
        lr_r0_2 = liveness.LiveRange(5, 6, fun.reg_syms["$r0_S32"], 1)

        expected = [
            lr_r0,
            lr_r1,
            liveness.LiveRange(0,
                               0,
                               reg=ir.REG_INVALID,
                               num_uses=1,
                               uses=[lr_r0]),
            lr_buf,
            liveness.LiveRange(1,
                               1,
                               reg=ir.REG_INVALID,
                               num_uses=1,
                               uses=[lr_r1]),
            lr_len,
            liveness.LiveRange(2,
                               2,
                               reg=ir.REG_INVALID,
                               num_uses=1,
                               uses=[lr_len]),
            liveness.LiveRange(2, 5, fun.reg_syms["$r2_U32"], 0),
            liveness.LiveRange(3,
                               3,
                               reg=ir.REG_INVALID,
                               num_uses=1,
                               uses=[lr_buf]),
            liveness.LiveRange(3, 5, fun.reg_syms["$r1_A32"], 0),
            liveness.LiveRange(4, 5, fun.reg_syms["$r0_S32"], 0),
            lr_r0_2,
            liveness.LiveRange(6,
                               6,
                               reg=ir.REG_INVALID,
                               num_uses=1,
                               uses=[lr_r0_2]),
            liveness.LiveRange(6, liveness.NO_USE, fun.reg_syms["dummy"], 0),
            liveness.LiveRange(7, 8, fun.reg_syms["$r0_U32"], 0),
        ]
        # self.assertSequenceEqual(ranges, expected) # this does not work because of the uses field
        self.assertEqual(len(ranges), len(expected))
        for a, b in zip():
            self.assertEqual(a, b)
示例#11
0
def PhaseGlobalRegAlloc(fun: ir.Fun, _opt_stats: Dict[str, int], fout):
    """
    This phase introduces CpuRegs for globals and for situations where we have
    no choice which register to use, e.g. function parameters and results
    ("pre-allocated" regs).

    After this function has been run all globals will have a valid cpu_reg and
    we have to be careful to not introduce new globals subsequently.
    If not enough cpu_regs are available for all globals, some of them will be
    spilled.

    The whole global allocator is terrible and the decision which globals to
    spill is extremely simplistic at this time.

    We separate global from local register allocation so that we can use a
    straightforward linear scan allocator for the locals. This allocator
    assumes that each register is defined exactly once and hence does not
    work for globals.

    Args:
        fun: function to allocate registers for (mutated in place).
        _opt_stats: unused here (kept for a uniform phase signature).
        fout: optional file object; when set, a banner and reg stats are dumped.
    """

    if fout:
        print("#" * 60, file=fout)
        print(f"# GlobalRegAlloc {fun.name}", file=fout)
        print("#" * 60, file=fout)

    # Replace pusharg/poparg with moves to/from the pre-allocated regs.
    regs.FunPushargConversion(fun)
    regs.FunPopargConversion(fun)

    # Gather use counts, liveness and lac (live-across-call) info needed below.
    reg_stats.FunComputeRegStatsExceptLAC(fun)
    reg_stats.FunDropUnreferencedRegs(fun)
    liveness.FunComputeLivenessInfo(fun)
    reg_stats.FunComputeRegStatsLAC(fun)

    # Note: REG_KIND_MAP_ARM maps all non-float to registers to S32
    local_reg_stats = reg_stats.FunComputeBblRegUsageStats(
        fun, REG_KIND_MAP_ARM)
    # we have introduced some cpu regs in previous phases - do not treat them as globals
    global_reg_stats = _FunGlobalRegStats(fun, REG_KIND_MAP_ARM)
    DumpRegStats(fun, local_reg_stats, fout)

    # CpuRegs already claimed by pre-allocated regs.
    pre_allocated: Set[ir.CpuReg] = {
        reg.cpu_reg
        for reg in fun.regs if reg.HasCpuReg()
    }

    # Handle GPR regs
    needed_gpr = RegsNeeded(
        len(global_reg_stats[(o.DK.S32, True)]),
        len(global_reg_stats[(o.DK.S32, False)]),
        local_reg_stats.get((o.DK.S32, True), 0),
        # TODO: avoid fudge factor
        1 + local_reg_stats.get((o.DK.S32, False), 0))
    gpr_global_lac, gpr_global_not_lac = _GetRegPoolsForGlobals(
        needed_gpr, regs.GPR_CALLEE_SAVE_REGS.copy(),
        regs.GPR_NOT_LAC_REGS.copy(), pre_allocated)

    # Assign the earmarked regs to globals; whatever does not fit gets spilled.
    to_be_spilled: List[ir.Reg] = []
    to_be_spilled += _AssignCpuRegOrMarkForSpilling(
        global_reg_stats[(o.DK.S32, True)], gpr_global_lac)
    to_be_spilled += _AssignCpuRegOrMarkForSpilling(
        global_reg_stats[(o.DK.S32, False)], gpr_global_not_lac)

    # Handle Float regs. F64 entries are weighted 2x — presumably because a
    # double occupies two single-precision slots; TODO confirm.
    # NOTE(review): the second (not-lac) count below reuses (o.DK.F64, True);
    # by symmetry with the lac count it looks like it should be
    # (o.DK.F64, False) — verify before changing.
    needed_flt = RegsNeeded(
        len(global_reg_stats[(o.DK.F32, True)]) +
        2 * len(global_reg_stats[(o.DK.F64, True)]),
        len(global_reg_stats[(o.DK.F32, False)]) +
        2 * len(global_reg_stats[(o.DK.F64, True)]),
        local_reg_stats.get((o.DK.F32, True), 0) + 2 * local_reg_stats.get(
            (o.DK.F64, True), 0),
        # TODO: avoid fudge factor
        2 + local_reg_stats.get(
            (o.DK.F32, False), 0) + 2 * local_reg_stats.get(
                (o.DK.F64, False), 0))

    flt_global_lac, flt_global_not_lac = _GetRegPoolsForGlobals(
        needed_flt, regs.FLT_CALLEE_SAVE_REGS.copy(),
        regs.FLT_PARAMETER_REGS.copy(), pre_allocated)

    to_be_spilled += _AssignCpuRegOrMarkForSpilling(
        global_reg_stats[(o.DK.F64, True)] +
        global_reg_stats[(o.DK.F32, True)], flt_global_lac)
    to_be_spilled += _AssignCpuRegOrMarkForSpilling(
        global_reg_stats[(o.DK.F64, False)] +
        global_reg_stats[(o.DK.F32, False)], flt_global_not_lac)

    reg_alloc.FunSpillRegs(fun, o.DK.U32, to_be_spilled)

    # Recompute Everything (TODO: make this more selective)
    reg_stats.FunComputeRegStatsExceptLAC(fun)
    reg_stats.FunDropUnreferencedRegs(fun)
    liveness.FunComputeLivenessInfo(fun)
    reg_stats.FunComputeRegStatsLAC(fun)
    reg_stats.FunSeparateLocalRegUsage(fun)
示例#12
0
def PhaseLegalization(fun: ir.Fun, unit: ir.Unit, _opt_stats: Dict[str, int],
                      fout):
    """
    Does a lot of the heavy lifting so that the instruction selector can remain
    simple and table driven.
    * lift almost all regs to 32bit width
    * rewrite Ins that cannot be expanded
    * rewrite immediates that cannot be expanded except stack offsets which are
      dealt with in another pass

    TODO: missing is a function to change the calling signature

    Args:
        fun: function to legalize (mutated in place).
        unit: enclosing unit (needed by passes that may add helper code).
        _opt_stats: unused here (kept for a uniform phase signature).
        fout: unused here (kept for a uniform phase signature).
    """
    # Record which CpuRegs carry this function's parameters and results.
    fun.cpu_live_in = regs.PushPopInterface.GetCpuRegsForInSignature(
        fun.input_types)
    fun.cpu_live_out = regs.PushPopInterface.GetCpuRegsForOutSignature(
        fun.output_types)
    if fun.kind is not o.FUN_KIND.NORMAL:
        return

    # Getting rid of the pusharg/poparg now relieves us from having to pay
    # attention to the invariant that pushargs/popargs must be adjacent.
    lowering.FunPushargConversion(fun, regs.PushPopInterface)
    lowering.FunPopargConversion(fun, regs.PushPopInterface)

    # We did not bother with this addressing mode
    # TODO: we likely can avoid this by adding more cases to isel_tab.py
    lowering.FunEliminateStkLoadStoreWithRegOffset(fun,
                                                   base_kind=o.DK.A64,
                                                   offset_kind=o.DK.S32)

    # TODO: switch this to a WithRegOffset flavor
    lowering.FunEliminateMemLoadStore(fun,
                                      base_kind=o.DK.A64,
                                      offset_kind=o.DK.S32)

    lowering.FunEliminateCopySign(fun)
    # TODO: support a few special cases in the isel, e.g. cmpXX a 0, 1, x, y
    lowering.FunEliminateCmp(fun)

    canonicalize.FunCanonicalize(fun)
    # TODO: add a cfg linearization pass to improve control flow
    optimize.FunCfgExit(
        fun, unit)  # note: this may affect immediates as it flips branches

    # Handle most overflowing immediates.
    # This excludes immediates related to stack offsets which have not been determined yet
    _FunRewriteOutOfBoundsImmediates(fun, unit)

    # mul/div/rem need special treatment
    _FunRewriteDivRem(fun)

    _FunRewriteIntoAABForm(fun, unit)

    # Recompute Everything (TODO: make this more selective to reduce work)
    reg_stats.FunComputeRegStatsExceptLAC(fun)
    reg_stats.FunDropUnreferencedRegs(fun)
    liveness.FunComputeLivenessInfo(fun)
    reg_stats.FunComputeRegStatsLAC(fun)
    reg_stats.FunSeparateLocalRegUsage(
        fun
    )  # this has special hacks to avoid undoing _FunRewriteIntoAABForm()
    # DumpRegStats(fun, local_reg_stats)

    # if fun.name == "fibonacci": DumpFun("end of legal", fun)
    # if fun.name == "write_s": exit(1)
    sanity.FunCheck(fun, None)
示例#13
0
def PhaseGlobalRegAlloc(fun: ir.Fun, _opt_stats: Dict[str, int], fout):
    """
    This phase introduces CpuRegs for globals and for situations where we have
    no choice which register to use, e.g. function parameters and results
    ("pre-allocated" regs).

    After this function has been run all globals will have a valid cpu_reg and
    we have to be careful to not introduce new globals subsequently.
    If not enough cpu_regs are available for all globals, some of them will be
    spilled. We err on the side of spilling more: the biggest danger is to
    over-allocate and then lack registers for intra-bbl register allocation.

    The whole global allocator is terrible and the decision which globals to
    spill is extremely simplistic at this time.

    We separate global from local register allocation so that we can use a
    straightforward linear scan allocator for the locals. This allocator
    assumes that each register is defined exactly once and hence does not
    work for globals.

    Args:
        fun: function to allocate registers for (mutated in place).
        _opt_stats: unused here (kept for a uniform phase signature).
        fout: optional file object; when set, a banner and reg stats are dumped.
    """

    if fout:
        print("#" * 60, file=fout)
        print(f"# GlobalRegAlloc {fun.name}", file=fout)
        print("#" * 60, file=fout)

    # Pusharg/poparg conversion — replacing them with moves using the
    # pre-allocated regs (the ones used for argument/result passing) — is
    # presumably done in an earlier phase now, hence the disabled calls
    # below (TODO confirm).
    # regs.FunPushargConversion(fun)
    # regs.FunPopargConversion(fun)

    # Gather use counts, liveness and lac (live-across-call) info needed below.
    reg_stats.FunComputeRegStatsExceptLAC(fun)
    reg_stats.FunDropUnreferencedRegs(fun)
    liveness.FunComputeLivenessInfo(fun)
    reg_stats.FunComputeRegStatsLAC(fun)

    # NOTE(review): comment previously referenced REG_KIND_MAP_ARM; here the
    # mapping is REG_KIND_TO_CPU_KIND — presumably it collapses reg kinds
    # into CPU register kinds (GPR/FLT/DBL). TODO confirm.
    local_reg_stats = reg_stats.FunComputeBblRegUsageStats(
        fun, REG_KIND_TO_CPU_KIND)
    # we have introduced some cpu regs in previous phases - do not treat them as globals
    global_reg_stats = _FunGlobalRegStats(fun, REG_KIND_TO_CPU_KIND)
    DumpRegStats(fun, local_reg_stats, fout)

    # Handle GPR regs
    pre_allocated_mask_gpr = 0
    for reg in fun.regs:
        if reg.HasCpuReg() and reg.cpu_reg.kind == regs.CpuRegKind.GPR:
            pre_allocated_mask_gpr |= regs.A32RegToAllocMask(reg.cpu_reg)
    # compute the number of regs needed if had indeed unlimited regs
    needed_gpr = RegsNeeded(
        len(global_reg_stats[(regs.CpuRegKind.GPR, True)]),
        len(global_reg_stats[(regs.CpuRegKind.GPR, False)]),
        local_reg_stats.get((regs.CpuRegKind.GPR, True), 0),
        local_reg_stats.get((regs.CpuRegKind.GPR, False), 0))
    # earmark some regs for globals
    gpr_global_lac, gpr_global_not_lac = _GetRegPoolsForGlobals(
        needed_gpr, regs.GPR_REGS_MASK & regs.GPR_LAC_REGS_MASK,
        regs.GPR_REGS_MASK & ~regs.GPR_LAC_REGS_MASK, pre_allocated_mask_gpr)

    # assign the earmarked regs to some globals and spill the rest
    to_be_spilled: List[ir.Reg] = []
    to_be_spilled += regs.AssignCpuRegOrMarkForSpilling(
        global_reg_stats[(regs.CpuRegKind.GPR, True)], gpr_global_lac, 0)

    to_be_spilled += regs.AssignCpuRegOrMarkForSpilling(
        global_reg_stats[(regs.CpuRegKind.GPR, False)],
        gpr_global_not_lac & ~regs.GPR_LAC_REGS_MASK,
        gpr_global_not_lac & regs.GPR_LAC_REGS_MASK)

    # Handle Float regs. DBL entries are weighted 2x — presumably because a
    # double occupies two single-precision slots; TODO confirm.
    pre_allocated_mask_flt = 0
    for reg in fun.regs:
        if reg.HasCpuReg() and reg.cpu_reg.kind != regs.CpuRegKind.GPR:
            pre_allocated_mask_flt |= regs.A32RegToAllocMask(reg.cpu_reg)
    # repeat the same process as we did for GPR regs
    needed_flt = RegsNeeded(
        len(global_reg_stats[(regs.CpuRegKind.FLT, True)]) +
        2 * len(global_reg_stats[(regs.CpuRegKind.DBL, True)]),
        len(global_reg_stats[(regs.CpuRegKind.FLT, False)]) +
        2 * len(global_reg_stats[(regs.CpuRegKind.DBL, False)]),
        local_reg_stats.get(
            (regs.CpuRegKind.FLT, True), 0) + 2 * local_reg_stats.get(
                (regs.CpuRegKind.DBL, True), 0),
        local_reg_stats.get(
            (regs.CpuRegKind.FLT, False), 0) + 2 * local_reg_stats.get(
                (regs.CpuRegKind.DBL, False), 0))
    flt_global_lac, flt_global_not_lac = _GetRegPoolsForGlobals(
        needed_flt, regs.FLT_REGS_MASK & regs.FLT_LAC_REGS_MASK,
        regs.FLT_REGS_MASK & ~regs.FLT_LAC_REGS_MASK, pre_allocated_mask_flt)

    to_be_spilled += regs.AssignCpuRegOrMarkForSpilling(
        global_reg_stats[(regs.CpuRegKind.DBL, True)] +
        global_reg_stats[(regs.CpuRegKind.FLT, True)], flt_global_lac, 0)

    to_be_spilled += regs.AssignCpuRegOrMarkForSpilling(
        global_reg_stats[(regs.CpuRegKind.DBL, False)] +
        global_reg_stats[(regs.CpuRegKind.FLT, False)],
        flt_global_not_lac & ~regs.FLT_LAC_REGS_MASK,
        flt_global_not_lac & regs.FLT_LAC_REGS_MASK)

    reg_alloc.FunSpillRegs(fun, o.DK.U32, to_be_spilled, prefix="$gspill")

    # Recompute Everything (TODO: make this more selective)
    reg_stats.FunComputeRegStatsExceptLAC(fun)
    reg_stats.FunDropUnreferencedRegs(fun)
    liveness.FunComputeLivenessInfo(fun)
    reg_stats.FunComputeRegStatsLAC(fun)
    # establish per bbl SSA form by splitting liveranges
    reg_stats.FunSeparateLocalRegUsage(fun)