def create_c_asm_file(self, funcs_text, func, out_dir, func_name):
    """Write one function's assembly to ``<out_dir>/<self.name>/<func_name>.s``.

    The output starts with the GCC include header when the configured compiler
    is "GCC" (otherwise it starts empty).  If the parent is a CommonSegGroup
    that records rodata symbols for ``func``, and the subsegment holding the
    first of those symbols is not of type "rodata", the symbols' disassembly
    is emitted in a ``.section .rodata`` block ahead of the function text.

    Args:
        funcs_text: mapping whose ``funcs_text[func][0]`` is the list of
            assembly lines for the function.
        func: key of the function in ``funcs_text`` / ``parent.rodata_syms``.
        out_dir: base directory for the generated file.
        func_name: name used for the output file and the log message.
    """
    out_lines = self.get_gcc_inc_header() if options.get_compiler() == "GCC" else []

    if (
        self.parent
        and isinstance(self.parent, CommonSegGroup)
        and func in self.parent.rodata_syms
    ):
        # De-duplicate through a set, keep only symbols that have disassembly,
        # and order them by address.
        rodata_for_func = sorted(
            {s for s in self.parent.rodata_syms[func] if s.disasm_str},
            key=lambda s: s.vram_start,
        )

        if len(rodata_for_func) > 0:
            owning_sub = self.parent.get_subsegment_for_ram(
                rodata_for_func[0].vram_start
            )
            if owning_sub and owning_sub.type != "rodata":
                out_lines.append(".section .rodata")

                for rodata_sym in rodata_for_func:
                    if rodata_sym.disasm_str:
                        # Collapse doubled newlines before splitting into lines
                        collapsed = rodata_sym.disasm_str.replace("\n\n", "\n")
                        out_lines.extend(collapsed.split("\n"))

                out_lines.extend(["", ".section .text", ""])

    out_lines.extend(funcs_text[func][0])
    out_lines.append("")

    outpath = Path(os.path.join(out_dir, self.name, func_name + ".s"))
    outpath.parent.mkdir(parents=True, exist_ok=True)

    # Always write "\n" line endings, regardless of platform
    with open(outpath, "w", newline="\n") as asm_file:
        asm_file.write("\n".join(out_lines))

    self.log(f"Disassembled {func_name} to {outpath}")
def get_global_asm_funcs(c_file):
    """Return the set of assembly references found in a C source file.

    The file is read, C comments are stripped with
    ``CommonSegC.strip_c_comments``, and then:

    * for the GCC and SN64 compilers, the result of
      ``CommonSegC.find_include_asm`` is returned as a set;
    * for any other compiler, group 2 of every
      ``CommonSegC.C_GLOBAL_ASM_IDO_RE`` match is collected.
    """
    with open(c_file, "r") as c_source:
        stripped = CommonSegC.strip_c_comments(c_source.read())

    if options.get_compiler() not in [GCC, SN64]:
        return {
            match.group(2)
            for match in CommonSegC.C_GLOBAL_ASM_IDO_RE.finditer(stripped)
        }

    return set(CommonSegC.find_include_asm(stripped))
def create_c_file(self, funcs_text, asm_out_dir, c_path):
    """Generate a new C file that pulls in every function as assembly.

    The file starts with ``self.get_c_preamble()``.  For each key of
    ``funcs_text`` one line is emitted, followed by a blank line:

    * ``INCLUDE_ASM(s32, "<self.name>", <func>);`` when the compiler is "GCC";
    * ``#pragma GLOBAL_ASM("<path>")`` otherwise, where the path to the ``.s``
      file is made relative to ``options.get_base_path()``.

    Parent directories of ``c_path`` are created as needed.
    """
    c_lines = self.get_c_preamble()

    for func in funcs_text:
        func_name = self.get_symbol(func, type="func", local_only=True).name

        if options.get_compiler() == "GCC":
            directive = f'INCLUDE_ASM(s32, "{self.name}", {func_name});'
        else:
            asm_file = Path(os.path.join(asm_out_dir, self.name, func_name + ".s"))
            relative_asm_file = os.path.relpath(asm_file, options.get_base_path())
            directive = f'#pragma GLOBAL_ASM("{relative_asm_file}")'

        c_lines.extend([directive, ""])

    Path(c_path).parent.mkdir(parents=True, exist_ok=True)
    with open(c_path, "w") as c_file:
        c_file.write("\n".join(c_lines))

    print(f"Wrote {self.name} to {c_path}")
def create_c_file(self, funcs_text, asm_out_dir, c_path):
    """Generate a new C file for this segment.

    Starts from ``self.get_c_preamble()`` and emits one entry per key of
    ``funcs_text``, each followed by a blank line:

    * an empty ``void name(void) {}`` definition when auto-decompilation of
      empty functions is enabled and the function text has exactly three
      lines, the second ending in ``$ra``/``$31`` and the third in ``nop``;
    * an ``INCLUDE_ASM`` macro for the GCC and SN64 compilers (legacy
      three-argument form, with the directory made relative to the
      nonmatchings path, when ``options.get_use_legacy_include_asm()``);
    * a ``#pragma GLOBAL_ASM`` line for every other compiler.

    ``asm_out_dir`` must support ``/`` and ``relative_to`` (it is used that
    way in the INCLUDE_ASM branches).
    """
    c_lines = self.get_c_preamble()

    for func in funcs_text:
        func_name = self.parent.get_symbol(func, type="func", local_only=True).name

        # Terrible hack to "auto-decompile" empty functions
        # TODO move disassembly into funcs_text or somewhere we can access it from here
        looks_empty = (
            options.get_auto_decompile_empty_functions()
            and len(funcs_text[func][0]) == 3
            and funcs_text[func][0][1].endswith(("$ra", "$31"))
            and funcs_text[func][0][2].endswith("nop")
        )

        if looks_empty:
            c_lines.extend(["void " + func_name + "(void) {", "}"])
        elif options.get_compiler() in [GCC, SN64]:
            if options.get_use_legacy_include_asm():
                legacy_dir = asm_out_dir.relative_to(options.get_nonmatchings_path())
                c_lines.append(
                    f'INCLUDE_ASM(s32, "{legacy_dir / self.name}", {func_name});'
                )
            else:
                c_lines.append(
                    f'INCLUDE_ASM("{asm_out_dir / self.name}", {func_name});'
                )
        else:
            asm_file = Path(os.path.join(asm_out_dir, self.name, func_name + ".s"))
            relative_asm_file = os.path.relpath(asm_file, options.get_base_path())
            c_lines.append(f'#pragma GLOBAL_ASM("{relative_asm_file}")')

        c_lines.append("")

    Path(c_path).parent.mkdir(parents=True, exist_ok=True)
    with open(c_path, "w") as c_file:
        c_file.write("\n".join(c_lines))

    log.write(f"Wrote {self.name} to {c_path}")
def split(self, rom_bytes: bytes):
    """Emit the C file (if missing) and per-function assembly for this segment.

    Nothing happens when the segment is empty (``rom_start == rom_end``).
    Otherwise ``<asm path>/nonmatchings/<self.dir>`` is created, a new C file
    is generated when ``self.out_path()`` is set, does not exist yet, and the
    "create_new_c_files" option (default True) allows it, and an assembly
    file is written for every function that still needs one:

    * GCC: functions not listed in ``self.defined_funcs``;
    * other compilers: functions listed in ``self.global_asm_funcs``, or all
      functions when the C file was just created.

    ``rom_bytes`` is not used by this implementation.
    """
    if self.rom_start == self.rom_end:
        return

    asm_out_dir = options.get_asm_path() / "nonmatchings" / self.dir
    asm_out_dir.mkdir(parents=True, exist_ok=True)

    is_new_c_file = False
    c_path = self.out_path()
    if (
        c_path
        and not os.path.exists(c_path)
        and options.get("create_new_c_files", True)
    ):
        self.create_c_file(self.funcs_text, asm_out_dir, c_path)
        is_new_c_file = True

    for func in self.funcs_text:
        func_name = self.parent.get_symbol(func, type="func", local_only=True).name

        if options.get_compiler() == "GCC":
            needs_asm = func_name not in self.defined_funcs
        else:
            needs_asm = func_name in self.global_asm_funcs or is_new_c_file

        if needs_asm:
            self.create_c_asm_file(self.funcs_text, func, asm_out_dir, func_name)
def create_c_file(self, asm_out_dir, c_path):
    """Generate a new C file for this segment from its disassembled functions.

    Starts from ``self.get_c_preamble()`` and emits one entry per symbol in
    ``self.text_section.symbolList``, each followed by a blank line:

    * an empty ``void name(void) {}`` definition when auto-decompilation of
      empty functions is enabled and the function begins with an instruction
      reporting ``isJrRa()`` followed by one reporting ``isNop()``;
    * an ``INCLUDE_ASM`` macro for the GCC and SN64 compilers (legacy
      three-argument form, with the directory made relative to the
      nonmatchings path, when ``options.get_use_legacy_include_asm()``);
    * a ``#pragma GLOBAL_ASM`` line for every other compiler.

    Args:
        asm_out_dir: directory holding the generated ``.s`` files; must
            support ``/`` and ``relative_to``.
        c_path: destination of the C file; parent directories are created.

    Raises:
        AssertionError: if a symbol in the text section is not a
            ``SymbolFunction``.
    """
    c_lines = self.get_c_preamble()

    for func in self.text_section.symbolList:
        assert isinstance(func, spimdisasm.mips.symbols.SymbolFunction)

        func_name = func.getName()

        # Terrible hack to "auto-decompile" empty functions.
        # The length guard keeps a function with fewer than two instructions
        # from raising IndexError; such a function simply falls through to
        # the regular include path.
        if (
            options.get_auto_decompile_empty_functions()
            and len(func.instructions) >= 2
            and func.instructions[0].isJrRa()
            and func.instructions[1].isNop()
        ):
            c_lines.append("void " + func_name + "(void) {")
            c_lines.append("}")
        elif options.get_compiler() in [GCC, SN64]:
            if options.get_use_legacy_include_asm():
                rel_asm_out_dir = asm_out_dir.relative_to(
                    options.get_nonmatchings_path()
                )
                c_lines.append(
                    f'INCLUDE_ASM(s32, "{rel_asm_out_dir / self.name}", {func_name});'
                )
            else:
                c_lines.append(
                    f'INCLUDE_ASM("{asm_out_dir / self.name}", {func_name});'
                )
        else:
            asm_outpath = Path(os.path.join(asm_out_dir, self.name, func_name + ".s"))
            rel_asm_outpath = os.path.relpath(asm_outpath, options.get_base_path())
            c_lines.append(f'#pragma GLOBAL_ASM("{rel_asm_outpath}")')

        c_lines.append("")

    Path(c_path).parent.mkdir(parents=True, exist_ok=True)
    with open(c_path, "w") as f:
        f.write("\n".join(c_lines))

    log.write(f"Wrote {self.name} to {c_path}")
def create_c_file(self, funcs_text, asm_out_dir, c_path):
    """Generate a new C file for this segment.

    Starts from ``self.get_c_preamble()`` and emits one entry per key of
    ``funcs_text``, each followed by a blank line:

    * an empty ``void name(void) {}`` definition when the function text has
      exactly three lines, the second ending in ``$ra`` and the third in
      ``nop``;
    * ``INCLUDE_ASM(s32, "<self.name>", <func>);`` when the compiler is "GCC";
    * ``#pragma GLOBAL_ASM("<path>")`` otherwise, pointing at
      ``<asm_out_dir>/<self.dir>/<self.name>/<func>.s`` relative to
      ``options.get_base_path()``.
    """
    c_lines = self.get_c_preamble()

    for func in funcs_text:
        func_name = self.parent.get_symbol(func, type="func", local_only=True).name
        asm_text = funcs_text[func][0]

        # Terrible hack to "auto-decompile" empty functions
        # TODO move disassembly into funcs_text or somewhere we can access it from here
        looks_empty = (
            len(asm_text) == 3
            and asm_text[1].endswith("$ra")
            and asm_text[2].endswith("nop")
        )

        if looks_empty:
            c_lines.extend(["void " + func_name + "(void) {", "}"])
        elif options.get_compiler() == "GCC":
            c_lines.append(f'INCLUDE_ASM(s32, "{self.name}", {func_name});')
        else:
            asm_file = Path(
                os.path.join(asm_out_dir, self.dir, self.name, func_name + ".s")
            )
            relative_asm_file = os.path.relpath(asm_file, options.get_base_path())
            c_lines.append(f'#pragma GLOBAL_ASM("{relative_asm_file}")')

        c_lines.append("")

    Path(c_path).parent.mkdir(parents=True, exist_ok=True)
    with open(c_path, "w") as c_file:
        c_file.write("\n".join(c_lines))

    log.write(f"Wrote {self.name} to {c_path}")
def configure_disassembler():
    """Copy the project options into spimdisasm's and rabbitizer's global config.

    Everything is written to ``spimdisasm.common.GlobalConfig`` and
    ``rabbitizer.config``; nothing is returned.  On the "n64" platform the
    default banned symbols are also registered on ``symbols.spim_context``.
    """
    spim_common = spimdisasm.common
    spim_config = spim_common.GlobalConfig
    rab_config = rabbitizer.config

    # Configure spimdisasm
    spim_config.PRODUCE_SYMBOLS_PLUS_OFFSET = True
    spim_config.TRUST_USER_FUNCTIONS = True
    spim_config.TRUST_JAL_FUNCTIONS = True
    spim_config.GLABEL_ASM_COUNT = False

    spim_config.ASM_COMMENT_OFFSET_WIDTH = 6 if options.rom_address_padding() else 0

    # spimdisasm is not performing any analysis on non-text sections,
    # so enabling these options is pointless
    spim_config.AUTOGENERATED_NAMES_BASED_ON_SECTION_TYPE = False
    spim_config.AUTOGENERATED_NAMES_BASED_ON_DATA_TYPE = False

    spim_config.SYMBOL_FINDER_FILTERED_ADDRESSES_AS_HILO = False

    rab_config.regNames_userFpcCsr = False
    rab_config.regNames_vr4300Cop0NamedRegisters = False

    rab_config.misc_opcodeLJust = options.mnemonic_ljust() - 1

    rab_config.regNames_gprAbiNames = rabbitizer.Abi.fromStr(
        options.get_mips_abi_gpr()
    )
    rab_config.regNames_fprAbiNames = rabbitizer.Abi.fromStr(
        options.get_mips_abi_float_regs()
    )

    # Anything other than "big" is treated as little endian
    spim_config.ENDIAN = (
        spim_common.InputEndian.BIG
        if options.get_endianess() == "big"
        else spim_common.InputEndian.LITTLE
    )

    rab_config.pseudos_pseudoMove = False

    # Compiler-specific tweaks
    selected_compiler = options.get_compiler()
    if selected_compiler == compiler.SN64:
        rab_config.regNames_namedRegisters = False
        rab_config.toolchainTweaks_sn64DivFix = True
        rab_config.toolchainTweaks_treatJAsUnconditionalBranch = True
        spim_config.ASM_COMMENT = False
        spim_config.SYMBOL_FINDER_FILTERED_ADDRESSES_AS_HILO = False
        spim_config.COMPILER = spim_common.Compiler.SN64
    elif selected_compiler == compiler.GCC:
        rab_config.toolchainTweaks_treatJAsUnconditionalBranch = True
        spim_config.COMPILER = spim_common.Compiler.GCC
    elif selected_compiler == compiler.IDO:
        spim_config.COMPILER = spim_common.Compiler.IDO

    spim_config.GP_VALUE = options.get_gp()

    spim_config.ASM_TEXT_LABEL = options.get_asm_function_macro()
    spim_config.ASM_DATA_LABEL = options.get_asm_data_macro()
    spim_config.ASM_TEXT_END_LABEL = options.get_asm_end_label()

    # With a plain ".globl" function macro, also emit ".ent" and a real label
    if spim_config.ASM_TEXT_LABEL == ".globl":
        spim_config.ASM_TEXT_ENT_LABEL = ".ent"
        spim_config.ASM_TEXT_FUNC_AS_LABEL = True

    spim_config.LINE_ENDS = options.c_newline()

    if options.get_platform() == "n64":
        symbols.spim_context.fillDefaultBannedSymbols()
def add_labels(self):
    """Render every function in ``self.funcs`` to assembly text lines.

    For each function this emits the function macro line (plus ``.ent`` and a
    plain label for SN64), then one line per instruction with:

    * a jump-table or local label in front of it when its address is in
      ``self.parent.jtbl_glabels_to_add`` / ``self.parent.labels_to_add``;
    * a ``/* rom vram bytes */`` comment (omitted for SN64);
    * the last operand replaced by ``%hi``/``%lo``/``%gp_rel`` relocations or
      by a jump-table label where applicable;
    * one extra leading space on the instruction following a branch or jump
      (its delay slot).

    When ``options.find_file_boundaries()`` is enabled, functions that end
    with extra nops on a 16-byte boundary are reported via ``print`` as
    likely file splits.

    Side effects: consumed addresses are removed from
    ``self.parent.labels_to_add`` and ``self.parent.reported_file_split`` may
    be set.

    Returns:
        dict mapping each function address to ``(text_lines, func.rom)``.
    """
    ret = {}

    function_macro = options.get_asm_function_macro()
    data_macro = options.get_asm_data_macro()

    for func_addr in self.funcs:
        func_text = []
        func = self.funcs[func_addr]

        # Add function label
        func_text.append(f"{function_macro} {func.name}")

        if options.get_compiler() == SN64:
            func_text.append(f".ent {func.name}")
            func_text.append(f"{func.name}:")

        # True when the next instruction sits in a delay slot
        indent_next = False

        mnemonic_ljust = options.mnemonic_ljust()
        rom_addr_padding = options.rom_address_padding()

        for insn in func.insns:
            insn_addr = insn.instruction.address

            # Add a label if we need one
            if insn_addr in self.parent.jtbl_glabels_to_add:
                func_text.append(f"{data_macro} L{insn_addr:X}_{insn.rom_addr:X}")
            elif insn_addr in self.parent.labels_to_add:
                self.parent.labels_to_add.remove(insn_addr)
                func_text.append(".L{:X}:".format(insn_addr))

            if rom_addr_padding:
                rom_str = "{0:0{1}X}".format(insn.rom_addr, rom_addr_padding)
            else:
                rom_str = "{:X}".format(insn.rom_addr)

            if options.get_compiler() == SN64:
                asm_comment = ""
            else:
                asm_comment = "/* {} {:X} {} */".format(
                    rom_str, insn_addr, insn.instruction.bytes.hex().upper()
                )

            # Swap the last operand for a relocation expression when the
            # instruction was tagged as part of a hi/lo pair or a gp access
            if insn.is_hi:
                assert insn.hi_lo_sym
                op_str = ", ".join(
                    insn.op_str.split(", ")[:-1]
                    + [f"%hi({insn.hi_lo_sym.name}{insn.sym_offset_str})"]
                )
            elif insn.is_lo:
                assert insn.hi_lo_sym
                op_str = ", ".join(
                    insn.op_str.split(", ")[:-1]
                    + [
                        f"%lo({insn.hi_lo_sym.name}{insn.sym_offset_str}){insn.hi_lo_reg}"
                    ]
                )
            elif insn.is_gp:
                op_str = ", ".join(
                    insn.op_str.split(", ")[:-1]
                    + [f"%gp_rel({insn.hi_lo_sym.name}{insn.sym_offset_str})($gp)"]
                )
            else:
                op_str = insn.op_str

            # Branches into a jump-table target use the global-style label
            if self.is_branch_insn(insn.instruction.mnemonic):
                branch_addr = int(insn.instruction.op_str.split(",")[-1].strip(), 0)
                if branch_addr in self.parent.jtbl_glabels_to_add:
                    label_str = f"L{branch_addr:X}_{self.ram_to_rom(branch_addr):X}"
                    op_str = ", ".join(insn.op_str.split(", ")[:-1] + [label_str])

            insn_text = insn.mnemonic
            if indent_next:
                indent_next = False
                insn_text = " " + insn_text

            asm_insn_text = " {}{}".format(
                insn_text.ljust(mnemonic_ljust), op_str
            ).rstrip()

            func_text.append(asm_comment + asm_insn_text)

            # Branches (b*) and jumps (j*) are followed by a delay slot.
            # "break" also starts with "b" but has no delay slot, so it is
            # excluded explicitly.
            raw_mnemonic = insn.instruction.mnemonic
            if (
                raw_mnemonic != "break" and raw_mnemonic.startswith("b")
            ) or raw_mnemonic.startswith("j"):
                indent_next = True

        end_label = options.get_asm_end_label()
        if end_label:
            func_text.append(f"{end_label} {func.name}")

        ret[func_addr] = (func_text, func.rom)

        if options.find_file_boundaries():
            # If this is not the last function in the file
            if func_addr != list(self.funcs.keys())[-1]:
                # Find where the function returns (distance from the end)
                jr_pos: Optional[int] = None
                for i, insn in enumerate(reversed(func.insns)):
                    if insn.instruction.mnemonic == "jr" and insn.instruction.op_str in [
                        "$ra",
                        "$31",
                    ]:
                        jr_pos = i
                        break

                # If there is more than 1 nop after the return
                if (
                    jr_pos is not None
                    and jr_pos > 1
                    and self.is_nops(
                        [insn.instruction for insn in func.insns[-jr_pos + 1:]]
                    )
                ):
                    new_file_addr = func.insns[-1].rom_addr + 4
                    if (new_file_addr % 16) == 0:
                        # Print the explanation only once per parent segment
                        if not self.parent.reported_file_split:
                            self.parent.reported_file_split = True
                            print(
                                f"Segment {self.name}, function at vram {func_addr:X} ends with extra nops, indicating a likely file split."
                            )
                            print(
                                "File split suggestions for this segment will follow in config yaml format:"
                            )
                        print(f" - [0x{new_file_addr:X}, asm]")

    return ret
def determine_symbols(self):
    """Resolve the symbols referenced by instructions in ``self.funcs``.

    Two patterns are recognised per instruction:

    * gp-relative accesses (operand string ending in ``($gp)``): the target
      address is ``options.get_gp()`` plus the hex offset, and the
      instruction is tagged with ``is_gp``;
    * ``lui`` with an immediate of at least 0x8000: the following
      instructions (bounded by ``options.hi_lo_max_distance()``) are searched
      for a matching low-half instruction using the same register, and both
      are tagged with ``is_hi`` / ``is_lo``.

    Symbols are created through ``self.parent.create_symbol``.  A hi/lo
    target may instead be created as a jump table (``type="jtbl"``) and
    recorded in ``self.parent.jumptables`` when it lies on the rodata side
    of the function and a recorded ``jr`` through the loaded register is
    close enough.
    """
    hi_lo_max_distance = options.hi_lo_max_distance()

    for func_addr in self.funcs:
        func = self.funcs[func_addr]
        func_end_addr = func.insns[-1].instruction.address + 4

        # Recorded jtbl jumps whose address falls inside this function,
        # ordered by address
        possible_jtbl_jumps = [(k, v) for k, v in self.parent.jtbl_jumps.items()
                               if k >= func_addr and k < func_end_addr]
        possible_jtbl_jumps.sort(key=lambda x: x[0])

        for i in range(len(func.insns)):
            hi_insn: CsInsn = func.insns[i].instruction

            # Ensure the first item in the list is always ahead of where we're looking
            while (len(possible_jtbl_jumps) > 0
                   and possible_jtbl_jumps[0][0] < hi_insn.address):
                del possible_jtbl_jumps[0]

            # Find gp relative reads and writes e.g lw $a1, 0x670($gp)
            if hi_insn.op_str.endswith("($gp)"):
                gp_base = options.get_gp()
                if gp_base is None:
                    # NOTE(review): presumably log.error aborts; otherwise the
                    # addition below would fail on None - confirm.
                    log.error(
                        "gp_value not set in yaml, can't calculate %gp_rel reloc value for "
                        + hi_insn.op_str)

                op_split = hi_insn.op_str.split(", ")
                gp_offset = op_split[1][:-5]  # extract the 0x670 part
                if len(gp_offset) == 0:
                    gp_offset = 0
                else:
                    gp_offset = int(gp_offset, 16)
                symbol_addr = gp_base + gp_offset

                sym = self.parent.create_symbol(symbol_addr,
                                                offsets=True,
                                                reference=True)
                # Unlike the hi/lo path below, the offset suffix is always
                # emitted here, even when it is zero
                offset = symbol_addr - sym.vram_start
                offset_str = f"+0x{offset:X}"

                if self.parent:
                    self.parent.check_rodata_sym(func_addr, sym)

                self.update_access_mnemonic(sym, hi_insn.mnemonic)

                func.insns[i].is_gp = True
                func.insns[i].hi_lo_sym = sym
                func.insns[i].sym_offset_str = offset_str

            # All hi/lo pairs start with a lui
            elif hi_insn.mnemonic == "lui":
                op_split = hi_insn.op_str.split(", ")
                hi_reg = op_split[0]

                # Only immediates written as hex literals are considered
                if not op_split[1].startswith("0x"):
                    continue

                lui_val = int(op_split[1], 0)

                # Assumes all luis are going to load from 0x80000000 or higher (maybe false)
                if lui_val >= 0x8000:
                    # Iterate over the next few instructions to see if we can find a matching lo
                    for j in range(
                            i + 1,
                            min(i + hi_lo_max_distance, len(func.insns))):
                        lo_insn = func.insns[j].instruction

                        s_op_split = lo_insn.op_str.split(", ")

                        # The register is reloaded by another lui: give up
                        if lo_insn.mnemonic == "lui" and hi_reg == s_op_split[
                                0]:
                            break

                        # For addiu/ori the source register is the second to
                        # last operand; otherwise it is taken from inside the
                        # parentheses of the last operand
                        if lo_insn.mnemonic in ["addiu", "ori"]:
                            lo_reg = s_op_split[-2]
                        else:
                            lo_reg = s_op_split[-1][s_op_split[-1].
                                                    rfind("(") + 1:-1]

                        if hi_reg == lo_reg:
                            # Any other mnemonic using the register ends the
                            # search without a match
                            if lo_insn.mnemonic not in [
                                    "addiu",
                                    "lw",
                                    "sw",
                                    "lh",
                                    "sh",
                                    "lhu",
                                    "lb",
                                    "sb",
                                    "lbu",
                                    "lwc1",
                                    "swc1",
                                    "ldc1",
                                    "sdc1",
                            ]:
                                break

                            # Match!
                            reg_ext = ""

                            # I forgot what this is doing
                            # (it splits the last operand at its first "(":
                            # the text before becomes s_str, the rest reg_ext)
                            junk_search = re.search(
                                r"[\(]", s_op_split[-1])
                            if junk_search is not None:
                                if junk_search.start() == 0:
                                    break
                                s_str = s_op_split[-1][:junk_search.start(
                                )]
                                reg_ext = s_op_split[-1][junk_search.start(
                                ):]
                            else:
                                s_str = s_op_split[-1]

                            if options.get_compiler() == SN64:
                                reg_ext = CommonSegCodeSubsegment.replace_reg_names(
                                    reg_ext)

                            symbol_addr = (lui_val * 0x10000) + int(
                                s_str, 0)

                            sym: Optional[Symbol] = None
                            offset_str = ""

                            # If the symbol is likely in the rodata section
                            if (not self.parent.text_follows_rodata
                                    and symbol_addr > func_addr) or (
                                        self.parent.text_follows_rodata
                                        and symbol_addr < func_addr):
                                # Sanity check that the symbol is within this segment's vram
                                if (self.parent.vram_end and
                                        symbol_addr < self.parent.vram_end):
                                    # If we've seen possible jumps to a jumptable and this symbol isn't too close to the end of the function
                                    if (len(possible_jtbl_jumps) > 0
                                            and func_end_addr -
                                            lo_insn.address >= 0x30):
                                        for jump in possible_jtbl_jumps:
                                            if jump[1] == s_op_split[0]:
                                                # NOTE(review): the distance uses the
                                                # first pending jump, not `jump` itself
                                                # - confirm this is intended.
                                                dist_to_jump = (
                                                    possible_jtbl_jumps[0]
                                                    [0] - lo_insn.address)
                                                if dist_to_jump <= 16:
                                                    sym = self.parent.create_symbol(
                                                        symbol_addr,
                                                        reference=True,
                                                        type="jtbl",
                                                        local_only=True,
                                                    )

                                                    self.parent.jumptables[
                                                        symbol_addr] = (
                                                            func_addr,
                                                            func_end_addr)
                                                    break

                            # Not a jump table: create a regular symbol
                            if not sym:
                                sym = self.parent.create_symbol(
                                    symbol_addr,
                                    offsets=True,
                                    reference=True)
                                offset = symbol_addr - sym.vram_start
                                if offset != 0:
                                    offset_str = f"+0x{offset:X}"

                            if self.parent:
                                self.parent.check_rodata_sym(
                                    func_addr, sym)

                            self.update_access_mnemonic(
                                sym, lo_insn.mnemonic)

                            # Tag both halves of the pair
                            func.insns[i].is_hi = True
                            func.insns[i].hi_lo_sym = sym
                            func.insns[i].sym_offset_str = offset_str

                            func.insns[j].is_lo = True
                            func.insns[j].hi_lo_sym = sym
                            func.insns[j].sym_offset_str = offset_str
                            func.insns[j].hi_lo_reg = reg_ext
                            break
def process_insns(self,
                  insns: List[CsInsn],
                  rom_addr,
                  is_asm=False) -> typing.OrderedDict[int, Symbol]:
    """Group a flat list of disassembled instructions into function symbols.

    Each instruction is wrapped in an ``Instruction`` (with a possibly
    rewritten mnemonic and operand string) and appended to the current
    function symbol.  A new function starts after a ``jr`` that no recorded
    branch spans, once a symbol's declared size is reached, or when the next
    address already has a function symbol.

    Side effects: function and label symbols are created on ``self.parent``,
    missing branch targets are added to ``self.parent.labels_to_add`` and
    non-return ``jr`` instructions are recorded in ``self.parent.jtbl_jumps``.

    Args:
        insns: instructions to process, in address order.
        rom_addr: ROM address of the first instruction; advanced by 4 per
            instruction.
        is_asm: when true, the SN64-specific ``break``/``div`` rewrites are
            skipped.

    Returns:
        Ordered mapping of function start address (``vram_start``) to its
        function symbol.
    """
    assert isinstance(self.parent, CommonSegCode)
    self.parent: CommonSegCode = self.parent

    ret: typing.OrderedDict[int, Symbol] = OrderedDict()

    end_func = False
    start_new_func = True
    labels = []

    big_endian = options.get_endianess() == "big"

    # Collect labels
    # (pairs of branch instruction address and branch target address)
    for insn in insns:
        if self.is_branch_insn(insn.mnemonic):
            op_str_split = insn.op_str.split(" ")
            branch_target = op_str_split[-1]
            branch_addr = int(branch_target, 0)
            labels.append((insn.address, branch_addr))

    # Main loop
    for i, insn in enumerate(insns):
        mnemonic = insn.mnemonic
        op_str = insn.op_str

        # If this is non-zero, disasm size insns
        hard_size = 0

        if start_new_func:
            func: Symbol = self.parent.create_symbol(insn.address,
                                                     type="func")
            start_new_func = False

        # NOTE(review): true division yields a float instruction count;
        # the equality check further down relies on it comparing equal to 0.
        if func.size > 4:
            hard_size = func.size / 4

        if options.get_compiler() == SN64:
            op_str = self.replace_reg_names(op_str)

        if mnemonic == "move":
            # Let's get the actual instruction out
            # (the low six bits of the byte at index 3 for big endian, 0 otherwise)
            idx = 3 if big_endian else 0
            opcode = insn.bytes[idx] & 0b00111111

            if options.get_compiler() == SN64:
                op_str += ", $0"
            else:
                op_str += ", $zero"

            if opcode == 37:
                mnemonic = "or"
            elif opcode == 45:
                mnemonic = "daddu"
            elif opcode == 33:
                mnemonic = "addu"
            else:
                print("INVALID INSTRUCTION " + str(insn), opcode)
        elif mnemonic == "jal":
            # Replace the raw call target with the callee's symbol name
            jal_addr = int(op_str, 0)
            jump_func = self.parent.create_symbol(jal_addr,
                                                  type="func",
                                                  reference=True)
            op_str = jump_func.name
        elif self.is_branch_insn(insn.mnemonic):
            op_str_split = op_str.split(" ")
            branch_target = op_str_split[-1]
            branch_target_int = int(branch_target, 0)
            label_sym = self.parent.get_symbol(branch_target_int,
                                               type="label",
                                               reference=True,
                                               local_only=True)

            if label_sym:
                label_name = label_sym.name
            else:
                # No known label: request one and use the generated .L name
                self.parent.labels_to_add.add(branch_target_int)
                label_name = f".L{branch_target[2:].upper()}"

            op_str = " ".join(op_str_split[:-1] + [label_name])
        elif mnemonic in ["mtc0", "mfc0", "mtc2", "mfc2"]:
            # Rebuild the second operand as a plain register number taken
            # from the instruction bytes
            idx = 2 if big_endian else 1
            rd = (insn.bytes[idx] & 0xF8) >> 3
            op_str = op_str.split(" ")[0] + " $" + str(rd)
        elif (mnemonic == "break" and op_str in ["6", "7"]
              and options.get_compiler() == SN64 and not is_asm):
            # SN64's assembler expands div to have break if dividing by zero
            # However, the break it generates is different than the one it generates with `break N`
            # So we replace break instructions for SN64 with the exact word that the assembler generates when expanding div
            if op_str == "6":
                mnemonic = ".word 0x0006000D"
                op_str = ""
            elif op_str == "7":
                mnemonic = ".word 0x0007000D"
                op_str = ""
        elif (mnemonic in ["div", "divu"]
              and options.get_compiler() == SN64 and not is_asm):
            # SN64's assembler also doesn't like assembling `div $0, a, b` with .set noat active
            # Removing the $0 fixes this issue
            if op_str.startswith("$0, "):
                op_str = op_str[4:]

        func.insns.append(Instruction(insn, mnemonic, op_str, rom_addr))
        rom_addr += 4

        size_remaining = hard_size - len(
            func.insns) if hard_size > 0 else 0

        if mnemonic == "jr":
            # Record potential jtbl jumps
            if op_str not in ["$ra", "$31"]:
                self.parent.jtbl_jumps[insn.address] = op_str

            # A branch that spans this jr (in either direction) means the
            # function continues past it
            keep_going = False
            for label in labels:
                if (label[0] > insn.address and label[1] <= insn.address
                    ) or (label[0] <= insn.address
                          and label[1] > insn.address):
                    keep_going = True
                    break
            if not keep_going and not size_remaining:
                # Skip to the next instruction (the one after the jr)
                # before the function is closed
                end_func = True
                continue

        # Stop here if a size was specified and we have disassembled up to the size
        if hard_size > 0 and size_remaining == 0:
            end_func = True

        # Also stop when the next address already has a function symbol
        if i < len(insns) - 1 and self.parent.get_symbol(
                insns[i + 1].address,
                local_only=True,
                type="func",
                dead=False):
            end_func = True

        if end_func:
            # Close the function only when everything left is nops or the
            # next instruction is not a nop; otherwise keep absorbing nops
            if (self.is_nops(insns[i:]) or i < len(insns) - 1
                    and insns[i + 1].mnemonic != "nop"):
                end_func = False
                start_new_func = True
                ret[func.vram_start] = func

    # Add the last function (or append nops to the previous one)
    # NOTE(review): `func` is unbound here if `insns` is empty, and
    # next(reversed(ret.values())) raises StopIteration if `ret` is empty.
    if not self.is_nops([insn.instruction for insn in func.insns]):
        ret[func.vram_start] = func
    else:
        next(reversed(ret.values())).insns.extend(func.insns)

    return ret