def print_binary_symbols(binary: MachoBinary, verbose: bool = True) -> None:
    """Print an nm-style listing of the symbols a binary imports and exports.

    Args:
        binary: The Mach-O slice whose symbols should be printed.
        verbose: When set, imported symbols are tagged "(undefined)" together with their source
            library, and exported symbols are tagged with their "(segment,section)" pair.
            Otherwise imports are tagged "U" and exports with a single upper-case letter taken
            from the section name.
    """
    binary_path = binary.path.as_posix()
    arch_name = binary.cpu_type.name.lower()
    print(f"\n{binary_path} (for architecture {arch_name})")

    # The symbol tables are read through a MachoAnalyzer rather than the binary itself
    analyzer = MachoAnalyzer.get_analyzer(binary)

    # Imported symbols have no address to show. Pad the address column with the same width
    # that exported addresses are formatted to below, so that both kinds of row line up.
    address_padding = " " * 11
    for imported_sym in analyzer.imported_symbols:
        if verbose:
            kind = "(undefined)"
            origin = f"(from {get_source_library_of_imported_symbol(analyzer, imported_sym)})"
        else:
            kind = "U"
            origin = ""
        print(f"{address_padding} {kind} {imported_sym} {origin}")

    for address, exported_sym in analyzer.exported_symbol_pointers_to_names.items():
        containing_section = binary.section_for_address(address)
        raw_section_name = containing_section.name.decode()
        if verbose:
            # Verbose rows identify the symbol's location as (segment,section)
            owning_segment = containing_section.cmd.segname.decode()
            location = f"({owning_segment},{raw_section_name})"
        else:
            # Terse rows use only the first letter after the section name's '__' prefix
            location = raw_section_name[2:3].upper()
        print(f"{address:#011x} {location} {exported_sym}")
def __init__(
    self, binary: MachoBinary, instructions: List[CsInsn], method_info: Optional[ObjcMethodInfo] = None
) -> None:
    """Set up an analyzer over the instructions of a single function.

    Args:
        binary: The binary containing the function.
        instructions: The function's disassembled instructions, in address order. May be empty.
        method_info: Optional Objective-C method description associated with this function.
    """
    # NOTE(review): imported at call time rather than module level, presumably to avoid a
    # circular import between this module and strongarm.macho -- confirm before moving it.
    from strongarm.macho import MachoAnalyzer

    # Only the indexing itself is guarded, so that an IndexError raised by anything else is
    # not mistaken for "this function has no instructions".
    try:
        first_instruction = instructions[0]
        last_instruction = instructions[-1]
    except IndexError:
        # This method must have just been a stub with no real instructions!
        self.start_address = VirtualMemoryPointer(0)
        self.end_address = VirtualMemoryPointer(0)
    else:
        self.start_address = VirtualMemoryPointer(first_instruction.address)
        # The end-address is right-exclusive
        self.end_address = VirtualMemoryPointer(last_instruction.address) + MachoBinary.BYTES_PER_INSTRUCTION

    self.binary = binary
    self.macho_analyzer = MachoAnalyzer.get_analyzer(binary)
    self.instructions = instructions
    self.method_info = method_info

    # Starts unset; nothing in this constructor computes the call targets
    self._call_targets: Optional[List[ObjcBranchInstruction]] = None

    # Find basic-block-boundaries upfront
    self.basic_blocks = self._find_basic_blocks()
def get_function_analyzer_for_method(cls, binary: MachoBinary, method_info: ObjcMethodInfo) -> "ObjcFunctionAnalyzer":
    """Build an ObjcFunctionAnalyzer for the implementation of an Objective-C method.

    The MachoAnalyzer is obtained through MachoAnalyzer.get_analyzer(), in the same way as
    get_function_analyzer() does.

    Args:
        binary: The MachoBinary containing a function at method_info.imp_addr
        method_info: The ObjcMethodInfo describing the IMP to be analyzed

    Returns:
        An ObjcFunctionAnalyzer over the method's instructions, carrying method_info.

    Raises:
        ValueError: method_info has no implementation address.
    """
    # TODO(PT): it seems like this & related methods should be moved to MachoAnalyzer
    imp_address = method_info.imp_addr
    if not imp_address:
        raise ValueError(f"Could not get method implementation address for {method_info}")

    from strongarm.macho.macho_analyzer import MachoAnalyzer

    method_instructions = MachoAnalyzer.get_analyzer(binary).get_function_instructions(imp_address)
    return ObjcFunctionAnalyzer(binary, method_instructions, method_info=method_info)
def main() -> None:
    """Write a CSV of (address, symbol name, image path) rows for a dyld_shared_cache.

    Command-line arguments:
        dyld_shared_cache_path: Path to the dyld_shared_cache which should be symbolicated.
        output_csv_path: Path of the CSV file to write.

    Images that fail to parse are logged with their traceback and skipped, so one bad image
    does not abort the whole run.
    """
    logging.basicConfig(level=logging.INFO)

    arg_parser = argparse.ArgumentParser(description="dyld_shared_cache symbol map generator")
    arg_parser.add_argument(
        "dyld_shared_cache_path", type=str, help="Path to the dyld_shared_cache which should be symbolicated"
    )
    arg_parser.add_argument("output_csv_path", type=str, help="Output CSV path")
    args = arg_parser.parse_args()

    dyld_shared_cache = DyldSharedCacheParser(Path(args.dyld_shared_cache_path))

    symbols: List[Tuple[VirtualMemoryPointer, str, Path]] = []

    # Iterate each image in the DSC, extract it, and record its symbols
    image_count = len(dyld_shared_cache.embedded_binary_info)
    for idx, path in enumerate(dyld_shared_cache.embedded_binary_info.keys(), start=1):
        # The DSC has more than 1,000 binaries, so try to free up resources after each image
        MachoAnalyzer.clear_cache()

        logging.info("(%d/%d) Symbolicating %s...", idx, image_count, path)
        try:
            binary = dyld_shared_cache.get_embedded_binary(path)
            analyzer = MachoAnalyzer.get_analyzer(binary)
            for sym, addr in analyzer.exported_symbol_names_to_pointers.items():
                symbols.append((VirtualMemoryPointer(addr), sym, path))
        except Exception:
            # Deliberately best-effort, but keep the exception type and traceback in the log
            # so that failures can actually be diagnosed.
            logging.exception("Failed to symbolicate %s", path)
            continue

    # newline="" is what the csv module asks for, so that it controls line endings itself
    with open(str(args.output_csv_path), "w", newline="") as output_csv:
        csv_writer = csv.writer(output_csv, delimiter=",", quoting=csv.QUOTE_MINIMAL)
        csv_writer.writerows(symbols)
def main() -> None:
    """Parse a Mach-O from the command line and drop into the interactive strongarm shell.

    Command-line arguments:
        binary_path: Path to binary to analyze.
        --verbose: Also emit DEBUG-level log records on stdout.
    """
    # XXX(PT): Change this if you want to run a quick script! Write it in strongarm_script()
    script = False
    # end of config

    arg_parser = argparse.ArgumentParser(description="Mach-O Analyzer")
    arg_parser.add_argument("--verbose", action="store_true", help="Output extra info while analyzing")
    arg_parser.add_argument("binary_path", metavar="binary_path", type=str, help="Path to binary to analyze")
    args = arg_parser.parse_args()

    def configure_logger(verbose: bool) -> None:
        """Attach a stdout handler to the root logger, at DEBUG when verbose and INFO otherwise."""
        root = logging.getLogger()
        # The root logger lets everything through; the handler's level decides what is shown
        root.setLevel(logging.DEBUG)

        ch = logging.StreamHandler(sys.stdout)
        # The handler level must follow the flag: with the handler pinned at INFO, raising
        # only the logger's level never lets DEBUG records reach stdout.
        ch.setLevel(logging.DEBUG if verbose else logging.INFO)
        formatter = logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s")
        ch.setFormatter(formatter)
        root.addHandler(ch)

    configure_logger(args.verbose)

    print_header(args)

    parser = MachoParser(pathlib.Path(args.binary_path))

    # print slice info
    print("Slices:")
    for macho_slice in parser.slices:
        print(f"\t{macho_slice.cpu_type.name} Mach-O slice")

    binary = pick_macho_slice(parser)
    print(f"Reading {binary.cpu_type.name} slice\n\n")

    analyzer = MachoAnalyzer.get_analyzer(binary)
    shell = StrongarmShell(binary, analyzer)

    if script:
        print("Running provided script...\n\n")
        strongarm_script(binary, analyzer)
    else:
        autorun_cmd = "info metadata segments sections loads"
        print(f"Auto-running '{autorun_cmd}'\n\n")
        shell.run_command(autorun_cmd)

        # process_command() returns False once the shell exits
        while shell.process_command():
            pass

    print("May your arms be beefy and your binaries unencrypted")
def setup_method(self) -> None:
    """Prepare the binary, analyzer and function analyzer shared by the tests in this class.

    The function under test is the first implementation found for the selector
    URLSession:didReceiveChallenge:completionHandler: in the first slice of FAT_PATH.
    """
    self.binary = MachoParser(TestFunctionAnalyzer.FAT_PATH).slices[0]
    self.analyzer = MachoAnalyzer.get_analyzer(self.binary)

    selector = "URLSession:didReceiveChallenge:completionHandler:"
    self.implementations = self.analyzer.get_imps_for_sel(selector)

    first_implementation = self.implementations[0]
    self.instructions = first_implementation.instructions
    self.imp_addr = self.instructions[0].address

    # Sanity-check that the fixture still resolves to the function the tests expect
    assert self.imp_addr == TestFunctionAnalyzer.URL_SESSION_DELEGATE_IMP_ADDR

    self.function_analyzer = ObjcFunctionAnalyzer(self.binary, self.instructions)
def get_function_analyzer_for_signature(
    cls, binary: MachoBinary, class_name: str, sel_name: str
) -> "ObjcFunctionAnalyzer":
    """Build an ObjcFunctionAnalyzer for the method identified by a class name and selector name.

    Args:
        binary: The MachoBinary whose Objective-C classes are searched.
        class_name: Name of the Objective-C class declaring the method.
        sel_name: Name of the selector to look up on that class.

    Returns:
        The analyzer for the first selector whose class name and selector name both match.

    Raises:
        RuntimeError: No class/selector pair in the binary matched.
    """
    from strongarm.macho.macho_analyzer import MachoAnalyzer

    for candidate_class in MachoAnalyzer.get_analyzer(binary).objc_classes():
        if candidate_class.name != class_name:
            continue

        for candidate_sel in candidate_class.selectors:
            if candidate_sel.name != sel_name:
                continue

            # XXX(PT): where are the method info's normally stored? Can we grab it from there?
            matched_method = ObjcMethodInfo(candidate_class, candidate_sel, candidate_sel.implementation)
            return ObjcFunctionAnalyzer.get_function_analyzer_for_method(binary, matched_method)

    raise RuntimeError(f"No found function analyzer for -[{class_name} {sel_name}]")
def main():
    """Print every distinct string found across all slices of the binary given on the command line."""
    cli = argparse.ArgumentParser(description="strings clone")
    cli.add_argument(
        "binary_path",
        metavar="binary_path",
        type=str,
        help="Path to binary whose strings should be printed",
    )
    binary_path = pathlib.Path(cli.parse_args().binary_path)

    # A set de-duplicates strings that appear in more than one slice
    unique_strings = set()
    for macho_slice in MachoParser(binary_path).slices:
        # The string table is read through a MachoAnalyzer
        slice_analyzer = MachoAnalyzer.get_analyzer(macho_slice)
        unique_strings.update(slice_analyzer.strings())

    for found_string in unique_strings:
        print(found_string)
def get_function_analyzer(cls, binary: MachoBinary, start_address: VirtualMemoryPointer) -> "ObjcFunctionAnalyzer":
    """Build an ObjcFunctionAnalyzer for the function beginning at start_address.

    The MachoAnalyzer is obtained through MachoAnalyzer.get_analyzer() rather than constructed
    here. Prefer this method over building an ObjcFunctionAnalyzer by hand.

    Args:
        binary: The MachoBinary containing a function at start_address
        start_address: The entry point address for the function to be analyzed

    Returns:
        An ObjcFunctionAnalyzer over the instructions of that function.
    """
    from strongarm.macho.macho_analyzer import MachoAnalyzer

    function_instructions = MachoAnalyzer.get_analyzer(binary).get_function_instructions(start_address)
    return ObjcFunctionAnalyzer(binary, function_instructions)
def test_three_op_add(self) -> None:
    """x0 holds the adrp page plus the add immediate by the time the bl is reached."""
    # Disassembly under test:
    # 0x000000010000665c         adrp       x0, #0x102a41000
    # 0x0000000100006660         add        x0, x0, #0x458
    # 0x0000000100006664         bl         0x101f8600c
    fixture_path = pathlib.Path(__file__).parent / "bin" / "ThreeOpAddInstruction"
    binary = MachoParser(fixture_path).get_arm64_slice()
    assert binary

    analyzer = MachoAnalyzer.get_analyzer(binary)
    function_start = VirtualMemoryPointer(0x10000665C)
    function_analyzer = ObjcFunctionAnalyzer(binary, analyzer.get_function_instructions(function_start))

    # Inspect the register state at the branch, after both the adrp and the add have run
    branch_address = VirtualMemoryPointer(0x100006664)
    raw_branch = function_analyzer.get_instruction_at_address(branch_address)
    branch_instr = ObjcInstruction.parse_instruction(function_analyzer, raw_branch)

    x0_contents = function_analyzer.get_register_contents_at_instruction("x0", branch_instr)
    assert x0_contents.type == RegisterContentsType.IMMEDIATE
    assert x0_contents.value == 0x102A41458
def main():
    """Print class-dump style @interface declarations for the binary given on the command line.

    Every Objective-C class and category reported by the analyzer is printed with its
    protocols, ivars and one prototype per selector.
    """
    cli = argparse.ArgumentParser(description="classdump clone")
    cli.add_argument(
        "binary_path",
        metavar="binary_path",
        type=str,
        help="Path to binary to print Objective-C class information",
    )
    parser = MachoParser(pathlib.Path(cli.parse_args().binary_path))

    # Use an arm64 slice when one is present, otherwise fall back to the first slice
    arm64_candidates = [macho_slice for macho_slice in parser.slices if macho_slice.cpu_type == CPU_TYPE.ARM64]
    binary = arm64_candidates[0] if arm64_candidates else parser.slices[0]

    analyzer = MachoAnalyzer.get_analyzer(binary)
    for objc_entity in analyzer.objc_classes() + analyzer.objc_categories():
        # Opening line: the superclass is always printed as NSObject
        declaration = f"@interface {objc_entity.name} : NSObject"
        if objc_entity.protocols:
            conformed = ", ".join(protocol.name for protocol in objc_entity.protocols)
            declaration = f"{declaration} <{conformed}>"
        print(declaration)

        # Ivar list
        print("{")
        for ivar in objc_entity.ivars:
            # An Objective-C class type is encoded as @"Name"; drop the surrounding @ and quotes
            ivar_type = ivar.class_name.strip('@"')
            print(f"\t{ivar_type}* {ivar.name};")
        print("}")

        # Method list
        for selector in objc_entity.selectors:
            # TODO(PT): Guess argument types by using the selector's type encoding
            print(_prototype_from_selector(selector.name))

        print("@end\n")
def test_analyze_embedded_binary(self, dyld_shared_cache: DyldSharedCacheParser) -> None:
    """An image extracted from the shared cache can be parsed and then analyzed."""
    # Given I parse an embedded binary
    libsystem_path = Path("/usr/lib/libSystem.B.dylib")
    binary = dyld_shared_cache.get_embedded_binary(libsystem_path)

    # Then the binary appears to be parsed correctly
    expected_functions = {0x18002FA7C, 0x18002FB7C, 0x18002FB34, 0x18002FB58, 0x18002FBBC}
    assert binary.get_virtual_base() == 0x18002E000
    assert binary.get_functions() == expected_functions

    # And the binary can be analyzed further
    analyzer = MachoAnalyzer.get_analyzer(binary)

    # And the analyzed binary reports the correct imports and exports
    assert len(analyzer.imported_symbols) == 47
    assert analyzer.exported_symbol_names_to_pointers == {
        "<redacted>": 0x18002FBBC,
        "___crashreporter_info__": 0x1B7C574B8,
        "_libSystem_atfork_child": 0x18002FB7C,
        "_libSystem_atfork_parent": 0x18002FB58,
        "_libSystem_atfork_prepare": 0x18002FB34,
        "_mach_init_routine": 0x1B7C574B0,
    }
def binary_containing_code(
    code_inside_objc_class: str, is_assembly: bool, code_outside_objc_class: str = ""
) -> Generator[Tuple[MachoBinary, MachoAnalyzer], None, None]:
    """Yield the arm64 slice, and its analyzer, of a binary compiled from the provided source code.

    If is_assembly is set, the source code is treated as AArch64 assembly. Otherwise, as Objective-C source.
    The provided source code is embedded within a class definition. If you need to embed code outside a class
    definition, pass it as code_outside_objc_class.

    The compiled binary is cached in the bin/auto_compiled_binaries directory next to this file, under a name
    derived from the MD5 of the source text, and is only compiled when no cached copy exists.
    This facilitates running the unit tests using this mechanism in Pipelines.

    Args:
        code_inside_objc_class: Source placed inside the generated class definition.
        is_assembly: Whether the source is AArch64 assembly rather than Objective-C.
        code_outside_objc_class: Source placed outside the class definition.

    Yields:
        A (MachoBinary, MachoAnalyzer) pair for the compiled binary's arm64 slice.
    """
    # TODO(PT): When you modify source code of a unit test, it means there is a 'dangling' unused binary in the tree.
    # Add a cleanup task to identify these unused binaries and delete them.

    # Do we need to compile this code, or is there a cached version available?
    # NOTE(review): is_assembly is not part of the cache key, so identical text compiled in both modes would share
    # one cache entry. Left unchanged because a new key scheme would orphan every binary already cached.
    code_hash = hashlib.md5(f"{code_inside_objc_class}{code_outside_objc_class}".encode()).hexdigest()
    compiled_artifacts_dir = pathlib.Path(__file__).parent / "bin" / "auto_compiled_binaries"
    compiled_code_bin_path = compiled_artifacts_dir / str(code_hash)

    if not compiled_code_bin_path.exists():
        # Compile and cache this source code
        with _compile_code(
            code_inside_objc_class, is_assembly, code_outside_objc_class=code_outside_objc_class
        ) as temp_compiled_bin:
            # shutil.copy does not create missing parent directories, so make sure the cache directory exists
            compiled_artifacts_dir.mkdir(parents=True, exist_ok=True)
            shutil.copy(temp_compiled_bin, compiled_code_bin_path)

    binary = MachoParser(compiled_code_bin_path).get_arm64_slice()
    assert binary is not None
    analyzer = MachoAnalyzer.get_analyzer(binary)
    yield binary, analyzer