def get_binary_architectures(filename):
    """
    Report whether a Mach-O file is fat and which architectures it holds.

    Returns a tuple ``(is_fat, archs)``. ``is_fat`` is a boolean telling a
    fat binary from a thin one; ``archs`` is a list with one
    lipo/codesign-compatible architecture name per header in the file.
    """
    binary = MachO(filename)
    is_fat = bool(binary.fat)
    arch_names = []
    for slice_header in binary.headers:
        arch_names.append(_get_arch_string(slice_header.header))
    return is_fat, arch_names
def extract_shellcode(filename):
    """
    Extract the text and data bytes of ``<base>.macho`` into a .bin file.

    ``<base>`` is ``filename`` without its extension; both the input
    ``<base>.macho`` and the output ``<base>_shellcode.bin`` live in SRC_DIR.

    Only the first section of each load command of the first header is
    inspected. A section whose name contains "text" sets the start offset
    and adds its size; one whose name contains "data" adds its size only.

    :param filename: name used to derive the .macho and .bin file names
    :return: the extracted bytes (also written to the .bin file)
    """
    base = os.path.splitext(filename)[0]
    macho_filename = os.path.join(SRC_DIR, "%s.macho" % (base))

    fileoffset = 0
    shellcodesize = 0
    for _load_cmd, _cmd, data in MachO(macho_filename).headers[0].commands:
        if not data or not hasattr(data[0], "sectname"):
            continue
        first_section = data[0]
        section_name = first_section.sectname
        # Section names may be bytes (Python 3) or str (Python 2).
        if not isinstance(section_name, str):
            section_name = section_name.decode("utf-8", "replace")
        section_name = section_name.rstrip('\0')
        if "text" in section_name:
            fileoffset = first_section.offset
            shellcodesize += first_section.size
        if "data" in section_name:
            shellcodesize += first_section.size

    shellcode_filename = os.path.join(SRC_DIR, "%s_shellcode.bin" % (base))
    with open(macho_filename, 'rb') as f:
        # Absolute seek: the offset is measured from the start of the file.
        f.seek(fileoffset)
        shellcode_bytes = f.read(shellcodesize)
    # The context managers close both files; no explicit close() is needed.
    with open(shellcode_filename, 'wb') as sf:
        sf.write(shellcode_bytes)
    return shellcode_bytes
def modify_object_macholib(cur_path, paths_to_paths):
    """
    Rewrite the dependency and id paths stored in a Mach-O file.

    This function is used when installing mach-o buildcaches on linux: it
    rewrites the mach-o loader commands for dependency library paths of
    mach-o binaries and the id path for mach-o libraries. Rewriting of
    rpaths is handled by replace_prefix_bin.

    Inputs
    cur_path        mach-o binary to be modified
    paths_to_paths  dictionary mapping paths in the old install layout to
                    paths in the new install layout

    Writing the result back is best effort: a failure is reported as a
    warning and does not raise.
    """
    import warnings

    dll = MachO(cur_path)
    # dict.get returns None for paths that have no mapping.
    dll.rewriteLoadCommands(paths_to_paths.get)
    try:
        # The context manager closes the handle even when the write fails.
        with open(dll.filename, 'rb+') as f:
            # One call is enough: MachO.write() covers every header.
            dll.write(f)
            f.seek(0, 2)
            f.flush()
    except Exception as exc:
        warnings.warn("could not write rewritten load commands to %s: %s"
                      % (dll.filename, exc))
def fuzz(self, filename, output_filename):
    """
    Mutate ``filename`` and write the result to ``output_filename``.

    A random number of changes (1 to 25) is requested;
    ``do_fuzz_internal`` is called at most five times that number, stopping
    early once ``self.change_list`` has reached the requested length. The
    output starts as a copy of the input and is then overwritten in place
    with the mutated headers. A text file ``<output_filename>.diff`` lists
    the applied changes, and ``radiff2`` is run on both files.
    """
    import subprocess

    self.macho = MachO(filename)

    changes = random.randint(1, 25)
    for _ in range(changes * 5):
        self.do_fuzz_internal()
        # NOTE(review): the loop checks self.change_list while the report
        # below iterates self.changes; confirm both attributes are intended.
        if len(self.change_list) == changes:
            break

    # Copy the contents of the original file to the output file. Read first
    # so the source handle is closed before the output is created.
    with open(filename, "rb") as src:
        original = src.read()
    with open(output_filename, "wb") as dst:
        dst.write(original)

    # Update its contents
    with open(output_filename, "rb+") as out:
        self.macho.write(out)

    # And write the .diff file (text mode: every line written is a str)
    with open(output_filename + ".diff", "w") as diff_file:
        diff_file.write("# Original file created by 'MachO Mutator' was %s\n" % filename)
        for change in self.changes:
            line = "# CHANGE: %s" % ", ".join(change)
            print(line)
            diff_file.write(line + "\n")

    # An argv list keeps file names with spaces or shell metacharacters
    # from being interpreted by a shell.
    try:
        subprocess.call(["radiff2", filename, output_filename])
    except OSError as exc:
        # os.system() did not raise when radiff2 was missing either.
        print("radiff2 could not be run: %s" % exc)
def __init__(self, file_path):
    """
    Parse the Mach-O file behind ``self.fp`` and cache its layout.

    Sets ``helper``, ``architecture``, ``pack_endianness``, ``sections``,
    ``executable_segment`` and ``libraries`` from the first header.

    Raises Exception for fat binaries and for unrecognized architectures;
    raises IndexError when no segment has the 0x4 bit set in ``initprot``.
    """
    super(MachOExecutable, self).__init__(file_path)

    self.helper = MachO(self.fp)
    if self.helper.fat:
        raise Exception('MachO fat binaries are not supported at this time')

    self.architecture = self._identify_arch()
    if self.architecture is None:
        raise Exception('Architecture is not recognized')

    message = "Initialized {} {} with file '{}'".format(
        self.architecture, type(self).__name__, file_path)
    logging.debug(message)

    first_header = self.helper.headers[0]
    self.pack_endianness = first_header.endian

    segment_kinds = (LC_SEGMENT, LC_SEGMENT_64)

    # Collect every section of every segment load command.
    self.sections = []
    for load_cmd, segment, contents in first_header.commands:
        if load_cmd.cmd not in segment_kinds:
            continue
        for raw_section in contents:
            self.sections.append(section_from_macho_section(raw_section, segment))

    # 0x4 is presumably the execute bit of initprot (TODO confirm); the
    # first matching segment is kept.
    executable_candidates = []
    for load_cmd, segment, _ in first_header.commands:
        if load_cmd.cmd in segment_kinds and segment.initprot & 0x4:
            executable_candidates.append(segment)
    self.executable_segment = executable_candidates[0]

    # Library names are the NUL-stripped payloads of LC_LOAD_DYLIB commands.
    library_names = []
    for load_cmd, _segment, payload in first_header.commands:
        if load_cmd.cmd == LC_LOAD_DYLIB:
            library_names.append(payload.rstrip('\x00'))
    self.libraries = library_names
def _get_machine_type(self, path):
    """
    Identify the executable format and word size of the file at ``path``.

    The file is tried as PE first, then as Mach-O, then as ELF.

    :param path: path of the binary to inspect
    :return: ``(format, arch)`` where format is 'PE', 'Mach-O' or 'ELF' and
        arch is '32-bit' or '64-bit'; ``(None, None)`` when no parser
        accepts the file
    """
    try:
        pe = pefile.PE(path)
        machine_name = pefile.MACHINE_TYPE[pe.FILE_HEADER.Machine]
        arch = '32-bit' if machine_name.find('I386') != -1 else '64-bit'
        return 'PE', arch
    except pefile.PEFormatError as detail:
        # Python 3 unbinds the "as" name after the handler; keep a reference.
        pe_error = detail

    try:
        self._dprint(pe_error)
        m = MachO(path)
        arch = None
        # As before, the last header decides the reported word size.
        for header in m.headers:
            cpu_name = CPU_TYPE_NAMES.get(header.header.cputype,
                                          header.header.cputype)
            arch = '64-bit' if cpu_name == 'x86_64' else '32-bit'
        if arch is not None:
            return 'Mach-O', arch
    except Exception:
        # Not a readable Mach-O file: fall through to the ELF attempt.
        pass

    try:
        # The context manager closes the stream once the header is read.
        with open(path, 'rb') as stream:
            e_ident = ELFFile(stream).header['e_ident']
            elf_class = e_ident['EI_CLASS']
        arch = '64-bit' if elf_class == 'ELFCLASS64' else '32-bit'
        return 'ELF', arch
    except Exception:
        return None, None
def macholib_get_paths(cur_path):
    """
    Get rpaths, dependencies and id of a mach-o object using the python
    macholib package.

    Returns a tuple ``(rpaths, deps, ident)``. ``rpaths`` and ``deps`` are
    those of the last header in the file. ``ident`` is the id of the most
    recent header that carries exactly one LC_ID_DYLIB command, or None.
    """
    dll = MachO(cur_path)

    def _decoded_payloads(header, wanted_cmd):
        # Payloads of all load commands of one kind, NUL-stripped and decoded.
        return [
            payload.rstrip(b'\0').decode('utf-8')
            for load_command, _dylib_command, payload in header.commands
            if load_command.cmd == wanted_cmd
        ]

    ident = None
    rpaths = []
    deps = []
    for header in dll.headers:
        rpaths = _decoded_payloads(header, macholib.mach_o.LC_RPATH)
        deps = _decoded_payloads(header, macholib.mach_o.LC_LOAD_DYLIB)
        idents = _decoded_payloads(header, macholib.mach_o.LC_ID_DYLIB)
        if len(idents) == 1:
            ident = idents[0]

    for label, value in (('ident', ident), ('deps', deps), ('rpaths', rpaths)):
        tty.debug('%s: %s' % (label, value))
    return (rpaths, deps, ident)
def print_file(fp, path):
    """
    Write a summary of the Mach-O file at ``path`` to ``fp``.

    The path is printed first. For every header a bracketed line gives the
    header class, endianness, word size, CPU type and CPU subtype, followed
    by each distinct name from ``walkRelocatables()`` on its own
    tab-indented line. A blank line ends the report.
    """
    print(path, file=fp)
    parsed = MachO(path)
    for header in parsed.headers:
        cpu = header.header
        if header.MH_MAGIC in (MH_MAGIC_64, MH_CIGAM_64):
            word_size = "64-bit"
        else:
            word_size = "32-bit"
        cpu_name = CPU_TYPE_NAMES.get(cpu.cputype, cpu.cputype)
        cpu_subname = get_cpu_subtype(cpu.cputype, cpu.cpusubtype)
        summary = " [%s endian=%r size=%r arch=%r subarch=%r]" % (
            header.__class__.__name__, header.endian, word_size,
            cpu_name, cpu_subname)
        print(summary, file=fp)

        # Print each referenced name once, in first-seen order.
        already_printed = set()
        for _idx, _name, other in header.walkRelocatables():
            if other in already_printed:
                continue
            already_printed.add(other)
            print("\t" + other, file=fp)
    print("", file=fp)
def is_macho(path):
    """Return True if the given path is a Mach-O binary.

    macholib reports a non-Mach-O file with a ValueError whose message
    starts with "Unknown Mach-O"; only that error means "not Mach-O". Any
    other ValueError is re-raised instead of being reported as False.
    """
    try:
        MachO(path)
    except ValueError as e:
        # An explicit check instead of assert: assert is stripped under -O,
        # which would turn every ValueError into a "not Mach-O" answer.
        if not str(e).startswith("Unknown Mach-O"):
            raise
        return False
    return True
def macho_dependencies_list(target_path, header_magic=None):
    """
    Generates a list of libraries the given Mach-O file depends on.

    In that list a single library is represented by its "install path": for
    some libraries it would be a full file path, and for others it would be
    a relative path (sometimes with dyld templates like @executable_path or
    @rpath in it).

    Note: some architectures of a fat Mach-O may technically depend on
    libraries that others don't, which is why `header_magic` can select a
    particular header. When it is None the first header is used; a string
    value is converted with `_MH_MAGIC_from_string`.

    Returns an object with two properties: `weak` and `strong` that hold
    lists of weak and strong dependencies respectively.

    Raises Exception when no header matches `header_magic`.
    """
    MachODeprendencies = namedtuple("MachODeprendecies", "weak strong")

    # basestring only exists on Python 2; fall back to str elsewhere.
    try:
        string_types = basestring
    except NameError:
        string_types = str

    # Convert the magic value into macholib representation if needed
    if isinstance(header_magic, string_types):
        header_magic = _MH_MAGIC_from_string(header_magic)

    macho = MachO(target_path)

    # Pick the first header with the required magic value (any header when
    # no magic was requested).
    header = None
    for candidate in macho.headers:
        if header_magic is None or candidate.header.magic == header_magic:
            header = candidate
            break
    if header is None:
        raise Exception(
            "Unable to find a header for the given MAGIC value in that Mach-O file"
        )

    def decode_load_command_data(data):
        # Keep everything before the first NUL. split() also copes with
        # data that has no NUL at all, where slicing with find() == -1
        # would silently drop the last byte.
        return data.split(b"\x00", 1)[0].decode(sys.getfilesystemencoding())

    def references_for(load_command_id):
        # Decoded payloads of every load command of the given kind.
        return [
            decode_load_command_data(data)
            for lc, _cmd, data in header.commands
            if lc.cmd == load_command_id
        ]

    strong_refs = references_for(LC_LOAD_DYLIB)
    weak_refs = references_for(LC_LOAD_WEAK_DYLIB)
    return MachODeprendencies(weak=weak_refs, strong=strong_refs)
def target(self):
    """
    Let the user pick a target in an open panel and load it in the debugger.

    A directory selection is resolved to the first entry under
    ``Contents/MacOS``. The chosen file is parsed with MachO to pick the
    architecture string, a debugger target is created and stored in the
    module-level ``target``, and the names returned by ``returnFuntions``
    are appended to ``allFuncsNames`` and pushed to ``self.ds``. Progress
    text is appended to ``self.lldbout``. Any failure shows an alert.
    """
    filename = ""
    global debugger
    global target
    global allFuncsNames
    panel = NSOpenPanel.openPanel()
    panel.setCanCreateDirectories_(True)
    panel.setCanChooseDirectories_(True)
    panel.setCanChooseFiles_(True)
    panel.setAllowsMultipleSelection_(False)
    if panel.runModal() == NSOKButton:
        filename = panel.filename()
    # NOTE(review): if the panel is dismissed, filename stays "" and the
    # code below presumably fails into the alert; confirm this is intended.
    try:
        if os.path.isdir(filename):
            # have to identify which file
            # or figure out how to open .app files using nsopen
            filename = glob.glob(filename + '/Contents/MacOS/*')[0]
        m = MachO(filename)
        # The last header decides arch; arch is unbound if there are none.
        for header in m.headers:
            if header.MH_MAGIC == MH_MAGIC_64:
                arch = 'systemArch64'
            else:
                arch = 'systemArch32'
        # NOTE(review): platArch is assigned but never read in this method.
        if platform.architecture()[0] == '64bit':
            platArch = 'systemArch64'
        else:
            platArch = 'systemArch32'
        # NOTE(review): `error` is not defined in this method; presumably a
        # module-level object. Verify.
        target = debugger.CreateTarget(str(filename), arch, None, True, error)
        # self.lldbout.setString_("[+]\tTarget set as: " + filename + "\n")
        # symtab=run_commands(command_interpreter,[str("image dump symtab "+filename.split('/')[-1])]).split('\n')
        # symtab=symtab[7:]
        allFuncs = returnFuntions(target, filename)
        for i in allFuncs:
            allFuncsNames.append(i.name)
        self.ds.addObjectsFromArray_(allFuncsNames)
        self.tv.setDataSource_(self)
        self.tv.setDelegate_(self)
        self.lldbout.setString_(
            str(self.lldbout.string()) + "\n[+]\tCollecting functions\n")
        self.lldbout.setString_(
            str(self.lldbout.string()) + "[+]\tAnalysis Done\n")
    except:
        # Bare except: every failure above ends in the same alert.
        alert('There was an error while setting target. Please try again!')
def get_macho_filetype(filepath):
    """
    Return the set of ``filetype`` values found in a Mach-O file's headers.

    An empty set is returned when the file cannot be opened or parsed
    (ValueError, struct.error or IOError from MachO).
    """
    try:
        parsed = MachO(filepath)
    except (ValueError, struct.error, IOError):
        return set()
    return {header.filetype for header in parsed.headers}
def processFile(filename):
    """
    Compute and print the entropy of every section of a Mach-O file.

    :param filename: path of the file to analyse
    :return: a mapping of section name to section entropy, or None when
        the file cannot be parsed (an error line is printed in that case)
    """
    res = defaultdict()
    try:
        machoEntropyComputer = MachoEntropyComputer(filename, MachO(filename))
    except Exception:
        # Exception rather than a bare except, so KeyboardInterrupt and
        # SystemExit still propagate.
        print("ERROR while parsing %s" % (filename,))
        return None
    for s in machoEntropyComputer.compute():
        print("%s has %i bytes with entropy of %f" % (
            s.sectionName, s.sectionSize, s.sectionEntropy))
        res[s.sectionName] = s.sectionEntropy
    return res
def get_macos_sdk_version(filename):
    """
    Obtain the version of macOS SDK against which the given binary was built.

    NOTE: currently, the version is retrieved only from the first arch
    slice in the binary.

    :return: (major, minor, revision) tuple
    """
    first_slice = MachO(filename).headers[0]
    # The helper returns the version load command; index 1 holds the
    # structure with the packed ``sdk`` field.
    packed_sdk = _find_version_cmd(first_slice)[1].sdk
    return _hex_triplet(packed_sdk)
def get_macho_load_commands(filepath):
    """
    Return the set of load command names used by a Mach-O file.

    Names come from ``get_cmd_name()`` on the first element of every
    command of every header. An empty set is returned when the file cannot
    be opened or parsed. IOError is handled like in the sibling helpers
    get_macho_filetype and get_macho_section_name.
    """
    try:
        macho = MachO(filepath)
    except (ValueError, struct.error, IOError):
        return set()
    return {
        command[0].get_cmd_name()
        for header in macho.headers
        for command in header.commands
    }
def splitFatBinaryFile(path, outputPath):
    """
    Split a fat Mach-O file into one file per header.

    Each header's byte range is handed to ``utils.getPartOfFile``
    (presumably it writes the slice to a file named "temp"; verify against
    that helper). The "temp" file is then hashed with SHA-1 and moved to
    ``outputPath`` under its hex digest.

    Does nothing except log a message when ``path`` is not a fat binary.
    """
    logger = utils.setLogger()
    if not utils.isFatBinary(path):
        logger.info("this file is not fat binary file,can not be splited")
        return
    m = MachO(path)
    for header in m.headers:
        utils.getPartOfFile(path, header.offset, header.size)
        # The context manager closes the handle even if reading fails, and
        # before the rename below.
        with open("temp", "rb") as fin:
            s1 = hashlib.sha1(fin.read()).hexdigest()
        os.rename("temp", outputPath + "/" + s1)
    logger.info("this fat binary file has been splited sucessfully")
def macho_archs(
        filename: typing.Union[os.PathLike[str], str]) -> typing.Set[str]:
    """
    Return the set of architecture names present in a Mach-O file.

    Names are looked up in CPU_TYPE_NAMES; "PowerPC" and "PowerPC64" are
    reported as "ppc" and "ppc64". A CPU type missing from CPU_TYPE_NAMES
    raises KeyError.
    """
    short_names = {"PowerPC": "ppc", "PowerPC64": "ppc64"}
    parsed = MachO(os.fspath(filename))
    full_names = (CPU_TYPE_NAMES[hdr.header.cputype] for hdr in parsed.headers)
    return {short_names.get(name, name) for name in full_names}
def detect_macho_type(path):
    """
    Return the ``filetype`` of the first header, or None if not a mach-o.

    macholib reports a non-Mach-O file with a ValueError whose message
    starts with "Unknown Mach-O"; only that error yields None. Any other
    ValueError is re-raised.

    Raises ValueError when the parsed file has no headers.
    """
    try:
        p = MachO(path)
    except ValueError as e:
        # An explicit check instead of assert: assert is stripped under -O,
        # which would hide unrelated parsing errors behind a None result.
        if not str(e).startswith("Unknown Mach-O"):
            raise
        return None
    if len(p.headers) < 1:
        raise ValueError("No headers in the mach-o file ?")
    return p.headers[0].filetype
def macosx_version_min(filename: str) -> tuple:
    """
    Get the -macosx-version-min used to compile a macOS binary.

    For fat binaries, the smallest version across all headers is returned.
    """
    packed_versions = []
    for header in MachO(filename).headers:
        found = _find_version_cmd(header)
        load_cmd, payload = found[0], found[1]
        # LC_VERSION_MIN_MACOSX stores the value in ``version``; otherwise
        # (macOS >= 10.14 uses LC_BUILD_VERSION) it is in ``minos``.
        if load_cmd.cmd == LC_VERSION_MIN_MACOSX:
            packed = payload.version
        else:
            packed = payload.minos
        packed_versions.append(packed)
    return min(_hex_triplet(packed) for packed in packed_versions)
def rewriteFramework(framework, frameworkMap):
    """
    Rewrite the load commands of a framework's main binary in place.

    The binary is ``<framework>/<name>``, where ``<name>`` is the
    framework's base name without extension. A load-command path equal to
    the binary's own absolute path is kept; every other path is replaced by
    ``frameworkMap[<file name of the path>]``, with None passed to macholib
    when the file name has no entry.

    :param framework: path of the framework directory
    :param frameworkMap: mapping from library file name to its new path
    """
    basename = os.path.splitext(os.path.basename(framework))[0]
    dyld = os.path.abspath(os.path.join(framework, basename))
    macho = MachO(dyld)

    def changefunc(key):
        if key == dyld:
            return dyld
        return frameworkMap.get(os.path.basename(key))

    macho.rewriteLoadCommands(changefunc)
    # The context manager closes (and flushes) the handle after writing.
    with open(dyld, "rb+") as f:
        macho.write(f)
def print_file(fp, path):
    """
    Write a summary of the Mach-O file at ``path`` to ``fp``.

    The path is written first. For every header a bracketed line gives the
    header class, endianness, word size and the ARCH_MAP entry for
    ``(endian, size)``, followed by each distinct name from
    ``walkRelocatables()`` on its own tab-indented line.

    Lines go through ``write`` so the function runs on Python 2 and 3 alike
    (``print >>fp`` is a syntax error on Python 3). As with ``print >>``,
    ``fp=None`` means standard output.
    """
    if fp is None:
        import sys
        fp = sys.stdout

    fp.write('%s\n' % (path,))
    m = MachO(path)
    for header in m.headers:
        seen = set()
        sz = '64-bit' if header.MH_MAGIC == MH_MAGIC_64 else '32-bit'
        fp.write(' [%s endian=%r size=%r arch=%r]\n' % (
            header.__class__.__name__, header.endian, sz,
            ARCH_MAP[(header.endian, sz)]))
        # Write each referenced name once, in first-seen order.
        for _idx, _name, other in header.walkRelocatables():
            if other not in seen:
                seen.add(other)
                fp.write('\t' + other + '\n')
def get_bin_info(bin_file):
    """Get Binary Information.

    Returns a dict with the keys 'endian', 'bit', 'arch' and 'subarch'
    describing the first header of the Mach-O file, or None when the file
    has no headers.
    """
    logger.info('Getting Binary Information')
    parsed = MachO(bin_file)
    for header in parsed.headers:
        cpu = header.header
        is_64_bit = header.MH_MAGIC in (MH_MAGIC_64, MH_CIGAM_64)
        word_size = '64-bit' if is_64_bit else '32-bit'
        cpu_name = CPU_TYPE_NAMES.get(cpu.cputype, cpu.cputype)
        cpu_subname = get_cpu_subtype(cpu.cputype, cpu.cpusubtype)
        # Returning inside the loop: only the first header is reported.
        return {
            'endian': header.endian,
            'bit': word_size,
            'arch': cpu_name,
            'subarch': cpu_subname,
        }
def init_macho_info(macho_file):
    """
    Collect offsets and sizes of selected sections of the __TEXT segment.

    Only the first header is examined. The first load command whose
    ``segname`` starts with ``__TEXT`` is scanned, and for each recognised
    section an ``<name>_offset`` / ``<name>_size`` pair is stored.

    :param macho_file: path of the Mach-O file
    :return: dict of offsets and sizes, or None when no __TEXT segment
        is found
    """
    # (section name prefix, offset key, size key)
    wanted_sections = (
        (b'__text', 'text_offset', 'text_size'),
        (b'__objc_classname', 'class_offset', 'class_size'),
        (b'__objc_methname', 'methname_offset', 'methname_size'),
        (b'__cstring', 'cstring_offset', 'cstring_size'),
        (b'__objc_methtype', 'methtype_offset', 'methtype_size'),
    )

    first_header = MachO(macho_file).headers[0]
    for _load_cmd, cmd, data in first_header.commands:
        # Commands without a segment name are not segments; skip them.
        try:
            segment_name = cmd.segname
        except AttributeError:
            continue
        if not segment_name.startswith(b'__TEXT'):
            continue

        params = {}
        for section in data:
            section_name = section.sectname
            for prefix, offset_key, size_key in wanted_sections:
                if section_name.startswith(prefix):
                    params[offset_key] = section.offset
                    params[size_key] = section.size
        return params
    return None
def getData(pathToSamples, c):
    """
    Build a feature table for the Mach-O samples in a directory.

    Every directory entry whose name does not contain ".json" is measured
    with ``getSizeInKiloByte`` and parsed with MachO. Entries that fail to
    parse are skipped.

    :param pathToSamples: directory holding the sample files
    :param c: value stored in the 'c' column of every row
    :return: DataFrame with the columns f_size, f_sections, f_flags and c,
        one row per parsed sample
    """
    df = pd.DataFrame(columns=('f_size', 'f_sections', 'f_flags', 'c'))
    currentRow = 0
    for f in os.listdir(pathToSamples):
        if ".json" in f:
            continue
        fullpath = os.path.join(pathToSamples, f)
        f_size = getSizeInKiloByte(fullpath)
        try:
            macho = MachO(fullpath)
            f_sections = getSectionCount(macho)
            f_flags = macho.headers[0].header.flags
        except Exception:
            # Exception rather than a bare except, so KeyboardInterrupt
            # and SystemExit still stop a long scan.
            continue
        df.loc[currentRow] = [f_size, f_sections, f_flags, c]
        currentRow += 1
    return df
def mac_is_binary_signed(filename):
    """
    Check if the given macOS binary file is signed.

    Returns True when any header of the file contains an LC_CODE_SIGNATURE
    load command, and False otherwise, including when the file cannot be
    parsed.
    """
    from macholib.MachO import MachO
    from macholib import mach_o  # constants

    try:
        parsed = MachO(filename)
    except Exception:
        return False

    # Stops at the first signature command found in any header.
    return any(
        cmd[0].cmd == mach_o.LC_CODE_SIGNATURE
        for header in parsed.headers
        for cmd in header.commands
    )
def print_file(fp, path):
    """
    Write a summary of the Mach-O file at ``path`` to ``fp``.

    The path is printed first. For every header a bracketed line gives the
    header class, endianness, word size and CPU type, followed by each
    distinct name from ``walkRelocatables()`` on its own tab-indented
    line. A blank line ends the report.
    """
    print(path, file=fp)
    parsed = MachO(path)
    for header in parsed.headers:
        word_size = '64-bit' if header.MH_MAGIC == MH_MAGIC_64 else '32-bit'
        cpu_type = header.header.cputype
        cpu_name = CPU_TYPE_NAMES.get(cpu_type, cpu_type)
        summary = ' [%s endian=%r size=%r arch=%r]' % (
            header.__class__.__name__, header.endian, word_size, cpu_name)
        print(summary, file=fp)

        # Print each referenced name once, in first-seen order.
        already_printed = set()
        for _idx, _name, other in header.walkRelocatables():
            if other in already_printed:
                continue
            already_printed.add(other)
            print('\t' + other, file=fp)
    print('', file=fp)
def set_macos_sdk_version(filename, major, minor, revision):
    """
    Overwrite the macOS SDK version declared in the given binary with the
    specified version.

    Each component must lie in the range 0..255; an out-of-range value
    fails an assertion before the file is touched.

    NOTE: currently, only the version in the first arch slice is modified.
    """
    # Validate values
    assert 0 <= major <= 255, "Invalid major version value!"
    assert 0 <= minor <= 255, "Invalid minor version value!"
    assert 0 <= revision <= 255, "Invalid revision value!"

    # Open the binary and locate the version command of the first slice
    binary = MachO(filename)
    first_slice = binary.headers[0]
    version_payload = _find_version_cmd(first_slice)[1]

    # Pack the triplet as 0x00MMmmrr and store it as the new SDK version
    packed_sdk = (major << 16) | (minor << 8) | revision
    version_payload.sdk = packed_sdk

    # Write changes back.
    with open(binary.filename, 'rb+') as out:
        binary.write(out)
def fix_exe_for_code_signing(filename):
    """
    Fixes the Mach-O headers to make code signing possible.

    Code signing on OS X does not work out of the box with embedding
    .pkg archive into the executable.

    The fix is done this way:
    - Make the embedded .pkg archive part of the Mach-O 'String Table'.
      'String Table' is at end of the OS X exe file so just change the size
      of the table to cover the end of the file.
    - Fix the size of the __LINKEDIT segment.

    Mach-O format specification:

    http://developer.apple.com/documentation/Darwin/Reference/ManPages/man5/Mach-O.5.html
    """
    exe_data = MachO(filename)
    # Every load command is a tuple: (cmd_metadata, segment, [section1, section2])
    cmds = exe_data.headers[0].commands  # '0' - Exe contains only one architecture.
    file_size = exe_data.headers[0].size

    ## Make the embedded .pkg archive part of the Mach-O 'String Table'.
    # Data about 'String Table' is in LC_SYMTAB load command.
    for c in cmds:
        if c[0].get_cmd_name() == 'LC_SYMTAB':
            data = c[1]
            # Increase the size of 'String Table' to cover the embedded .pkg file.
            data.strsize = file_size - data.stroff

    ## Fix the size of the __LINKEDIT segment.
    # __LINKEDIT segment data is the 4th item in the executable.
    # NOTE(review): the fixed index relies on the load command order;
    # confirm it holds for every supported toolchain.
    linkedit = cmds[3][1]
    new_segsize = file_size - linkedit.fileoff
    linkedit.filesize = new_segsize
    linkedit.vmsize = new_segsize

    ## Write changes back. The context manager closes the file even when
    ## writing fails.
    with open(exe_data.filename, 'rb+') as fp:
        exe_data.write(fp)
def get_macho_section_name(filepath):
    """
    Return the set of "<segment>.<section>" names of all non-empty sections.

    Every command of every header that reports ``nsects`` is scanned;
    commands lacking the expected attributes are ignored. An empty set is
    returned when the file cannot be opened or parsed.
    """
    def _clean(name):
        # Names are NUL-padded and may be bytes (Python 3) or str (Python 2).
        if not isinstance(name, str):
            name = name.decode("utf-8", "replace")
        return name.replace("\x00", "")

    section_types = set()
    try:
        macho = MachO(filepath)
    except (ValueError, struct.error, IOError):
        return set()
    for header in macho.headers:
        for command in header.commands:
            try:
                if command[1].nsects:
                    for sect in command[2]:
                        if sect.size:
                            section_types.add(
                                "%s.%s" % (_clean(sect.segname),
                                           _clean(sect.sectname)))
            except AttributeError:
                # Not a segment command (no nsects/segname/sectname).
                pass
    # The failure path returns a set, so the success path returns one too.
    return section_types
def main():
    """
    Copy every ``libswift*`` dylib a binary links against, transitively.

    Command line arguments:
      1. path of the binary to inspect
      2. platform directory passed to ``dylib_full_path``
      3. destination passed to ``shutil.copy``

    Starting from the binary, every name from ``walkRelocatables()`` that
    starts with "@rpath/libswift" is resolved with ``dylib_full_path`` and
    itself inspected. All resolved dylibs are copied at the end.
    """
    binary_path = sys.argv[1]
    platform_dir = sys.argv[2]
    out_path = sys.argv[3]

    rpath_prefix = "@rpath/"
    # We want any dylib linked against which name starts with "libswift"
    wanted_prefix = rpath_prefix + "libswift"

    seen = set()
    queue = [binary_path]
    while queue:
        path = queue.pop()
        m = MachO(path)
        for header in m.headers:
            for _, _, other in header.walkRelocatables():
                if not other.startswith(wanted_prefix):
                    continue
                # Slice the prefix off: str.lstrip() removes a *set of
                # characters*, not a prefix, and only worked by accident.
                dylib_name = other[len(rpath_prefix):]
                full_path = dylib_full_path(platform_dir, dylib_name)
                if full_path not in seen:
                    queue.append(full_path)
                    seen.add(full_path)

    for dylib in seen:
        shutil.copy(dylib, out_path)