def parse_load_dylib(self, lc):
    """Parse a dylib load command and record the library it names.

    Reads four 32-bit fields (name offset, timestamp, current version and
    compatibility version), then treats the remaining ``lc.size - 24``
    bytes as the library name. The name is registered on the Mach-O and
    the decoded fields are stored on ``lc``.
    """
    # Each get_int() consumes the next four bytes, so order is significant.
    fields = [get_int(self.f) for _ in range(4)]
    if self.macho.is_little():
        fields = [little(field, 'I') for field in fields]
    # The first field (name offset) is consumed but not used afterwards.
    _, raw_timestamp, raw_current, raw_compat = fields

    stamp = datetime.fromtimestamp(raw_timestamp)
    current = Version(version=raw_current)
    compat = Version(version=raw_compat)

    name = strip(self.f.read(lc.size - 24))
    self.macho.add_dylib(name)

    lc.add_data('timestamp', str(stamp))
    lc.add_data('current_version', current.version)
    lc.add_data('compatibility_version', compat.version)
    lc.add_data('dylib', name)
    self.macho.add_lc(lc)
def parse_requirements(self, signature, offset):
    """Parse the requirements blob located at ``signature.offset + offset``.

    On a magic mismatch a 'BAD MAGIC - REQUIREMENTS' abnormality is
    recorded and nothing else is parsed. Otherwise every listed
    requirement is parsed and added to ``signature``. The file position
    is restored before returning in both cases.
    """
    saved_pos = self.f.tell()
    blob_start = signature.offset + offset
    self.f.seek(blob_start)
    magic = get_int(self.f)
    if magic != dictionary.signatures['REQUIREMENTS']:
        details = {
            'offset': blob_start,
            'magic': hex(magic),
            'expected': hex(dictionary.signatures['REQUIREMENTS'])
        }
        self.add_abnormality(
            Abnormality(title='BAD MAGIC - REQUIREMENTS', data=details))
    else:
        # Skip size
        self.f.read(4)
        remaining = get_int(self.f)
        while remaining > 0:
            kind = dictionary.requirements[get_int(self.f)]
            req_offset = get_int(self.f)
            req = Requirement(req_type=kind, offset=req_offset)
            # Requirement offsets are relative to the start of this blob.
            self.parse_requirement(req, blob_start)
            signature.add_requirement(req)
            remaining -= 1
    self.f.seek(saved_pos)
def parse_sig(self, macho):
    """Parse the embedded code signature referenced by CODE_SIGNATURE.

    Returns immediately when ``macho`` has no CODE_SIGNATURE load command.
    Records an abnormality (and parses nothing) when the signature offset
    lies at or beyond the end of the file, or when the blob magic is not
    the expected EMBEDDED_SIGNATURE value. Otherwise each index entry is
    dispatched to the matching slot parser and the resulting Signature is
    stored on ``macho.signature``. The file position is restored after
    any seek.
    """
    if not macho.has_lc('CODE_SIGNATURE'):
        return
    prev = self.f.tell()
    true_offset = (macho.offset +
                   macho.get_lc('CODE_SIGNATURE').data['offset'])
    if true_offset >= self.file.size:
        data = {
            'offset': true_offset,
            'file_size': self.file.size
        }
        a = Abnormality(title='CODE_SIGNATURE OUT OF BOUNDS', data=data)
        self.add_abnormality(a)
        # No seek has happened yet, so there is nothing to restore.
        return
    self.f.seek(true_offset)
    magic = get_int(self.f)
    if magic != dictionary.signatures['EMBEDDED_SIGNATURE']:
        data = {
            'offset': true_offset,
            'magic': hex(magic),
            'expected': hex(dictionary.signatures['EMBEDDED_SIGNATURE'])
        }
        a = Abnormality(title='BAD MAGIC - EMBEDDED_SIGNATURE', data=data)
        self.add_abnormality(a)
        self.f.seek(prev)
        return
    size = get_int(self.f)
    count = get_int(self.f)
    signature = Signature(offset=true_offset, size=size, count=count)
    while count > 0:
        index_type = get_int(self.f)
        try:
            index_type = dictionary.indeces[index_type]
        except LookupError:
            # Only a failed lookup means "unknown index type"; anything
            # else (e.g. KeyboardInterrupt) must not be swallowed here.
            data = {
                'offset': self.f.tell() - 4,
                'index_type': index_type
            }
            a = Abnormality(title='INVALID CODE_SIGNATURE INDEX_TYPE',
                            data=data)
            self.add_abnormality(a)
        # The blob offset is always consumed so the index stays aligned,
        # even when the index type is unknown and no parser is invoked.
        offset = get_int(self.f)
        if index_type == 'SignatureSlot':
            self.parse_certs(signature, offset)
        elif index_type == 'CodeDirectorySlot':
            self.parse_codedirectory(signature, offset)
        elif index_type == 'EntitlementSlot':
            self.parse_entitlement(signature, offset)
        elif index_type == 'RequirementsSlot':
            self.parse_requirements(signature, offset)
        count -= 1
    macho.signature = signature
    self.f.seek(prev)
def parse_linkedit_data(self, lc):
    """Parse a load command consisting of a 32-bit offset and size.

    Both values are stored on ``lc`` (keys 'offset' and 'size') and the
    command is added to the current Mach-O.
    """
    data_offset = get_int(self.f)
    data_size = get_int(self.f)
    if self.macho.is_little():
        data_offset = little(data_offset, 'I')
        data_size = little(data_size, 'I')
    for key, value in (('offset', data_offset), ('size', data_size)):
        lc.add_data(key, value)
    self.macho.add_lc(lc)
def parse_twolevel_hints(self, lc):
    """Parse a two-level hints load command (32-bit offset and hint count).

    The values are stored on ``lc`` under 'offset' and 'nhints' and the
    command is added to the current Mach-O.
    """
    keys = ('offset', 'nhints')
    # Read in declaration order: offset first, then nhints.
    values = [get_int(self.f) for _ in keys]
    if self.macho.is_little():
        values = [little(value, 'I') for value in values]
    for key, value in zip(keys, values):
        lc.add_data(key, value)
    self.macho.add_lc(lc)
def parse_certs(self, signature, offset):
    """Parse the CMS blob wrapper at ``signature.offset + offset``.

    Records 'BAD MAGIC - BLOBWRAPPER' on a magic mismatch and
    'NON-POSITIVE CMS SIZE' when the payload size (blob size minus 8)
    is not positive. Otherwise every certificate found in the DER-encoded
    CMS payload is converted to a Certificate and added to ``signature``.
    The file position is restored before returning.
    """
    def name_fields(name):
        # Extract the C / O / OU / CN components of a certificate name.
        return {
            'country': self.get_cert_name_data(name, oid.Oid('C')),
            'org': self.get_cert_name_data(name, oid.Oid('O')),
            'org_unit': self.get_cert_name_data(name, oid.Oid('OU')),
            'common_name': self.get_cert_name_data(name, oid.Oid('CN'))
        }

    saved_pos = self.f.tell()
    blob_start = signature.offset + offset
    self.f.seek(blob_start)
    magic = get_int(self.f)
    if magic != dictionary.signatures['BLOBWRAPPER']:
        details = {
            'offset': blob_start,
            'magic': hex(magic),
            'expected': hex(dictionary.signatures['BLOBWRAPPER'])
        }
        self.add_abnormality(
            Abnormality(title='BAD MAGIC - BLOBWRAPPER', data=details))
        self.f.seek(saved_pos)
        return

    # The stored size includes the 8 bytes already consumed
    # (magic and the size field itself).
    size = get_int(self.f) - 8
    if size <= 0:
        details = {
            'offset': blob_start,
            'size': size
        }
        self.add_abnormality(
            Abnormality(title='NON-POSITIVE CMS SIZE', data=details))
    else:
        signed_data = cms.CMS(self.f.read(size), format='DER')
        for x509 in signed_data.certs:
            serial = x509.serial
            subject = name_fields(x509.subject)
            issuer = name_fields(x509.issuer)
            is_ca = x509.check_ca()
            signature.add_cert(Certificate(serial=serial, subject=subject,
                                           issuer=issuer, ca=is_ca))
    self.f.seek(saved_pos)
def parse_version_min_os(self, lc):
    """Parse a minimum-OS-version load command.

    Reads the 32-bit version and SDK values, wraps each in a Version,
    stores their ``.version`` on ``lc`` and remembers the minimum OS
    version on the Mach-O as ``minos``.
    """
    raw_version = get_int(self.f)
    raw_sdk = get_int(self.f)
    if self.macho.is_little():
        raw_version = little(raw_version, 'I')
        raw_sdk = little(raw_sdk, 'I')
    min_version = Version(version=raw_version)
    sdk_version = Version(version=raw_sdk)
    lc.add_data('version', min_version.version)
    lc.add_data('sdk', sdk_version.version)
    self.macho.minos = min_version
    self.macho.add_lc(lc)
def parse_segment(self, lc):
    """Parse a segment load command together with its section headers.

    Address-sized fields (vmaddr, vmsize, offset, segsize) are read as
    32-bit values for 32-bit Mach-Os and 64-bit values otherwise. Section
    parsing stops early, recording a 'SECTION OUT OF BOUNDS' abnormality,
    as soon as the next section header would extend past the end of the
    file. The resulting Segment replaces ``lc`` as the load command added
    to the Mach-O.
    """
    is_32 = self.macho.is_32_bit()
    read_word = get_int if is_32 else get_ll

    name = strip(self.f.read(16))
    vmaddr = read_word(self.f)
    vmsize = read_word(self.f)
    offset = read_word(self.f)
    segsize = read_word(self.f)
    maxprot = get_int(self.f)
    initprot = get_int(self.f)
    nsects = get_int(self.f)
    flags = get_int(self.f)

    if self.macho.is_little():
        word_fmt = 'I' if is_32 else 'Q'
        vmaddr = little(vmaddr, word_fmt)
        vmsize = little(vmsize, word_fmt)
        offset = little(offset, word_fmt)
        segsize = little(segsize, word_fmt)
        maxprot = little(maxprot, 'I')
        initprot = little(initprot, 'I')
        nsects = little(nsects, 'I')
        flags = little(flags, 'I')

    # Only the low three bits are used to look up the protection name.
    maxprot = dictionary.protections[maxprot & 0b111]
    initprot = dictionary.protections[initprot & 0b111]
    entropy = self.get_segment_entropy(offset, segsize)

    segment = Segment(cmd=lc.cmd, size=lc.size, name=name, vmaddr=vmaddr,
                      vmsize=vmsize, offset=offset, segsize=segsize,
                      maxprot=maxprot, initprot=initprot, nsects=nsects,
                      entropy=entropy)

    # Bytes occupied by one section header for this word size.
    sect_size = 68 if is_32 else 80
    for _ in range(segment.nsects):
        if self.f.tell() + sect_size > self.file_size:
            data = {
                'offset': self.f.tell(),
                'file_size': self.file_size
            }
            a = Abnormality(title='SECTION OUT OF BOUNDS', data=data)
            # Previously the abnormality was created but never recorded.
            self.add_abnormality(a)
            break
        segment.add_sect(self.parse_section())

    self.parse_segment_flags(segment, flags)
    self.macho.add_lc(segment)
def identify_file(self, offset):
    """Identify the file type from the 32-bit magic at ``offset``.

    Returns the ``dictionary.machos`` entry for the magic when one
    exists, otherwise the raw magic value. The file position is left
    where it was on entry.
    """
    saved_pos = self.f.tell()
    self.f.seek(offset)
    magic = get_int(self.f)
    self.f.seek(saved_pos)
    known = magic in dictionary.machos
    return dictionary.machos[magic] if known else magic
def parse_section(self):
    """Parse one section header at the current file position.

    ``addr`` and ``size`` are 32-bit for 32-bit Mach-Os and 64-bit
    otherwise; the remaining fields are always 32-bit. The trailing
    8 (32-bit) or 12 (64-bit) bytes are skipped.

    Returns the populated Section after its flags have been parsed.
    """
    narrow = self.macho.is_32_bit()
    read_word = get_int if narrow else get_ll

    sect_name = strip(self.f.read(16))
    seg_name = strip(self.f.read(16))
    addr = read_word(self.f)
    size = read_word(self.f)
    offset = get_int(self.f)
    align = get_int(self.f)
    reloff = get_int(self.f)
    nreloc = get_int(self.f)
    flags = get_int(self.f)
    # Skip the trailing bytes of the header, which are not interpreted.
    self.f.read(8 if narrow else 12)

    if self.macho.is_little():
        word_fmt = 'I' if narrow else 'Q'
        addr = little(addr, word_fmt)
        size = little(size, word_fmt)
        offset = little(offset, 'I')
        align = little(align, 'I')
        reloff = little(reloff, 'I')
        nreloc = little(nreloc, 'I')
        flags = little(flags, 'I')

    sect = Section(name=sect_name, segname=seg_name, addr=addr,
                   offset=offset, align=align, reloff=reloff,
                   nreloc=nreloc, size=size)
    self.parse_section_flags(sect, flags)
    return sect
def parse_prebind_cksum(self, lc):
    """Parse a prebind checksum load command (a single 32-bit value).

    The checksum is stored on ``lc`` under 'cksum' and the command is
    added to the current Mach-O.
    """
    checksum = get_int(self.f)
    checksum = little(checksum, 'I') if self.macho.is_little() else checksum
    lc.add_data('cksum', checksum)
    self.macho.add_lc(lc)
def parse_encryption_info(self, lc):
    """Parse an encryption info load command (offset, size and id).

    The three 32-bit values are stored on ``lc`` under 'offset', 'size'
    and 'id'. For ENCRYPTION_INFO_64 four additional padding bytes are
    consumed before the command is added to the Mach-O.
    """
    crypt_offset = get_int(self.f)
    crypt_size = get_int(self.f)
    crypt_id = get_int(self.f)
    if self.macho.is_little():
        crypt_offset = little(crypt_offset, 'I')
        crypt_size = little(crypt_size, 'I')
        crypt_id = little(crypt_id, 'I')
    lc.add_data('offset', crypt_offset)
    lc.add_data('size', crypt_size)
    lc.add_data('id', crypt_id)
    needs_padding_skip = lc.cmd == 'ENCRYPTION_INFO_64'
    if needs_padding_skip:
        # Skip padding
        self.f.read(4)
    self.macho.add_lc(lc)
def parse_segment(self, lc):
    """Parse a segment load command and the section headers that follow.

    vmaddr, vmsize, offset and segsize are 32-bit for 32-bit Mach-Os and
    64-bit otherwise; protections, section count and flags are always
    32-bit. If a section header would run past the end of the file, a
    'SECTION OUT OF BOUNDS' abnormality is recorded and the remaining
    sections are not parsed. A Segment built from the parsed fields is
    added to the Mach-O in place of ``lc``.
    """
    name = strip(self.f.read(16))
    if self.macho.is_32_bit():
        vmaddr = get_int(self.f)
        vmsize = get_int(self.f)
        offset = get_int(self.f)
        segsize = get_int(self.f)
    else:
        vmaddr = get_ll(self.f)
        vmsize = get_ll(self.f)
        offset = get_ll(self.f)
        segsize = get_ll(self.f)
    maxprot = get_int(self.f)
    initprot = get_int(self.f)
    nsects = get_int(self.f)
    flags = get_int(self.f)
    if self.macho.is_little():
        fmt = 'I' if self.macho.is_32_bit() else 'Q'
        vmaddr = little(vmaddr, fmt)
        vmsize = little(vmsize, fmt)
        offset = little(offset, fmt)
        segsize = little(segsize, fmt)
        maxprot = little(maxprot, 'I')
        initprot = little(initprot, 'I')
        nsects = little(nsects, 'I')
        flags = little(flags, 'I')
    # Protection names are looked up from the low three bits only.
    maxprot = dictionary.protections[maxprot & 0b111]
    initprot = dictionary.protections[initprot & 0b111]
    entropy = self.get_segment_entropy(offset, segsize)
    segment = Segment(cmd=lc.cmd, size=lc.size, name=name, vmaddr=vmaddr,
                      vmsize=vmsize, offset=offset, segsize=segsize,
                      maxprot=maxprot, initprot=initprot, nsects=nsects,
                      entropy=entropy)
    if self.macho.is_32_bit():
        sect_size = 68
    else:
        sect_size = 80
    for _ in range(segment.nsects):
        if self.f.tell() + sect_size > self.file_size:
            data = {
                'offset': self.f.tell(),
                'file_size': self.file_size
            }
            a = Abnormality(title='SECTION OUT OF BOUNDS', data=data)
            # Record the abnormality; it used to be built and discarded.
            self.add_abnormality(a)
            break
        sect = self.parse_section()
        segment.add_sect(sect)
    self.parse_segment_flags(segment, flags)
    self.macho.add_lc(segment)
def parse_routines(self, lc):
    """Parse a ROUTINES / ROUTINES_64 load command.

    For 'ROUTINES' the init address and module are 32-bit and 24 trailing
    bytes are skipped; for any other command value they are 64-bit and
    48 trailing bytes are skipped. Both values are stored on ``lc``.
    """
    if lc.cmd == 'ROUTINES':
        read_word, word_fmt, trailing = get_int, 'I', 24
    else:
        read_word, word_fmt, trailing = get_ll, 'Q', 48

    init_address = read_word(self.f)
    init_module = read_word(self.f)
    if self.macho.is_little():
        init_address = little(init_address, word_fmt)
        init_module = little(init_module, word_fmt)
    # Consume the rest of the command, which is not interpreted.
    self.f.read(trailing)

    lc.add_data('init_address', init_address)
    lc.add_data('init_module', init_module)
    self.macho.add_lc(lc)
def parse_entitlement(self, signature, offset):
    """Parse the entitlement blob at ``signature.offset + offset``.

    Records 'BAD MAGIC - ENTITLEMENT' on a magic mismatch. Otherwise the
    payload (blob size minus 8 bytes) is parsed as a property list and
    added to ``signature`` as an Entitlement. The file position is
    restored before returning.
    """
    saved_pos = self.f.tell()
    blob_start = signature.offset + offset
    self.f.seek(blob_start)
    magic = get_int(self.f)
    if magic == dictionary.signatures['ENTITLEMENT']:
        # The stored size includes the 8 bytes of magic and size field.
        payload_size = get_int(self.f) - 8
        parsed = plistlib.readPlistFromString(self.f.read(payload_size))
        signature.add_entitlement(
            Entitlement(size=payload_size, plist=parsed))
    else:
        details = {
            'offset': blob_start,
            'magic': hex(magic),
            'expected': hex(dictionary.signatures['ENTITLEMENT'])
        }
        self.add_abnormality(
            Abnormality(title='BAD MAGIC - ENTITLEMENT', data=details))
    self.f.seek(saved_pos)
def parse_thread(self, lc):
    """Parse a THREAD / UNIXTHREAD load command.

    Reads the 32-bit state flavor and count, skips the rest of the
    command (``lc.size - 16`` bytes), and maps the flavor to its name via
    ``dictionary.thread_states``. An unknown flavor is recorded as an
    'INVALID THREAD STATE FLAVOR' abnormality and kept as its raw value.
    """
    state = get_int(self.f)
    count = get_int(self.f)
    # The remaining bytes of the command are skipped, not interpreted.
    self.f.read(lc.size - 16)
    if self.macho.is_little():
        state = little(state, 'I')
        count = little(count, 'I')
    try:
        state = dictionary.thread_states[state]
    except LookupError:
        # Catch only the failed lookup instead of every exception.
        # The whole command has been consumed, so tell() - lc.size is
        # the offset at which this load command started.
        data = {'offset': self.f.tell() - lc.size, 'state': state}
        a = Abnormality(title='INVALID THREAD STATE FLAVOR', data=data)
        self.add_abnormality(a)
    lc.add_data('state', state)
    lc.add_data('count', count)
    self.macho.add_lc(lc)
def parse_universal(self):
    """Parse a universal (fat) file header and every Mach-O it lists.

    Each architecture entry is read as five 32-bit fields: two skipped
    (cputype, subtype), then offset, size and align. Entries that extend
    past the end of the file, or whose magic is not a known Mach-O magic,
    are recorded as abnormalities and skipped. The remaining Mach-Os are
    parsed and the Universal object becomes ``self.file.content``.
    """
    self.f.seek(0)
    # skip magic
    self.f.read(4)
    nmachos = get_int(self.f)
    u = Universal(nmachos=nmachos)
    u_size = self.file.size
    for _ in range(u.nmachos):
        # skip cputype, subtype
        self.f.read(8)
        offset = get_int(self.f)
        size = get_int(self.f)
        # skip align -- done before any `continue` below so that the next
        # entry is always read from the correct position. Skipping it
        # only on the success path left the reader 4 bytes short after
        # an out-of-bounds entry.
        self.f.read(4)
        # Abnormality OUT_OF_BOUNDS check
        if offset + size > u_size:
            data = {
                'offset': offset,
                'size': size,
                'file_size': u_size
            }
            a = Abnormality(title='MACH-O OUT OF BOUNDS', data=data)
            self.add_abnormality(a)
            continue
        # identify_file() restores the file position itself.
        identity = self.identify_file(offset)
        # Abnormality BAD_MAGIC check
        if identity not in dictionary.machos.values():
            data = {
                'offset': offset,
                'magic': identity,
            }
            # Pass the collected details along; they used to be dropped.
            a = Abnormality(title='BAD MAGIC - MACH-O', data=data)
            self.add_abnormality(a)
            continue
        u.add_macho(MachO(archive=True, offset=offset, arch=identity[0],
                          endi=identity[1], size=size))
    for macho in u.gen_machos():
        self.parse_macho(macho)
    self.file.content = u
def parse_symtab(self, lc):
    """Parse a SYMTAB load command.

    Reads the symbol table offset and count and the string table offset
    and size (all 32-bit), creates the SymbolTable and StringTable on the
    Mach-O, and stores the four values on ``lc``.
    """
    keys = ('symoff', 'nsyms', 'stroff', 'strsize')
    # Values are read in the order the keys are listed.
    values = [get_int(self.f) for _ in keys]
    if self.macho.is_little():
        values = [little(value, 'I') for value in values]
    sym_offset, sym_count, str_offset, str_size = values

    self.macho.symtab = SymbolTable(offset=sym_offset, nsyms=sym_count)
    self.macho.strtab = StringTable(offset=str_offset, size=str_size)

    for key, value in zip(keys, values):
        lc.add_data(key, value)
    self.macho.add_lc(lc)
def parse_dysymtab(self, lc):
    """Parse the first six fields of a DYSYMTAB load command.

    Reads il, nl, ie, ne, iu and nu (all 32-bit) and skips the rest of
    the command (``lc.size - 32`` bytes). The values are byte-swapped for
    little-endian Mach-Os, then stored both on the Mach-O's symbol table
    (when one exists) and on ``lc``.

    Earlier behaviour stored the symbol table attributes only for
    little-endian files and recorded the unconverted values on ``lc``;
    both now receive the same, correctly ordered values for either
    endianness.
    """
    keys = ('il', 'nl', 'ie', 'ne', 'iu', 'nu')
    # Values are read in the order the keys are listed.
    values = [get_int(self.f) for _ in keys]
    # The remaining fields of the command are skipped, not interpreted.
    self.f.read(lc.size - 32)
    if self.macho.is_little():
        values = [little(value, 'I') for value in values]

    # Guard against a DYSYMTAB that appears without a parsed symbol table
    # so that storing the values for big-endian files cannot introduce a
    # new failure.
    symtab = getattr(self.macho, 'symtab', None)
    for key, value in zip(keys, values):
        if symtab is not None:
            setattr(symtab, key, value)
        lc.add_data(key, value)
    self.macho.add_lc(lc)
def parse_prebound_dylib(self, lc):
    """Parse a PREBOUND_DYLIB load command.

    Reads a string, a 32-bit module count and a second string, in that
    order, and stores them on ``lc`` as 'dylib', 'nmodules' and
    'linked_modules'.
    """
    # NOTE(review): how many bytes readstring() consumes is not visible
    # here -- confirm it matches the on-disk layout of this command.
    library = readstring(self.f)
    module_count = get_int(self.f)
    linked = readstring(self.f)
    if self.macho.is_little():
        module_count = little(module_count, 'I')
    entries = (
        ('dylib', library),
        ('nmodules', module_count),
        ('linked_modules', linked),
    )
    for key, value in entries:
        lc.add_data(key, value)
    self.macho.add_lc(lc)
def parse_thread(self, lc):
    """Parse a THREAD / UNIXTHREAD load command.

    The 32-bit flavor and count are read, the remaining
    ``lc.size - 16`` bytes are skipped, and the flavor is translated
    through ``dictionary.thread_states``. When the flavor is unknown an
    'INVALID THREAD STATE FLAVOR' abnormality is recorded and the raw
    value is stored instead of a name.
    """
    flavor = get_int(self.f)
    count = get_int(self.f)
    # Skip the thread state payload itself.
    self.f.read(lc.size - 16)
    if self.macho.is_little():
        flavor = little(flavor, 'I')
        count = little(count, 'I')
    try:
        flavor = dictionary.thread_states[flavor]
    except LookupError:
        # Narrowed from a bare except so unrelated errors still surface.
        # After the skip above the position is the end of the command,
        # so subtracting lc.size gives the command's start offset.
        data = {
            'offset': self.f.tell() - lc.size,
            'state': flavor
        }
        a = Abnormality(title='INVALID THREAD STATE FLAVOR', data=data)
        self.add_abnormality(a)
    lc.add_data('state', flavor)
    lc.add_data('count', count)
    self.macho.add_lc(lc)
def parse_version_min_os(self, lc):
    """Parse a minimum-OS-version load command.

    Both 32-bit values are split as ``x = v >> 16``,
    ``y = (v >> 8) & 0xff`` and ``z = v & 0xff``. The version becomes an
    OSVersion (stored on the Mach-O as ``minos``); the SDK is stored as
    the dotted string 'x.y.z'.
    """
    def split(packed):
        # Unpack the major / minor / patch components of a packed value.
        return packed >> 16, (packed >> 8) & 0xff, packed & 0xff

    raw_version = get_int(self.f)
    raw_sdk = get_int(self.f)
    if self.macho.is_little():
        raw_version = little(raw_version, 'I')
        raw_sdk = little(raw_sdk, 'I')

    major, minor, patch = split(raw_version)
    min_version = OSVersion(vx=major, vy=minor, vz=patch)
    sdk_text = '.'.join([str(part) for part in split(raw_sdk)])

    lc.add_data('version', min_version.version)
    lc.add_data('sdk', sdk_text)
    self.macho.minos = min_version
    self.macho.add_lc(lc)
def parse_load_dylib(self, lc):
    """Parse a dylib load command, keeping only the library name.

    The first 32-bit field is the offset of the name measured from the
    start of the load command. The bytes between the current position
    and that offset are skipped and everything from the offset to the
    end of the command (``lc.size``) is taken as the name.

    The name length used to be fixed at ``lc.size - 24``, which consumes
    the wrong number of bytes whenever the offset is not 24 and so
    desynchronises every following load command. The skip and the name
    length are now derived from the same offset and clamped so that
    neither can be negative (a negative ``read`` length reads to EOF).
    """
    offset = get_int(self.f)
    if self.macho.is_little():
        offset = little(offset, 'I')
    # 12 bytes of the command are already consumed: cmd, cmdsize (read by
    # the caller) and the name offset just read.
    consumed = 12
    body_size = max(lc.size - consumed, 0)
    # skip to dylib, never moving backwards or past the end of the command
    skip = min(max(offset - consumed, 0), body_size)
    self.f.read(skip)
    dylib = strip(self.f.read(body_size - skip))
    self.macho.add_dylib(dylib)
    lc.add_data('dylib', dylib)
    self.macho.add_lc(lc)
def parse_dyld_info(self, lc):
    """Parse a DYLD_INFO / DYLD_INFO_ONLY load command.

    Reads ten consecutive 32-bit values (offset/size pairs for rebase,
    bind, weak bind, lazy bind and export), stores each on ``lc`` under
    its field name and adds the command to the Mach-O.
    """
    keys = (
        'rebase_off', 'rebase_size',
        'bind_off', 'bind_size',
        'weak_bind_off', 'weak_bind_size',
        'lazy_bind_off', 'lazy_bind_size',
        'export_off', 'export_size',
    )
    # Values are read in the order the keys are listed.
    values = [get_int(self.f) for _ in keys]
    if self.macho.is_little():
        values = [little(value, 'I') for value in values]
    for key, value in zip(keys, values):
        lc.add_data(key, value)
    self.macho.add_lc(lc)
def parse_linker_option(self, lc):
    """Parse a LINKER_OPTION load command.

    Reads the 32-bit string count followed by that many strings, then
    skips whatever is left of the command (``lc.size`` minus the 12
    header bytes and the bytes the strings occupied).
    """
    count = get_int(self.f)
    if self.macho.is_little():
        count = little(count, 'I')
    strings_start = self.f.tell()
    options = [readstring(self.f) for _ in range(count)]
    strings_len = self.f.tell() - strings_start
    # Consume the padding that follows the strings.
    self.f.read(lc.size - strings_len - 12)
    lc.add_data('count', count)
    lc.add_data('linker_options', options)
    self.macho.add_lc(lc)
def parse_requirement(self, requirement, offset):
    """Parse one requirement blob at ``offset + requirement.offset``.

    Records 'BAD MAGIC - REQUIREMENT' on a magic mismatch; otherwise the
    parsed expression is stored on ``requirement.expression``. The file
    position is restored before returning.
    """
    saved_pos = self.f.tell()
    blob_start = offset + requirement.offset
    self.f.seek(blob_start)
    magic = get_int(self.f)
    if magic == dictionary.signatures['REQUIREMENT']:
        # Skip size and kind
        self.f.read(8)
        requirement.expression = self.parse_expression(False)
    else:
        details = {
            'offset': blob_start,
            'magic': hex(magic),
            'expected': hex(dictionary.signatures['REQUIREMENT'])
        }
        self.add_abnormality(
            Abnormality(title='BAD MAGIC - REQUIREMENT', data=details))
    self.f.seek(saved_pos)
def parse_match(self):
    """Parse a match suffix and return it as requirement-language text.

    Reads the 32-bit match type, translates it through
    ``dictionary.matches`` when known, and renders the operand (if any)
    read via ``parse_data()``. Unrecognised types yield
    ' UNKNOWN MATCH TYPE (<type>)'.
    """
    # (prefix, suffix) wrapped around the operand rendered as text.
    text_forms = {
        'matchEqual': (' = "', '"'),
        'matchContains': (' ~ "', '"'),
        'matchBeginsWith': (' = "', '*"'),
        'matchEndsWith': (' = "*', '"'),
    }
    # Prefix placed before the operand rendered as an integer.
    numeric_forms = {
        'matchLessThan': ' < ',
        'matchGreaterThan': ' > ',
        'matchLessEqual': ' <= ',
        'matchGreaterEqual': ' >= ',
    }

    match_type = get_int(self.f)
    if match_type in dictionary.matches:
        match_type = dictionary.matches[match_type]

    if match_type == 'matchExists':
        return ' /* exists */'
    if match_type in text_forms:
        prefix, suffix = text_forms[match_type]
        return prefix + str(self.parse_data()) + suffix
    if match_type in numeric_forms:
        # The operand bytes are interpreted as one hexadecimal number.
        operand = int(self.parse_data().encode('hex'), 16)
        return numeric_forms[match_type] + str(operand)
    return ' UNKNOWN MATCH TYPE (' + str(match_type) + ')'
def parse_syms(self, macho):
    """Parse the symbol table entries of ``macho``.

    Each entry is read as a 32-bit string index, one type byte, one
    section byte, a 16-bit description and a 32-bit (or, for 64-bit
    Mach-Os, 64-bit) value. Entries whose type byte is >= 32 are treated
    as stabs; the rest are decoded into pext / type / ext bits.

    Abnormalities are recorded when the table starts outside the Mach-O
    or the file, when remaining entries would run out of bounds, and for
    unknown stab or n_type values (such entries are skipped). The file
    position is restored before returning.

    Two defects are fixed relative to the earlier behaviour:
    * the per-entry size used for bounds checks and reported offsets was
      60/56 although 16/12 bytes are read per entry;
    * an unknown n_type fell through and built a Symbol from an unbound
      or stale ``n_type``; it is now skipped like an unknown stab.
    """
    prev = self.f.tell()
    true_offset = macho.offset + macho.symtab.offset
    is_64 = macho.is_64_bit()
    is_little = macho.is_little()
    # Bytes read per entry below: 4 + 1 + 1 + 2 + (8 or 4).
    symbol_size = 16 if is_64 else 12
    macho_end = macho.offset + macho.size

    if not (true_offset < macho_end and true_offset < self.file.size):
        data = {
            'offset': true_offset,
            'mach-o_size': macho.size,
            'mach-o_offset': macho.offset,
            'file_size': self.file.size
        }
        a = Abnormality(title='SYMBOL TABLE OUT OF BOUNDS', data=data)
        self.add_abnormality(a)
        self.f.seek(prev)
        return

    self.f.seek(true_offset)
    for _ in range(macho.symtab.nsyms):
        entry_end = self.f.tell() + symbol_size
        if entry_end > macho_end or entry_end > self.file.size:
            data = {
                'offset': self.f.tell(),
                'mach-o_size': macho.size,
                'mach-o_offset': macho.offset,
                'file_size': self.file.size
            }
            a = Abnormality(title='REMAINING SYMBOLS OUT OF BOUNDS',
                            data=data)
            self.add_abnormality(a)
            self.f.seek(prev)
            return

        index = get_int(self.f)
        sym_type = int(self.f.read(1).encode('hex'), 16)
        sect = int(self.f.read(1).encode('hex'), 16)
        desc = int(self.f.read(2).encode('hex'), 16)
        value = get_ll(self.f) if is_64 else get_int(self.f)
        if is_little:
            value = little(value, 'Q' if is_64 else 'I')
            index = little(index, 'I')

        if sym_type >= 32:
            if sym_type not in dictionary.stabs:
                data = {
                    'offset': self.f.tell() - symbol_size,
                    'index': index,
                    'sym_type': sym_type,
                    'sect': sect,
                    'desc': desc,
                    'value': value
                }
                a = Abnormality(title='UNKNOWN STAB', data=data)
                self.add_abnormality(a)
                continue
            stab = dictionary.stabs[sym_type]
            sym = Symbol(index=index, stab=stab, sect=sect, value=value)
            macho.symtab.add_sym(sym)
            continue

        pext = sym_type & 0x10
        type_bits = sym_type & 0x0e
        if type_bits not in dictionary.n_types:
            data = {
                'offset': self.f.tell() - symbol_size,
                'index': index,
                'pext': pext,
                'n_type': type_bits,
                'sect': sect,
                'desc': desc,
                'value': value
            }
            a = Abnormality(title='UNKNOWN N_TYPE', data=data)
            self.add_abnormality(a)
            # Skip the entry: there is no valid type to build a Symbol.
            continue
        n_type = dictionary.n_types[type_bits]
        ext = sym_type & 0x01
        # desc was decoded from its two bytes in file order, so which
        # half holds which field depends on the Mach-O's endianness.
        if is_little:
            dylib = desc & 0x0f
            ref = (desc >> 8) & 0xff
        else:
            dylib = (desc >> 8) & 0xff
            ref = desc & 0x0f
        sym = Symbol(index=index, pext=pext, sym_type=n_type, ext=ext,
                     sect=sect, dylib=dylib, ref=ref, value=value)
        macho.symtab.add_sym(sym)
    self.f.seek(prev)
def parse_lcs(self):
    """Parse every load command of the current Mach-O.

    For each of ``self.macho.nlcs`` commands the 32-bit command id and
    size are read and the id is translated through
    ``dictionary.loadcommands``. Unknown ids are recorded as an
    'UNKNOWN LOADCOMMAND' abnormality, added as a bare LoadCommand and
    their body (``size - 8`` bytes) is skipped. Known commands are handed
    to the matching ``parse_*`` method; commands without a handler are
    left untouched, exactly as before.
    """
    # Command name -> name of the handling method. Methods are looked up
    # lazily so a missing handler only matters if its command appears.
    groups = (
        ('parse_segment', ('SEGMENT', 'SEGMENT_64')),
        ('parse_symtab', ('SYMTAB',)),
        ('parse_symseg', ('SYMSEG',)),
        ('parse_thread', ('THREAD', 'UNIXTHREAD')),
        ('parse_fvmlib', ('LOADFVMLIB', 'IDFVMLIB')),
        ('parse_ident', ('IDENT',)),
        ('parse_fvmfile', ('FVMFILE',)),
        ('parse_prepage', ('PREPAGE',)),
        ('parse_dysymtab', ('DYSYMTAB',)),
        ('parse_load_dylib', ('LOAD_DYLIB', 'ID_DYLIB', 'LAZY_LOAD_DYLIB',
                              'LOAD_WEAK_DYLIB', 'REEXPORT_DYLIB',
                              'LOAD_UPWARD_DYLIB')),
        ('parse_load_dylinker', ('LOAD_DYLINKER', 'ID_DYLINKER',
                                 'DYLD_ENVIRONMENT')),
        ('parse_prebound_dylib', ('PREBOUND_DYLIB',)),
        ('parse_routines', ('ROUTINES', 'ROUTINES_64')),
        ('parse_sub_stuff', ('SUB_FRAMEWORK', 'SUB_UMBRELLA', 'SUB_CLIENT',
                             'SUB_LIBRARY')),
        ('parse_twolevel_hints', ('TWOLEVEL_HINTS',)),
        ('parse_prebind_cksum', ('PREBIND_CKSUM',)),
        ('parse_uuid', ('UUID',)),
        ('parse_linkedit_data', ('CODE_SIGNATURE', 'SEGMENT_SPLIT_INFO',
                                 'FUNCTION_STARTS', 'DATA_IN_CODE',
                                 'DYLIB_CODE_SIGN_DRS',
                                 'LINKER_OPTIMIZATION_HINT')),
        ('parse_encryption_info', ('ENCRYPTION_INFO', 'ENCRYPTION_INFO_64')),
        ('parse_dyld_info', ('DYLD_INFO', 'DYLD_INFO_ONLY')),
        ('parse_version_min_os', ('VERSION_MIN_MACOSX',
                                  'VERSION_MIN_IPHONEOS',
                                  'VERSION_MIN_WATCHOS')),
        ('parse_source_version', ('SOURCE_VERSION',)),
        ('parse_linker_option', ('LINKER_OPTION',)),
        ('parse_rpath', ('RPATH',)),
        ('parse_main', ('MAIN',)),
    )
    handlers = {}
    for method_name, commands in groups:
        for command in commands:
            handlers[command] = method_name

    for _ in range(self.macho.nlcs):
        cmd = get_int(self.f)
        size = get_int(self.f)
        if self.macho.endi == 'little':
            cmd = little(cmd, 'I')
            size = little(size, 'I')
        try:
            cmd = dictionary.loadcommands[cmd]
        except LookupError:
            data = {'offset': self.f.tell() - 8, 'cmd': cmd}
            a = Abnormality(title='UNKNOWN LOADCOMMAND', data=data)
            # Was `self.addAbnormality`, a name used nowhere else; the
            # sibling parsers all record via add_abnormality().
            self.add_abnormality(a)
            lc = LoadCommand(cmd=cmd, size=size)
            self.macho.add_lc(lc)
            # Skip the body; cmd and size (8 bytes) are already consumed.
            self.f.read(size - 8)
            continue
        lc = LoadCommand(cmd=cmd, size=size)
        method_name = handlers.get(cmd)
        if method_name is not None:
            getattr(self, method_name)(lc)
def parse_macho(self, macho):
    """Parse the Mach-O header at ``macho.offset`` and everything below it.

    Reads cputype, subtype, filetype, load command count and size, and
    flags (all 32-bit; 64-bit Mach-Os have four extra padding bytes),
    translates them through the ``dictionary`` tables, and records an
    abnormality for every value that has no entry. Load commands are then
    parsed by a LoadCommander whose abnormalities are merged into this
    parser's. Symbols/imports/strings and the code signature are parsed
    when the corresponding load commands exist. For a Mach-O that is not
    part of an archive, ``self.file.content`` is set to ``macho``.
    """
    self.f.seek(macho.offset)
    # skip magic
    self.f.read(4)
    cputype = get_int(self.f)
    subtype = get_int(self.f)
    filetype = get_int(self.f)
    nlcs = get_int(self.f)
    slcs = get_int(self.f)
    flags = get_int(self.f)
    if macho.is_64_bit():
        # skip padding
        self.f.read(4)
    if macho.is_little():
        cputype = little(cputype, 'I')
        subtype = little(subtype, 'I')
        filetype = little(filetype, 'I')
        nlcs = little(nlcs, 'I')
        slcs = little(slcs, 'I')
        flags = little(flags, 'I')

    # Each lookup below catches only LookupError (previously a bare
    # except), so genuine errors are no longer reported as "unknown".
    try:
        # Entry -2 of a cputype's table is used as the CPU's own name.
        cpu = dictionary.cputypes[cputype][-2]
    except LookupError:
        cpu = cputype
        data = {
            'offset': macho.offset + 4,
            'cputype': cputype
        }
        a = Abnormality(title='UNKNOWN CPUTYPE', data=data)
        self.add_abnormality(a)
    try:
        subtype = dictionary.cputypes[cputype][subtype]
    except LookupError:
        data = {
            'offset': macho.offset + 8,
            'cputype': cputype,
            'subtype': subtype
        }
        a = Abnormality(title='UNKNOWN SUBTYPE', data=data)
        self.add_abnormality(a)
    try:
        filetype = dictionary.filetypes[filetype]
    except LookupError:
        data = {
            'offset': macho.offset + 12,
            'filetype': filetype
        }
        a = Abnormality(title='UNKNOWN FILETYPE', data=data)
        self.add_abnormality(a)

    flags = self.list_macho_flags(flags)
    macho.cputype = cpu
    macho.subtype = subtype
    macho.filetype = filetype
    macho.nlcs = nlcs
    macho.slcs = slcs
    macho.flags = flags

    lc = LoadCommander(f=self.f, macho=macho, file_size=self.file.size)
    lc.parse_lcs()
    self.abnormalities += lc.abnormalities

    # Need to investigate whether the presence of a
    # symbol/string table is expected and whether the
    # absence is indicative of shenanigans.
    if macho.has_lc('SYMTAB'):
        self.parse_syms(macho)
        self.parse_imports_and_strings(macho)
    if macho.has_lc('CODE_SIGNATURE'):
        self.parse_sig(macho)
    if not macho.is_archive():
        self.file.content = macho
def parse_codedirectory(self, signature, offset):
    """Parse the code directory blob at ``signature.offset + offset``.

    Records 'BAD MAGIC - CODEDIRECTORY' on a magic mismatch. Otherwise
    the fixed header fields are read, followed by the version-dependent
    ones (scatter offset for version >= 0x20100; platform and team id
    for version >= 0x20200), the identity string and all special and
    code slot hashes. The resulting CodeDirectory is stored on
    ``signature.codedirectory``. The file position is restored before
    returning.

    The page size is ``2 ** n`` for the stored byte ``n``. It is now
    computed with an integer shift; the earlier float ``exp``/``log``
    round trip gives the same result for small ``n`` but loses
    precision for large ones.
    """
    def read_byte():
        # Read a single byte and return it as an integer.
        return int(self.f.read(1).encode('hex'), 16)

    prev = self.f.tell()
    true_offset = signature.offset + offset
    self.f.seek(true_offset)
    magic = get_int(self.f)
    if magic != dictionary.signatures['CODEDIRECTORY']:
        data = {
            'offset': true_offset,
            'magic': hex(magic),
            'expected': hex(dictionary.signatures['CODEDIRECTORY'])
        }
        a = Abnormality(title='BAD MAGIC - CODEDIRECTORY', data=data)
        self.add_abnormality(a)
        self.f.seek(prev)
        return
    # Skip size
    self.f.read(4)
    version = get_int(self.f)
    # Not sure how to parse flags yet...
    flags = get_int(self.f)
    hash_offset = get_int(self.f)
    ident_offset = get_int(self.f)
    n_special_slots = get_int(self.f)
    n_code_slots = get_int(self.f)
    code_limit = get_int(self.f)
    hash_size = read_byte()
    hash_type = dictionary.hashes[read_byte()]
    if version >= 0x20200:
        platform = read_byte()
    else:
        # Skip spare1
        self.f.read(1)
    # The stored byte is the base-2 logarithm of the page size.
    page_size = 1 << read_byte()
    # Skip spare2
    self.f.read(4)
    if version >= 0x20100:
        scatter_offset = get_int(self.f)
    if version >= 0x20200:
        team_id_offset = get_int(self.f)
        self.f.seek(true_offset + team_id_offset)
        team_id = readstring(self.f)
    self.f.seek(true_offset + ident_offset)
    identity = readstring(self.f)
    codedirectory = CodeDirectory(version=version, flags=flags,
                                  hash_offset=hash_offset,
                                  n_special_slots=n_special_slots,
                                  n_code_slots=n_code_slots,
                                  code_limit=code_limit,
                                  hash_size=hash_size,
                                  hash_type=hash_type,
                                  page_size=page_size,
                                  identity=identity)
    if version >= 0x20100:
        codedirectory.scatter_offset = scatter_offset
    if version >= 0x20200:
        codedirectory.platform = platform
        codedirectory.team_id_offset = team_id_offset
        codedirectory.team_id = team_id
    # Special slot hashes sit immediately before hash_offset, so start
    # reading n_special_slots hashes earlier.
    self.f.seek(true_offset + hash_offset - n_special_slots * hash_size)
    count = n_special_slots + n_code_slots
    while count > 0:
        # Named `digest` to avoid shadowing the builtin hash().
        digest = self.f.read(hash_size).encode('hex')
        codedirectory.add_hash(digest)
        count -= 1
    signature.codedirectory = codedirectory
    self.f.seek(prev)
def parse_lcs(self):
    """Walk all load commands of the current Mach-O and parse each one.

    The 32-bit command id and size are read for each of
    ``self.macho.nlcs`` commands. An id missing from
    ``dictionary.loadcommands`` is recorded as an 'UNKNOWN LOADCOMMAND'
    abnormality; the command is still added (with its raw id) and its
    body of ``size - 8`` bytes is skipped. Recognised commands are
    dispatched to their ``parse_*`` method; recognised commands with no
    branch below are ignored.
    """
    for _ in range(self.macho.nlcs):
        cmd = get_int(self.f)
        size = get_int(self.f)
        if self.macho.endi == 'little':
            cmd = little(cmd, 'I')
            size = little(size, 'I')
        try:
            cmd = dictionary.loadcommands[cmd]
        except LookupError:
            data = {
                'offset': self.f.tell() - 8,
                'cmd': cmd
            }
            a = Abnormality(title='UNKNOWN LOADCOMMAND', data=data)
            # Fixed: this called `self.addAbnormality`, which no other
            # parser method uses; add_abnormality() is the recorder.
            self.add_abnormality(a)
            lc = LoadCommand(cmd=cmd, size=size)
            self.macho.add_lc(lc)
            # cmd and size (8 bytes) are consumed; skip the remainder.
            self.f.read(size - 8)
            continue
        lc = LoadCommand(cmd=cmd, size=size)
        if cmd in ('SEGMENT', 'SEGMENT_64'):
            self.parse_segment(lc)
        elif cmd == 'SYMTAB':
            self.parse_symtab(lc)
        elif cmd == 'SYMSEG':
            self.parse_symseg(lc)
        elif cmd in ('THREAD', 'UNIXTHREAD'):
            self.parse_thread(lc)
        elif cmd in ('LOADFVMLIB', 'IDFVMLIB'):
            self.parse_fvmlib(lc)
        elif cmd == 'IDENT':
            self.parse_ident(lc)
        elif cmd == 'FVMFILE':
            self.parse_fvmfile(lc)
        elif cmd == 'PREPAGE':
            self.parse_prepage(lc)
        elif cmd == 'DYSYMTAB':
            self.parse_dysymtab(lc)
        elif cmd in ('LOAD_DYLIB', 'ID_DYLIB', 'LAZY_LOAD_DYLIB',
                     'LOAD_WEAK_DYLIB', 'REEXPORT_DYLIB',
                     'LOAD_UPWARD_DYLIB'):
            self.parse_load_dylib(lc)
        elif cmd in ('LOAD_DYLINKER', 'ID_DYLINKER', 'DYLD_ENVIRONMENT'):
            self.parse_load_dylinker(lc)
        elif cmd == 'PREBOUND_DYLIB':
            self.parse_prebound_dylib(lc)
        elif cmd in ('ROUTINES', 'ROUTINES_64'):
            self.parse_routines(lc)
        elif cmd in ('SUB_FRAMEWORK', 'SUB_UMBRELLA', 'SUB_CLIENT',
                     'SUB_LIBRARY'):
            self.parse_sub_stuff(lc)
        elif cmd == 'TWOLEVEL_HINTS':
            self.parse_twolevel_hints(lc)
        elif cmd == 'PREBIND_CKSUM':
            self.parse_prebind_cksum(lc)
        elif cmd == 'UUID':
            self.parse_uuid(lc)
        elif cmd in ('CODE_SIGNATURE', 'SEGMENT_SPLIT_INFO',
                     'FUNCTION_STARTS', 'DATA_IN_CODE',
                     'DYLIB_CODE_SIGN_DRS', 'LINKER_OPTIMIZATION_HINT'):
            self.parse_linkedit_data(lc)
        elif cmd in ('ENCRYPTION_INFO', 'ENCRYPTION_INFO_64'):
            self.parse_encryption_info(lc)
        elif cmd in ('DYLD_INFO', 'DYLD_INFO_ONLY'):
            self.parse_dyld_info(lc)
        elif cmd in ('VERSION_MIN_MACOSX', 'VERSION_MIN_IPHONEOS',
                     'VERSION_MIN_WATCHOS'):
            self.parse_version_min_os(lc)
        elif cmd == 'SOURCE_VERSION':
            self.parse_source_version(lc)
        elif cmd == 'LINKER_OPTION':
            self.parse_linker_option(lc)
        elif cmd == 'RPATH':
            self.parse_rpath(lc)
        elif cmd == 'MAIN':
            self.parse_main(lc)
def parse_expression(self, in_or):
    """Parse one requirement expression and return it as unicode text.

    Reads the 32-bit opcode, looks the operator up in
    ``dictionary.operators`` and renders it, recursing for And / Or /
    Not. ``in_or`` tells an And / Or expression that it is an operand of
    an Or, in which case it is wrapped in parentheses. An operator with
    no rendering below produces an empty string.
    """
    def read_cert_slot():
        # Read a certificate slot number; use its symbolic name if known.
        slot = get_int(self.f)
        if slot in dictionary.cert_slots:
            return dictionary.cert_slots[slot]
        return str(slot)

    # Operators whose rendering is a fixed string with no operands.
    constants = {
        'False': 'never',
        'True': 'always',
        'AppleAnchor': 'anchor apple',
        'AppleGenericAnchor': 'anchor apple generic',
        'TrustedCerts': 'anchor trusted',
    }

    # Only the low 12 bits of the opcode word select the operator.
    operator = dictionary.operators[get_int(self.f) & 0xfff]
    expression = ''
    if operator in constants:
        expression = constants[operator]
    elif operator == 'Ident':
        expression = 'identity "' + str(self.parse_data()) + '"'
    elif operator == 'AnchorHash':
        slot = read_cert_slot()
        expression = ('certificate ' + slot + ' = ' +
                      str(self.parse_data().encode('hex')))
    elif operator == 'InfoKeyValue':
        key = str(self.parse_data())
        value = str(self.parse_data())
        expression = 'info[' + key + '] = "' + value + '"'
    elif operator == 'And' or operator == 'Or':
        # Operands of an Or are told so, so nested And / Or expressions
        # parenthesise themselves.
        operand_in_or = operator == 'Or'
        joiner = ' or ' if operand_in_or else ' and '
        left = self.parse_expression(operand_in_or)
        right = self.parse_expression(operand_in_or)
        expression = left + joiner + right
        if in_or:
            expression = '(' + expression + ')'
    elif operator == 'Not':
        expression = '! ' + self.parse_expression(False)
    elif operator == 'CDHash':
        expression = 'cdhash ' + str(self.parse_data().encode('hex'))
    elif operator == 'InfoKeyField':
        key = str(self.parse_data())
        expression = 'info[' + key + ']' + self.parse_match()
    elif operator == 'EntitlementField':
        key = str(self.parse_data())
        expression = 'entitlement[' + key + ']' + self.parse_match()
    elif operator == 'CertField':
        slot = read_cert_slot()
        field = str(self.parse_data())
        expression = ('certificate ' + slot + '[' + field + ']' +
                      self.parse_match())
    elif operator == 'CertGeneric':
        slot = read_cert_slot()
        length = get_int(self.f)
        field = self.to_oid(length)
        expression = ('certificate ' + slot + '[field.' + field + ']' +
                      self.parse_match())
    elif operator == 'CertPolicy':
        slot = read_cert_slot()
        policy = str(self.parse_data())
        expression = ('certificate ' + slot + '[policy.' + policy + ']' +
                      self.parse_match())
    elif operator == 'TrustedCert':
        expression = 'certificate ' + read_cert_slot() + ' trusted'
    elif operator == 'NamedAnchor':
        expression = 'anchor apple ' + str(self.parse_data())
    elif operator == 'NamedCode':
        expression = '(' + str(self.parse_data()) + ')'
    elif operator == 'Platform':
        expression = 'platform = ' + str(get_int(self.f))

    if isinstance(expression, unicode):
        return expression
    # Byte strings are decoded, replacing anything undecodable.
    return unicode(expression, errors='replace')
def parse_data(self):
    """Read a length-prefixed data item and return its raw bytes.

    A 32-bit length is read first, then that many bytes of data, then
    enough padding bytes to bring the data length up to a multiple of
    four.
    """
    length = get_int(self.f)
    payload = self.f.read(length)
    # Skip padding
    padding = (-length) % 4
    self.f.read(padding)
    return payload