def set_datas(self, funcs):
    """
    Collect section/data information and resolve data references

    Fills the label, function and .text address tables kept on the instance,
    then runs the reference solver matching the ELF word size.
    :param funcs: function list
    """
    self.section_collect()
    self.data_collect()
    self.locations = self.label_locate()

    # Unique label addresses (second field of every label entry) and their sorted form
    self.label_set = set(entry[1] for entry in self.label)
    self.label_arr = sorted(self.label_set)

    # Function descriptors ordered by start address
    by_start = sorted(funcs, key=lambda fn: fn.func_begin_addr)
    self.fl_sort = [
        ft(fn.func_name, fn.func_begin_addr, fn.func_end_addr)
        for fn in by_start
    ]

    # Each entry of text_mem.info is a hexadecimal address, optionally followed by ':'
    self.text_mem_addrs = [
        int(entry.strip().rstrip(':'), 16)
        for entry in read_file('text_mem.info')
    ]
    self.text_mem_arr = self.text_mem_addrs  # same list object, not a copy
    self.label_mem_arr = sorted(self.label_mem_addrs)

    self.set_assumption_flag()
    self.set_excluded_ranges()

    # Start addresses in the caller's original order (not sorted)
    self.begin_addrs = [fn.func_begin_addr for fn in funcs]

    if ELF_utils.elf_32():
        self.data_refer_solve()
    else:
        self.data_refer_solve_64()
def disassemble(self):
    """
    Disassemble .text and dump raw section contents to files

    Writes the objdump listing of .text to <file>.temp and the hex dumps of
    .rodata, .data, .eh_frame, .eh_frame_hdr (non-ARM only) and .got to the
    matching *.info files. Only the .text, .rodata and .data steps have their
    exit status passed to self.checkret.
    """
    def dump_cmd(section, outfile):
        # objdump hex dump, keeping space-separated fields 3-6 of every row
        # that starts with a blank
        return (config.objdump + " -s -j " + section + " " + self.file
                + " | grep \"^ \" | cut -d \" \" -f3,4,5,6 > " + outfile)

    print(colored('1: DISASSEMBLE', 'green'))

    listing = self.file + '.temp'
    ret = os.system(config.objdump + ' -Dr -j .text ' + self.file + ' > ' + listing)
    self.checkret(ret, listing)

    # PIC handling is skipped entirely on ARM
    if not ELF_utils.elf_arm():
        if ELF_utils.elf_32():
            pic_process.picprocess32(self.file)
        else:
            extern_symbol_process.globalvar(self.file)
            pic_process.picprocess64(self.file)

    # Dumps whose exit status is checked
    for section, outfile in (('.rodata', 'rodata.info'), ('.data', 'data.info')):
        ret = os.system(dump_cmd(section, outfile))
        self.checkret(ret, outfile)

    # Remaining dumps: exit status is not inspected
    os.system(dump_cmd('.eh_frame', 'eh_frame.info'))
    if not ELF_utils.elf_arm():
        os.system(dump_cmd('.eh_frame_hdr', 'eh_frame_hdr.info'))
    os.system(dump_cmd('.got', 'got.info'))
def traverse(self):
    """
    Analyze and modify instructions

    The scan only runs for 32-bit, non-ARM targets; the collected labels are
    returned in every case.
    :return: list of generated labels
    """
    needs_scan = ELF_utils.elf_32() and not ELF_utils.elf_arm()
    if needs_scan:
        self.scan()
    return unify_int_list(self.label)
def picprocess32(filepath):
    """
    Handle PC relative operations in x86 32 bit code, such as:
        call   804c452 <__x86.get_pc_thunk.bx>
        add    $0x2b8e,%ebx
        mov    $0x10, (%ebx)
    This sequence usually loads the address of _GLOBAL_OFFSET_TABLE_ into %ebx.
    Further adjustments are made in the analysis phase.
    Nothing is done unless the target is a 32-bit, non-ARM executable.
    :param filepath: path to target executable
    """
    if not ELF_utils.elf_32():
        return
    if not ELF_utils.elf_exe():
        return
    if ELF_utils.elf_arm():
        return
    text_process_strip(filepath)
def gotexternals(self):
    """
    Replace external symbols in .got

    Reads (hex address, symbol) pairs from gotglobals.info and, for every
    address that falls inside the .got section, rewrites the matching slot of
    self.got_list as a labelled symbolic reference and blanks the following
    entries that belonged to the same word.
    """
    def parse_entry(line):
        # "<hex addr> <symbol>[@version]" -> (addr, symbol)
        fields = line.split()
        return (int(fields[0], 16), fields[1].split('@')[0])

    with open('gotglobals.info') as f:
        syms = [parse_entry(line) for line in f]
    syms.sort(key=lambda pair: pair[0])

    gotsec = self.sec['.got']
    datatype = '.long ' if ELF_utils.elf_32() else '.quad '
    # Number of trailing entries to blank after the first one (4- or 8-entry word)
    skiplen = 3 if ELF_utils.elf_32() else 7

    for addr, name in syms:
        inside_got = (gotsec.sec_begin_addr <= addr
                      < gotsec.sec_begin_addr + gotsec.sec_size)
        if not inside_got:
            continue
        off = addr - gotsec.sec_begin_addr
        self.got_list[off] = ('S_' + dec_hex(addr) + ': ', datatype + name)
        self.got_list[off + 1:off + 1 + skiplen] = [('', '')] * skiplen
def lib32_processing(self, instrs, funcs):
    """
    Process PC relative code for x86 32 binaries

    For any other target the instruction list is returned untouched.
    :param instrs: instruction list
    :param funcs: function list
    :return: the processed instruction list
    """
    if not ELF_utils.elf_32() or ELF_utils.elf_arm():
        return instrs

    helper = lib32_helper(instrs, funcs)
    # Record every address reported by the helper together with its section name
    self.label += [
        (self.check_sec(addr).sec_name, addr)
        for addr in helper.traverse()
    ]
    return helper.get_instrs()
def bswapsub(reg, loc):
    """
    Replace target register in bswap containing ret encoding bytes

    The swap is carried out on edi/rdi instead: the scratch register is saved
    on the stack, loaded from reg, byte-swapped, copied back and restored.
    :param reg: bswapped register
    :param loc: instruction location
    :return: instruction list with replaced bswap
    """
    # Scratch register width follows the first letter of the swapped register
    scratch_name = 'edi' if reg[0].lower() == 'e' else 'rdi'
    scratch = Types.RegClass(scratch_name)

    # Register used for push/pop follows the ELF word size
    stack_name = 'edi' if ELF_utils.elf_32() else 'rdi'
    stack_reg = Types.RegClass(stack_name)

    replacement = [
        Types.DoubleInstr(('push', stack_reg, None, False)),
        Types.TripleInstr(('mov', scratch, reg, None, False)),
        Types.DoubleInstr(('bswap', scratch, None, False)),
        Types.TripleInstr(('mov', reg, scratch, None, False)),
        Types.DoubleInstr(('pop', stack_reg, None, False)),
    ]
    return set_inlineblocklocation(loc, replacement)
def main():
    """
    Rewrite instrs.info in place for 32-bit ELF targets

    Each line is read, stripped of surrounding whitespace and written back
    terminated by a single newline. Nothing is read or written when
    ELF_utils.elf_32() is false.
    """
    if ELF_utils.elf_32():
        lines = []
        with open('instrs.info') as f:
            lines = f.readlines()
        for i in range(len(lines)):
            l = lines[i]
            l = l.strip()
            if 'nop' in l:
                items = l.split()
                if 'nop' == items[-1]:
                    # NOTE(review): this branch is currently a no-op. The disabled
                    # statement below would have replaced a line ending in "nop"
                    # with just its part before ':' plus " :" - confirm whether it
                    # should be restored or the whole branch removed.
                    #l = l.split(':')[0] + " :"
                    l = l
            lines[i] = l + "\n"
        with open('instrs.info', 'w') as f:
            f.writelines(lines)
def sectionProcess(self):
    """
    Dump section boundaries

    Runs readelf/objdump on self.file through the shell and writes:
      sections.info   - rows matching data|bss|got, except the section named
                        by badsec
      text_sec.info   - rows matching text
      init_sec.info   - rows matching init, except .init_array
      init_array.info - objdump -s output (stdout and stderr) for .init_array
      plt_sec.info    - the row whose second field is exactly .plt
    Each row keeps awk fields 2, 4, 5 and 6 of the `readelf -SW` output
    (presumably name, address, offset and size - confirm against the readelf
    column layout). Exit statuses of the commands are not checked.
    """
    # Section excluded from sections.info; depends on the ELF word size
    badsec = '.got.plt' if ELF_utils.elf_32() else '.data.rel.ro'
    # NOTE(review): "\ " is not a Python escape, so the shell receives a
    # backslash-escaped space in front of the second awk program.
    os.system(
        "readelf -SW " + self.file +
        " | awk \'/data|bss|got/ {print $2,$4,$5,$6} \' | awk \ '$1 != \"" + badsec + "\" {print $1,$2,$3,$4}\' > sections.info")
    os.system("readelf -SW " + self.file + " | awk \'/text/ {print $2,$4,$5,$6} \' > text_sec.info")
    os.system(
        "readelf -SW " + self.file +
        " | awk \'/init/ {print $2,$4,$5,$6} \' | awk \'$1 != \".init_array\" {print $1,$2,$3,$4}\' > init_sec.info"
    )
    # The dump below appends (>>), so drop any stale file from a previous run first
    if os.path.isfile('init_array.info'):
        os.remove('init_array.info')
    os.system(config.objdump + " -s -j .init_array " + self.file + " >> init_array.info 2>&1")
    os.system("readelf -SW " + self.file + " | awk '$2==\".plt\" {print $2,$4,$5,$6}' > plt_sec.info")
def _main_addr_x86(lines):
    """
    Find the address of main in an x86 objdump listing
    :param lines: lines of the .text disassembly
    :return: hex digits of the address (without 0x), '' when no call to
             __libc_start_main is present
    """
    # when not using O2 to compile the original binary, we will remove all the _start code,
    # including the routine attached on the original program. In that case, we can not discover the
    # main function
    for i, line in enumerate(lines):
        if "<__libc_start_main@plt>" not in line:
            continue
        # The instruction right before the call carries main's address as its
        # last token: "$0x..." on 32-bit, "$0x...,%reg" on 64-bit.
        operand = lines[i - 1].split()[-1]
        if not ELF_utils.elf_32():
            operand = operand.split(',')[0]
        if operand == '%eax':
            # to fit gcc-4.8 -m32, the address is mov to %eax, then push to stack
            operand = lines[i - 2].split()[-1].split(',')[0]
        return operand.split('0x')[1]

    ## Some of the PIC code/module rely on typical pattern to locate
    ## such as:
    ##    804c460: push   %ebx
    ##    804c461: call   804c452 <__i686.get_pc_thunk.bx>
    ##    804c466: add    $0x2b8e,%ebx
    ##    804c46c: sub    $0x18,%esp
    ## What we can do is match the pattern `<__i686.get_pc_thunk.bx>` and calculate
    ## the address by adding 0x2b8e and 0x804c466, which equals the begin address of the GOT.PLT table
    ## symbols that can be leveraged in re-assemble are
    ##    _GLOBAL_OFFSET_TABLE_ == ** .got.plt **
    return ""


def _main_addr_armt(lines):
    """
    Find the address of main in an ARM Thumb objdump listing
    :param lines: lines of the .text disassembly
    :return: upper-case hex digits of the address with bit 0 cleared, '' when
             it cannot be located
    """
    ## 1035c:       4803            ldr     r0, [pc, #12]   ; (1036c <_start+0x28>)
    ## 1035e:       4b04            ldr     r3, [pc, #16]   ; (10370 <_start+0x2c>)
    ## 10360:       f7ff efde       blx     10320 <__libc_start_main@plt>
    ## 10364:       f7ff efe8       blx     10338 <abort@plt>
    ## ...
    ## 1036c:       0001052d
    total = len(lines)
    for i, line in enumerate(lines):
        if '<__libc_start_main@plt>' not in line:
            continue

        # Walk backwards to the `ldr r0, [pc, ...]` and take the literal pool
        # address from its "; (addr <sym>)" annotation. Line 0 is not examined.
        pcraddr = None
        for j in range(i - 1, 0, -1):
            if 'ldr' in lines[j] and 'r0' in lines[j]:
                pcraddr = lines[j].split(';')[1].strip().split()[0][1:]
                break
        if pcraddr is None:
            # No r0 load before the call: report "not found" instead of
            # failing on an unbound name.
            return ""

        # Walk forwards to the literal pool entry holding main's address
        for j in range(i + 1, total):
            if not lines[j].strip().startswith(pcraddr):
                continue
            word = lines[j].split()[1]
            if len(word) < 8:
                # Entry split into two half-words: the next line holds the upper half
                word = lines[j + 1].split()[1] + word
            # int() accepts leading zeros, so no stripping is needed (and an
            # all-zero word no longer fails); clear bit 0 of the address.
            return '%X' % (int(word, 16) & (-2))
        return ""
    return ""


def main_discover(filename):
    """
    Find main function address and store it to file

    Writes the output of `file` to elf.info. For executables, disassembles
    .text into <filename>.temp, extracts the address handed to
    __libc_start_main and writes it to main.info as "S_0x<ADDR>". When the
    address cannot be found, "S_0x" alone is written.
    :param filename: path to target executable
    :raises Exception: when config.arch is neither ARCH_X86 nor ARCH_ARMT
    """
    os.system('file ' + filename + ' > elf.info')
    if not ELF_utils.elf_exe():
        return

    os.system(config.objdump + ' -Dr -j .text ' + filename + ' > ' + filename + '.temp')
    with open(filename + '.temp') as f:
        lines = f.readlines()

    if config.arch == config.ARCH_X86:
        main_symbol = _main_addr_x86(lines)
    elif config.arch == config.ARCH_ARMT:
        main_symbol = _main_addr_armt(lines)
    else:
        raise Exception('Unknown arch')

    with open("main.info", 'w') as f:
        f.write('S_0x' + main_symbol.upper() + '\n')
if e.args[0] == 0: f.seek(e.args[1], os.SEEK_CUR) curr_line = e.state.line - 1 else: res[curr_line] += f.read(e.args[1]) curr_line += e.args[0] f.close() return res badbytes = set(('\xc2', '\xc3', '\xca', '\xcb')) badend = set(('\xff')) branchenc = set(('\x72', '\x76', '\xe3', '\x7c', '\x7e', '\xe9', '\xeb', '\x73', '\x77', '\x7d', '\x7f', '\x71', '\x7b', '\x79', '\x75', '\x70', '\x7a', '\x78', '\x74')) sled = 'jmp .+11;' + ('nop;' * 9) + ' ' barrier = '; mov %eax,%eax\n' if ELF_utils.elf_32() else '; mov %rax,%rax\n' indcodes = set((2,3,4,5)) def sled_insertion(fixed): """ Insert alignment enforcing sleds :param fixed: set of lines already fixed """ hexvals = get_hex() nmodified = 0 with open('final.s') as f: lines = f.readlines() for i in xrange(len(hexvals)): hv = hexvals[i]