def __init__(self, binary_file_name, show_symbol_files): self.binary_file_name = binary_file_name self.show_symbol_files = show_symbol_files self.nm_available = False self.symbol_to_address_dict = {} self.functions_table = FunctionTable() if shutil.which("nm") is not None: self.nm_available = True self.command_args = ['llvm-objdump', '-d', self.binary_file_name] else: self.command_args = [ 'llvm-objdump', '-d', '-l', self.binary_file_name ] self.symbol_table_args = ['llvm-objdump', '-t', self.binary_file_name] self.functions_code_seperator = "\n\n" self.functions_name_extractor_pattern = '(^[\D\S]\S+):\n' self.address_extraction_pattern = '([0-9a-fA-F]{16,})(.+).text\s+([0-9a-fA-F]+) (\S+)' self.symbol_location_extraction_pattern = '(\S+):\n; (.+:\d+)\n' self.calls_statement_matching_pattern = '\s*([a-fA-F0-9]+):.+callq\s+(-*\d+)\s+<(.+)>' self.construct_symbols_to_address_dict() self.construct_functions_table() self.analyze()
def __init__(self, binary_file_name, show_symbol_files): self.binary_file_name = binary_file_name self.show_symbol_files = show_symbol_files self.functions_table = FunctionTable() self.nm_available = False if shutil.which("nm") is not None: self.nm_available = True self.command_args = ['objdump', '-C', '-d', self.binary_file_name] else: self.command_args = [ 'objdump', '-C', '-d', '-l', self.binary_file_name ] self.functions_code_seperator = "\n\n" self.functions_name_extractor_pattern = '([0-9a-fA-F]{16,}) <(.+)>:' self.symbol_location_extraction_pattern = '.+\n\S+\(\):\n(\S+:\d+)' self.calls_statement_matching_pattern = '(callq|jmpq)\s+(\S+)\s+<(.+)>' self.leas_statement_matching_pattern = '(lea)\s+.+# (\S+)\s+<(.+)>\s*' self.construct_functions_table()
class ObjDumpParser(): def __init__(self, binary_file_name, show_symbol_files): self.binary_file_name = binary_file_name self.show_symbol_files = show_symbol_files self.functions_table = FunctionTable() self.nm_available = False if shutil.which("nm") is not None: self.nm_available = True self.command_args = ['objdump', '-C', '-d', self.binary_file_name] else: self.command_args = [ 'objdump', '-C', '-d', '-l', self.binary_file_name ] self.functions_code_seperator = "\n\n" self.functions_name_extractor_pattern = '([0-9a-fA-F]{16,}) <(.+)>:' self.symbol_location_extraction_pattern = '.+\n\S+\(\):\n(\S+:\d+)' self.calls_statement_matching_pattern = '(callq|jmpq)\s+(\S+)\s+<(.+)>' self.leas_statement_matching_pattern = '(lea)\s+.+# (\S+)\s+<(.+)>\s*' self.construct_functions_table() self.analyze() def construct_functions_table(self): elf_ouput = subprocess.check_output(self.command_args, encoding='utf-8') fcn_listings = elf_ouput.split(self.functions_code_seperator) loc_table = None if self.show_symbol_files and self.nm_available: loc_table = get_locations_table_through_nm(self.binary_file_name) header_re = re.compile(self.functions_name_extractor_pattern) location_re = re.compile(self.symbol_location_extraction_pattern) for listing in fcn_listings: m = header_re.match(listing) # Check if it is indeed a function if not m: continue address = int(m[1], 16) loc = [] if self.show_symbol_files: if loc_table and address in loc_table: loc = loc_table[address] else: lm = location_re.match(listing) if lm: loc = lm[1] f = Function( name=m[2], location=loc, address=int(m[1], 16), # Convert to hex for look up code=listing, callees=[], callers=[]) self.functions_table.add(f) return def analyze(self): # Consider both calls and jmps as calls. callstmt = re.compile(self.calls_statement_matching_pattern) leastmt = re.compile(self.leas_statement_matching_pattern) for fcn in self.functions_table.functions(): callees = callstmt.findall(fcn.code) + leastmt.findall(fcn.code) if len(callees) == 0: #print('%s %s calls []' % (hex(fcn.address), fcn.name)) continue #print('%s %s calls' % (hex(fcn.address), fcn.name)) for c in callees: # Avoid recursive calls. if c == fcn: continue callee_address = int(c[1], 16) callee_name = c[2] #print(' %s %s' % (hex(callee_address), callee_name)) try: callee_fcn = self.functions_table.lookup(callee_address) if callee_fcn not in fcn.callees: fcn.callees.append(callee_fcn) if fcn not in callee_fcn.callers: callee_fcn.callers.append(fcn) except: #print('Could not resolve callee %s %s' % (callee_address, callee_name)) pass
class LLVMObjDumpParser(): def __init__(self, binary_file_name, show_symbol_files): self.binary_file_name = binary_file_name self.show_symbol_files = show_symbol_files self.nm_available = False self.symbol_to_address_dict= {} self.functions_table = FunctionTable() if shutil.which("nm") is not None: self.nm_available = True self.command_args = ['llvm-objdump', '-d',self.binary_file_name] else: self.command_args = ['llvm-objdump', '-d', '-l', self.binary_file_name] self.symbol_table_args = ['llvm-objdump', '-t', self.binary_file_name] self.functions_code_seperator = "\n\n" self.functions_name_extractor_pattern = '(^[\D\S]\S+):\n' self.address_extraction_pattern = '([0-9a-fA-F]{16,})(.+).text\s+([0-9a-fA-F]+) (\S+)' self.symbol_location_extraction_pattern = '(\S+):\n; (.+:\d+)\n' self.calls_statement_matching_pattern = '\s*([a-fA-F0-9]+):.+callq\s+(-*\d+)\s+<(.+)>' self.construct_symbols_to_address_dict() self.construct_functions_table() def construct_symbols_to_address_dict(self): symbols_output = subprocess.check_output(self.symbol_table_args, encoding='utf-8') symbols_output_lines = symbols_output.split("\n") symbols_extraction_matcher = re.compile(self.address_extraction_pattern) for line in symbols_output_lines: details = symbols_extraction_matcher.match(line) if details: address = int(details[1], 16) self.symbol_to_address_dict[details[4]] = address return def construct_functions_table(self): elf_output = subprocess.check_output(self.command_args, encoding='utf-8') elf_output = elf_output.replace("Disassembly of section .text:", "Disassembly of section .text:\n") fcn_listings = elf_output.split(self.functions_code_seperator) loc_table = None if self.show_symbol_files and self.nm_available: loc_table = get_locations_table_through_nm(self.binary_file_name) header_re = re.compile(self.functions_name_extractor_pattern) location_re = re.compile(self.symbol_location_extraction_pattern) for listing in fcn_listings: details = header_re.match(listing) # Check if it is indeed a function if not details: continue function_name = details[1] address = self.symbol_to_address_dict[function_name] loc = [] if self.show_symbol_files: if loc_table and address in loc_table: loc = loc_table[address] else: lm = location_re.match(listing) if lm: loc = lm[2] f = Function(name=function_name, location=loc, address=address, # Convert to hex for look up code=listing, callees=[], callers=[]) self.functions_table.add(f) return def analyze(self): # Consider both calls and jmps as calls. callstmt = re.compile(self.calls_statement_matching_pattern) for fcn in self.functions_table.functions(): callstmt = re.compile(self.calls_statement_matching_pattern) callees = callstmt.findall(fcn.code) if len(callees) == 0: continue else: for callee in callees: callee_address = int(callee[0], 16) + int(callee[1], 10) + 5 try: callee_fcn = self.functions_table.lookup(callee_address) if callee_fcn not in fcn.callees: fcn.callees.append(callee_fcn) if fcn not in callee_fcn.callers: callee_fcn.callers.append(fcn) except: #print('Could not resolve callee %s %s' % (callee_address, callee_name)) pass