def main():
    """Check a binary's symbol order against an orderfile.

    Command line: ``[options] <binary> <orderfile>``. When ``--target-arch``
    is not given, the architecture comes from
    cygprofile_utils.DetectArchitecture().

    Returns:
        1 when the positional arguments are malformed or when
        _VerifySymbolOrder() returns a false value, 0 otherwise.
    """
    parser = optparse.OptionParser(
        usage='usage: %prog [options] <binary> <orderfile>')
    parser.add_option(
        '--target-arch', action='store', dest='arch',
        choices=['arm', 'arm64', 'x86', 'x86_64', 'x64', 'mips'],
        help='The target architecture for the binary.')
    parser.add_option(
        '--threshold', action='store', dest='threshold', default=20, type=int,
        help='The maximum allowed number of out-of-order symbols.')
    options, argv = parser.parse_args(sys.argv)
    if not options.arch:
        options.arch = cygprofile_utils.DetectArchitecture()
    # argv[0] is the program name, so exactly two positionals are expected.
    if len(argv) != 3:
        parser.print_help()
        return 1
    binary_filename, orderfile_filename = argv[1:]

    symbol_extractor.SetArchitecture(options.arch)
    symbol_infos = symbol_extractor.SymbolInfosFromBinary(binary_filename)

    # open() in a with-block replaces the Python 2-only file() builtin and
    # guarantees the orderfile handle is closed.
    with open(orderfile_filename) as orderfile:
        ordered_symbols = [line.strip() for line in orderfile]

    if not _VerifySymbolOrder(ordered_symbols, symbol_infos,
                              options.threshold):
        return 1
    # Explicit success code; previously the function fell off the end.
    return 0
def main():
    """Count misordered symbols of a binary relative to an orderfile.

    Command line: ``[options] <binary> <orderfile>``. When ``--target-arch``
    is not given, the architecture comes from
    cygprofile_utils.DetectArchitecture().

    Returns:
        1 when the positional arguments are malformed. Otherwise a boolean
        that is True (failure) when the number of misordered pairs exceeds
        ``--threshold`` or when no symbol was matched at all.
    """
    parser = optparse.OptionParser(
        usage='usage: %prog [options] <binary> <orderfile>')
    parser.add_option(
        '--target-arch', action='store', dest='arch',
        choices=['arm', 'arm64', 'x86', 'x86_64', 'x64', 'mips'],
        help='The target architecture for the binary.')
    # type=int is required: without it a threshold given on the command line
    # stays a string and the integer comparison below is meaningless.
    parser.add_option(
        '--threshold', action='store', dest='threshold', default=1, type=int,
        help='The maximum allowed number of out-of-order symbols.')
    options, argv = parser.parse_args(sys.argv)
    if not options.arch:
        options.arch = cygprofile_utils.DetectArchitecture()
    # argv[0] is the program name, so exactly two positionals are expected.
    if len(argv) != 3:
        parser.print_help()
        return 1
    binary_filename, orderfile_filename = argv[1:]

    symbol_extractor.SetArchitecture(options.arch)
    obj_dir = cygprofile_utils.GetObjDir(binary_filename)
    symbol_to_sections_map = (
        cyglog_to_orderfile.GetSymbolToSectionsMapFromObjectFiles(obj_dir))
    section_to_symbols_map = cygprofile_utils.InvertMapping(
        symbol_to_sections_map)
    symbols = patch_orderfile.GetSymbolsFromOrderfile(
        orderfile_filename, section_to_symbols_map)
    symbol_infos = symbol_extractor.SymbolInfosFromBinary(binary_filename)
    # Missing symbols is not an error since some of them can be eliminated
    # through inlining.
    misordered_pairs_count, matched_symbols, _ = _CountMisorderedSymbols(
        symbols, symbol_infos)
    return (misordered_pairs_count > options.threshold) or (matched_symbols == 0)
def MatchSymbolsInRegularBuild(reached_symbol_infos,
                               regular_native_lib_filename):
    """Map reached symbols onto the canonical symbols of the regular build.

    Only reached symbols whose name also exists in the regular build are
    kept; each one is translated through SymbolNameToPrimary().

    Args:
      reached_symbol_infos: ([symbol_extractor.SymbolInfo]) Reached symbol
        in the instrumented build.
      regular_native_lib_filename: (str) regular build filename.

    Returns:
      [symbol_extractor.SymbolInfo] list of matched canonical symbols.
    """
    regular_infos = symbol_extractor.SymbolInfosFromBinary(
        regular_native_lib_filename)
    regular_names = set(info.name for info in regular_infos)
    reached_names = set(info.name for info in reached_symbol_infos)
    logging.info('Reached symbols = %d', len(reached_names))
    matched_names = reached_names & regular_names
    logging.info('Matched symbols = %d', len(matched_names))

    name_to_primary = SymbolNameToPrimary(regular_infos)
    # Walk the reached names (not the intersection) so the output order
    # follows the reached set's iteration order.
    return [name_to_primary[name]
            for name in reached_names
            if name in matched_names]
def main():
    """Attribute code pages of a native library to symbols and object files.

    Writes map.txt and map.json (plus reached.json and residency.json when
    the corresponding arguments are given) and the visualize.* assets into
    args.output_directory, then optionally serves that directory over HTTP.

    Returns:
        1 if the unstripped native library does not exist, 0 otherwise.
        When args.start_server is set, the call blocks in serve_forever().
    """
    parser = CreateArgumentParser()
    args = parser.parse_args()
    logging.basicConfig(level=logging.INFO)

    symbol_extractor.SetArchitecture(args.arch)
    logging.info('Parsing object files in %s', args.build_directory)
    object_files_symbols = _GetSymbolNameToFilename(args.build_directory)
    native_lib_filename = os.path.join(
        args.build_directory, 'lib.unstripped', args.native_library)
    if not os.path.exists(native_lib_filename):
        logging.error('Native library not found. Did you build the APK?')
        return 1

    offset = 0
    if args.residency:
        with open(args.residency) as f:
            residency = json.load(f)
        offset = residency['offset']

    logging.info('Extracting symbols from %s', native_lib_filename)
    native_lib_symbols = symbol_extractor.SymbolInfosFromBinary(
        native_lib_filename)
    logging.info('%d Symbols found', len(native_lib_symbols))
    logging.info('Mapping symbols and object files to code pages')
    page_to_symbols = CodePagesToMangledSymbols(native_lib_symbols, offset)
    page_to_object_files = CodePagesToObjectFiles(object_files_symbols,
                                                  page_to_symbols)

    # Create the output directory before anything is written into it;
    # reached.json used to be written ahead of this check and failed on a
    # fresh output directory.
    if not os.path.exists(args.output_directory):
        os.makedirs(args.output_directory)

    if args.reached_symbols_file:
        logging.info('Mapping reached symbols to code pages')
        reached_symbol_names = ReadReachedSymbols(args.reached_symbols_file)
        reached_data = CodePagesToReachedSize(reached_symbol_names,
                                              page_to_symbols)
        WriteReachedData(os.path.join(args.output_directory, 'reached.json'),
                         reached_data)

    text_output_filename = os.path.join(args.output_directory, 'map.txt')
    json_output_filename = os.path.join(args.output_directory, 'map.json')
    WriteCodePageAttribution(
        page_to_object_files, text_output_filename, json_output_filename)

    # The residency file does not depend on the asset being copied, so copy
    # it once instead of once per loop iteration.
    if args.residency:
        shutil.copy(args.residency,
                    os.path.join(args.output_directory, 'residency.json'))
    directory = os.path.dirname(__file__)
    for filename in ['visualize.html', 'visualize.js', 'visualize.css']:
        shutil.copy(os.path.join(directory, filename),
                    os.path.join(args.output_directory, filename))

    if args.start_server:
        # The request handler serves the current working directory.
        os.chdir(args.output_directory)
        httpd = SocketServer.TCPServer(
            ('', args.port), SimpleHTTPServer.SimpleHTTPRequestHandler)
        logging.warning('Serving on port %d', args.port)
        httpd.serve_forever()

    return 0
def _GroupSymbolInfosFromBinary(binary_filename):
    """Extract the symbols of a binary and group them by name and offset.

    Args:
      binary_filename: path to the binary.

    Returns:
      A tuple of dict: (offset_to_symbol_infos, name_to_symbol_infos):
      - offset_to_symbol_infos: {offset: [symbol_info1, ...]}
      - name_to_symbol_infos: {name: [symbol_info1, ...]}
    """
    return _GroupSymbolInfos(
        symbol_extractor.SymbolInfosFromBinary(binary_filename))
def SymbolInfos(self):
    """Return the offset-sorted symbols of this processor's binary.

    The symbols are extracted from the binary and sorted on first use; the
    resulting list is cached and returned as-is on later calls.

    Returns:
      [symbol_extractor.SymbolInfo]
    """
    if self._symbol_infos is not None:
        return self._symbol_infos

    self._symbol_infos = symbol_extractor.SymbolInfosFromBinary(
        self._binary_filename)
    infos = self._symbol_infos
    infos.sort(key=lambda info: info.offset)
    logging.info('%d symbols from %s', len(infos), self._binary_filename)
    return infos
def GetOffsetToSymbolArray(instrumented_native_lib_filename):
    """Build the .text offset -> symbol mapping of a native library.

    Args:
      instrumented_native_lib_filename: (str) Native library filename.
                                        Has to be the instrumented version.

    Returns:
      [symbol_extractor.SymbolInfo or None] For every 4 bytes of the .text
      section, maps it to a symbol, or None.
    """
    extracted = symbol_extractor.SymbolInfosFromBinary(
        instrumented_native_lib_filename)
    symbol_count = len(extracted)
    logging.info('%d Symbols', symbol_count)
    return GetOffsetToSymbolInfo(extracted)
def _GroupSymbolInfosFromBinary(binary_filename):
    """Group the suffix-stripped symbols of a binary by offset and by name.

    Every symbol name goes through RemoveSuffixes() before grouping.

    Args:
      binary_filename: path to the binary.

    Returns:
      A tuple of dict: (offset_to_symbol_infos, name_to_symbol_infos):
      - offset_to_symbol_infos: {offset: [symbol_info1, ...]}
      - name_to_symbol_infos: {name: [symbol_info1, ...]}
    """
    stripped_infos = []
    for info in symbol_extractor.SymbolInfosFromBinary(binary_filename):
        stripped_infos.append(info._replace(name=RemoveSuffixes(info.name)))

    by_offset = symbol_extractor.GroupSymbolInfosByOffset(stripped_infos)
    by_name = symbol_extractor.GroupSymbolInfosByName(stripped_infos)
    return (by_offset, by_name)
def _GroupSymbolsByOffset(binary_filename):
    """Produce a map symbol name -> all symbol names at same offset.

    Suffixes are stripped with RemoveSuffixes() before grouping.

    Args:
      binary_filename: path to the binary.

    Returns:
      {name: [name1, ...]} where the list holds the names of every symbol
      sharing the offset of `name` (including `name` itself). When a name
      occurs at several offsets, the last one seen wins.
    """
    symbol_infos = [
        s._replace(name=RemoveSuffixes(s.name))
        for s in symbol_extractor.SymbolInfosFromBinary(binary_filename)]
    offset_map = symbol_extractor.GroupSymbolInfosByOffset(symbol_infos)
    sym_to_matching = {}
    for sym in symbol_infos:
        # Symbols whose offset is absent from the grouping are skipped.
        # The counter that used to track them was never read and is gone.
        if sym.offset not in offset_map:
            continue
        matching = [s.name for s in offset_map[sym.offset]]
        assert sym.name in matching
        sym_to_matching[sym.name] = matching
    return sym_to_matching
def main():
    """Check a binary's symbol order against an orderfile.

    Command line: ``[options] <binary> <orderfile>``. ``--target-arch`` is
    accepted but unused.

    Returns:
        1 when the positional arguments are malformed or when
        _VerifySymbolOrder() returns a false value, 0 otherwise.
    """
    parser = optparse.OptionParser(
        usage='usage: %prog [options] <binary> <orderfile>')
    parser.add_option('--target-arch', help='Unused')
    parser.add_option(
        '--threshold', action='store', dest='threshold', default=80, type=int,
        help='The maximum allowed number of out-of-order symbols.')
    options, argv = parser.parse_args(sys.argv)
    # argv[0] is the program name, so exactly two positionals are expected.
    if len(argv) != 3:
        parser.print_help()
        return 1
    binary_filename, orderfile_filename = argv[1:]

    symbol_infos = symbol_extractor.SymbolInfosFromBinary(binary_filename)

    # open() in a with-block replaces the Python 2-only file() builtin and
    # guarantees the orderfile handle is closed.
    with open(orderfile_filename) as orderfile:
        ordered_symbols = [line.strip() for line in orderfile]

    if not _VerifySymbolOrder(ordered_symbols, symbol_infos,
                              options.threshold):
        return 1
    # Explicit success code; previously the function fell off the end.
    return 0
def _ExtractAndProcessSymbols(native_library_filename):
    """Extracts, sorts and filters symbols.

    Symbols whose name starts with '__ThumbV7PILongThunk_' are dropped.

    Args:
      native_library_filename: (str) Path to the native library

    Returns:
      [symbol_extractor.SymbolInfo], sorted by offset, without thunks.
    """
    logging.info('Extracting symbols')
    symbol_infos = symbol_extractor.SymbolInfosFromBinary(
        native_library_filename)
    # Values are passed as logging arguments so formatting is deferred until
    # a handler actually emits the record.
    logging.info('%d symbols', len(symbol_infos))
    symbol_infos.sort(key=operator.attrgetter('offset'))
    real_symbols = [
        s for s in symbol_infos
        if not s.name.startswith('__ThumbV7PILongThunk_')
    ]
    logging.info('%d long jumps Thunks',
                 len(symbol_infos) - len(real_symbols))
    return real_symbols
def CodePagesToMangledSymbols(native_library_filename):
    """From the native library, groups the symbol per code page.

    Each symbol is split across the pages it spans. Its byte range is
    [offset, offset + size), so a symbol that ends exactly on a page
    boundary is not attributed to the page starting at that boundary.

    Args:
      native_library_filename: (str) Native library path.

    Returns:
      {page_offset: set((mangled_name, size_in_page), ...)}
    """
    symbols = symbol_extractor.SymbolInfosFromBinary(native_library_filename)
    # Different symbols can be at the same address, through identical code
    # folding for instance. In this case, only keep the first one. This is not
    # ideal, as file attribution will be incorrect in this case. However ICF
    # mostly works with small symbols, so it shouldn't impact numbers too much.
    result = collections.defaultdict(set)
    known_offsets = set()
    for s in symbols:
        assert s.offset % 2 == 0, 'Wrong alignment'
        if s.offset in known_offsets:
            continue
        known_offsets.add(s.offset)
        start, end = s.offset, s.offset + s.size
        # `end` is exclusive, so the last page touched is the one holding
        # byte end - 1. Using `end` itself added a spurious zero-sized entry
        # to the next page whenever a symbol ended on a page boundary. The
        # max() keeps a zero-sized symbol on its own page.
        last_byte = max(start, end - 1)
        start_page, end_page = start & _PAGE_MASK, last_byte & _PAGE_MASK
        page = start_page
        while page <= end_page:
            symbol_start_in_page = max(page, start)
            symbol_end_in_page = min(page + _PAGE_SIZE, end)
            size_in_page = symbol_end_in_page - symbol_start_in_page
            result[page].add((s.name, size_in_page))
            page += _PAGE_SIZE
    for page, entries in result.items():
        total_size = sum(entry[1] for entry in entries)
        if total_size > _PAGE_SIZE:
            # Floor division keeps the page index an integer on Python 3.
            logging.warning(
                'Too many symbols in page (%d * 4k)! Total size: %d',
                page // _PAGE_SIZE, total_size)
    return result
def _GroupLibrarySymbolInfosByOffset(lib_filename):
    """Extract a library's symbols and return them as {offset: [SymbolInfo]}."""
    return symbol_extractor.GroupSymbolInfosByOffset(
        symbol_extractor.SymbolInfosFromBinary(lib_filename))