示例#1
0
    def AnalyzeStringLiterals(self, elf_path, elf_string_positions):
        """Resolves string pieces for each ELF string range and stores them.

        Reads the bytes of every range in |elf_string_positions| from
        |elf_path|, runs string_extract.ResolveStringPieces() over each chunk
        of self._encoded_string_addresses_by_path_chunks, and stores the
        merged per-range results in
        self._list_of_encoded_elf_string_positions_by_path.

        Args:
          elf_path: Path of the ELF file to read string data from.
          elf_string_positions: Sized sequence of (address, size) pairs. It is
              iterated once, and its length decides how many result entries
              are produced.
        """
        logging.debug('worker: AnalyzeStringLiterals() started.')
        # Read string_data from elf_path, to be shared by forked processes.
        address, offset, _ = string_extract.LookupElfRodataInfo(
            elf_path, self._tool_prefix)
        # Subtracting |adjust| turns an address into the position handed to
        # ReadFileChunks().
        adjust = address - offset
        abs_string_positions = ((addr - adjust, s)
                                for addr, s in elf_string_positions)
        string_data = string_extract.ReadFileChunks(elf_path,
                                                    abs_string_positions)

        params = ((chunk, )
                  for chunk in self._encoded_string_addresses_by_path_chunks)
        # Order of the jobs doesn't matter since each job owns independent paths,
        # and our output is a dict where paths are the key.
        results = list(
            concurrent.BulkForkAndCall(
                string_extract.ResolveStringPieces,
                params,
                string_data=string_data,
                tool_prefix=self._tool_prefix,
                output_directory=self._output_directory))

        # Every job result is indexed by string-range position; merge entry i
        # across all jobs. range() (rather than xrange()) keeps this runnable
        # on both Python 2 and Python 3.
        self._list_of_encoded_elf_string_positions_by_path = [
            concurrent.JoinEncodedDictOfLists([r[i] for r in results])
            for i in range(len(elf_string_positions))
        ]
        logging.debug('worker: AnalyzeStringLiterals() completed.')
示例#2
0
 def _ReadElfStringData(self, elf_path, elf_string_ranges):
     """Returns the chunks of |elf_path| covered by |elf_string_ranges|.

     The data is read here so that it can be shared with forked processes.

     Args:
       elf_path: Path of the ELF file to read.
       elf_string_ranges: Iterable of (address, size) pairs.

     Returns:
       The result of string_extract.ReadFileChunks() for the translated
       ranges.
     """
     rodata_address, rodata_offset, _ = string_extract.LookupElfRodataInfo(
         elf_path, self._tool_prefix)
     # Subtracting this delta from an address gives the position that is
     # passed on to ReadFileChunks().
     address_to_offset_delta = rodata_address - rodata_offset
     file_ranges = ((start - address_to_offset_delta, size)
                    for start, size in elf_string_ranges)
     return string_extract.ReadFileChunks(elf_path, file_ranges)
示例#3
0
def main():
    """Command-line entry point that exercises the bulk object file analyzers.

    Analyzes each object path given on the command line, prints how many
    symbol names were found (and optionally the names), and, when --elf-file
    is given, prints how many string literals were found (and optionally a
    truncated view of each string).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--multiprocess', action='store_true')
    parser.add_argument('--tool-prefix', required=True)
    parser.add_argument('--output-directory', required=True)
    parser.add_argument('--elf-file', type=os.path.realpath)
    parser.add_argument('--show-names', action='store_true')
    parser.add_argument('--show-strings', action='store_true')
    parser.add_argument('objects', type=os.path.realpath, nargs='+')

    args = parser.parse_args()
    logging.basicConfig(
        level=logging.DEBUG,
        format='%(levelname).1s %(relativeCreated)6d %(message)s')

    if args.multiprocess:
        bulk_analyzer = _BulkObjectFileAnalyzerMaster(args.tool_prefix,
                                                      args.output_directory)
    else:
        concurrent.DISABLE_ASYNC = True
        bulk_analyzer = _BulkObjectFileAnalyzerWorker(args.tool_prefix,
                                                      args.output_directory)

    # Pass individually to test multiple calls.
    for path in args.objects:
        bulk_analyzer.AnalyzePaths([path])
    bulk_analyzer.SortPaths()

    names_to_paths = bulk_analyzer.GetSymbolNames()
    print('Found {} names'.format(len(names_to_paths)))
    if args.show_names:
        # .items() (rather than .iteritems()) works on Python 2 and 3.
        for name, paths in names_to_paths.items():
            print('{}: {!r}'.format(name, paths))

    if args.elf_file:
        address, offset, size = string_extract.LookupElfRodataInfo(
            args.elf_file, args.tool_prefix)
        bulk_analyzer.AnalyzeStringLiterals(args.elf_file, ((address, size), ))

        # Only one range was passed above, so only the first result is used.
        positions_by_path = bulk_analyzer.GetStringPositions()[0]
        print('Found {} string literals'.format(
            sum(len(v) for v in positions_by_path.values())))
        if args.show_strings:
            logging.debug('.rodata adjust=%d', address - offset)
            for path, positions in positions_by_path.items():
                # |length| is deliberately not called |size| so it does not
                # shadow the .rodata size unpacked above.
                strs = string_extract.ReadFileChunks(
                    args.elf_file,
                    ((offset + addr, length) for addr, length in positions))
                # NOTE(review): if ReadFileChunks() yields bytes under
                # Python 3, the '...' suffix below would need to be a bytes
                # literal -- confirm against string_extract.
                print('{}: {!r}'.format(
                    path,
                    [s if len(s) < 20 else s[:20] + '...' for s in strs]))
示例#4
0
  def _ReadStringLiterals(self, thing=None, all_rodata=False, elf_path=None):
    """Returns a list of (symbol, string value) for all string literal symbols.

    E.g.:
      # Print sorted list of all string literals:
      Print(sorted(x[1] for x in ReadStringLiterals()))

    Args:
      thing: Can be a Symbol, iterable of symbols, or SizeInfo.
           Defaults to the current SizeInfo.
      all_rodata: Assume every symbol within .rodata that ends in a \\0 is a
           string literal.
      elf_path: Path to the executable containing the symbol. Required only
          when auto-detection fails.

    Returns:
      A list of (symbol, data) tuples, where data is the raw content read
      from the ELF file for that symbol. Empty when |thing| yields no
      symbols.
    """
    if thing is None:
      thing = self._size_infos[-1]
    if isinstance(thing, models.SizeInfo):
      thing = thing.raw_symbols.IterUniqueSymbols()
    elif isinstance(thing, models.BaseSymbol):
      thing = thing.IterLeafSymbols()

    # Peek at the first symbol through a clone so that |thing| can still be
    # iterated from the start below.
    thing, thing_clone = itertools.tee(thing)
    first_sym = next(thing_clone, None)
    if not first_sym:
      return []
    size_info = self._SizeInfoForSymbol(first_sym)
    tool_prefix = self._ToolPrefixForSymbol(size_info)
    elf_path = self._ElfPathForSymbol(
        size_info, tool_prefix, elf_path)

    address, offset, _ = string_extract.LookupElfRodataInfo(
        elf_path, tool_prefix)
    # Adding |adjust| to a symbol address gives the position to seek to.
    adjust = offset - address
    ret = []
    with open(elf_path, 'rb') as f:
      for symbol in thing:
        if symbol.section != 'r' or (
            not all_rodata and not symbol.IsStringLiteral()):
          continue
        f.seek(symbol.address + adjust)
        data = f.read(symbol.size_without_padding)
        # As of Oct 2017, there are ~90 symbols name .L.str(.##). These appear
        # in the linker map file explicitly, and there doesn't seem to be a
        # pattern as to which variables lose their kConstant name (the more
        # common case), or which string literals don't get moved to
        # ** merge strings (less common).
        #
        # The file is opened in binary mode, so compare against a bytes
        # literal: on Python 3 indexing bytes yields an int, which never
        # equals '\0'. endswith() also covers the empty-read case.
        if symbol.IsStringLiteral() or (
            all_rodata and data.endswith(b'\0')):
          ret.append((symbol, data))
    return ret