Example #1
def CreateSizeInfo(map_path,
                   elf_path,
                   tool_prefix,
                   output_directory,
                   normalize_names=True):
    """Creates a SizeInfo.

  Args:
    map_path: Path to the linker .map(.gz) file to parse.
    elf_path: Path to the corresponding unstripped ELF file. Used to find symbol
        aliases and inlined functions. Can be None.
    tool_prefix: Prefix for c++filt & nm (required).
    output_directory: Build output directory. If None, source_paths and symbol
        alias information will not be recorded.
  """
    source_mapper = None
    if output_directory:
        # Start by finding the elf_object_paths, so that nm can run on them while
        # the linker .map is being parsed.
        logging.info('Parsing ninja files.')
        source_mapper, elf_object_paths = ninja_parser.Parse(
            output_directory, elf_path)
        logging.debug('Parsed %d .ninja files.',
                      source_mapper.parsed_file_count)
        assert not elf_path or elf_object_paths, (
            'Failed to find link command in ninja files for ' +
            os.path.relpath(elf_path, output_directory))

    if elf_path:
        # Run nm on the elf file to retrieve the list of symbol names per-address.
        # This list is required because the .map file contains only a single name
        # for each address, yet multiple symbols are often coalesced when they are
        # identical. This coalescing happens mainly for small symbols and for C++
        # templates. Such symbols make up ~500kb of libchrome.so on Android.
        elf_nm_result = nm.CollectAliasesByAddressAsync(elf_path, tool_prefix)

        # Run nm on all .o/.a files to retrieve the symbol names within them.
        # The list is used to detect when multiple .o files contain the same symbol
        # (e.g. inline functions), and to update the object_path / source_path
        # fields accordingly.
        # Looking in object files is required because the .map file chooses a
        # single path for these symbols.
        # Rather than record all paths for each symbol, set the paths to be the
        # common ancestor of all paths.
        if output_directory:
            bulk_analyzer = nm.BulkObjectFileAnalyzer(tool_prefix,
                                                      output_directory)
            bulk_analyzer.AnalyzePaths(elf_object_paths)

    logging.info('Parsing Linker Map')
    with _OpenMaybeGz(map_path) as map_file:
        section_sizes, raw_symbols = (
            linker_map_parser.MapFileParser().Parse(map_file))

    if elf_path:
        logging.debug('Validating section sizes')
        elf_section_sizes = _SectionSizesFromElf(elf_path, tool_prefix)
        for k, v in elf_section_sizes.iteritems():
            if v != section_sizes.get(k):
                logging.error(
                    'ELF file and .map file do not agree on section sizes.')
                logging.error('.map file: %r', section_sizes)
                logging.error('readelf: %r', elf_section_sizes)
                sys.exit(1)

    if elf_path and output_directory:
        missed_object_paths = _DiscoverMissedObjectPaths(
            raw_symbols, elf_object_paths)
        bulk_analyzer.AnalyzePaths(missed_object_paths)
        bulk_analyzer.Close()

    if source_mapper:
        logging.info('Looking up source paths from ninja files')
        _ExtractSourcePaths(raw_symbols, source_mapper)
        assert source_mapper.unmatched_paths_count == 0, (
            'One or more source file paths could not be found. Likely caused by '
            '.ninja files being generated at a different time than the .map file.'
        )

    logging.info('Stripping linker prefixes from symbol names')
    _StripLinkerAddedSymbolPrefixes(raw_symbols)
    # Map file for some reason doesn't unmangle all names.
    # Unmangle prints its own log statement.
    _UnmangleRemainingSymbols(raw_symbols, tool_prefix)

    if elf_path:
        logging.info('Adding aliased symbols, as reported by nm')
        # This normally does not block (it's finished by this time).
        aliases_by_address = elf_nm_result.get()
        _AddSymbolAliases(raw_symbols, aliases_by_address)

        if output_directory:
            # For aliases, this provides path information where there wasn't any.
            logging.info('Computing ancestor paths for inline functions and '
                         'normalizing object paths')

            object_paths_by_name = bulk_analyzer.Get()
            logging.debug(
                'Fetched path information for %d symbols from %d files',
                len(object_paths_by_name),
                len(elf_object_paths) + len(missed_object_paths))
            _ComputeAncestorPathsAndNormalizeObjectPaths(
                raw_symbols, object_paths_by_name, source_mapper)

    if not elf_path or not output_directory:
        logging.info('Normalizing object paths.')
        for symbol in raw_symbols:
            symbol.object_path = _NormalizeObjectPath(symbol.object_path)

    # Padding not really required, but it is useful to check for large padding and
    # log a warning.
    logging.info('Calculating padding')
    _CalculatePadding(raw_symbols)

    # Do not call _NormalizeNames() during archiving since that method tends to
    # need tweaks over time. Calling it only when loading .size files allows
    # for more flexibility.
    if normalize_names:
        _NormalizeNames(raw_symbols)

    logging.info('Processed %d symbols', len(raw_symbols))
    size_info = models.SizeInfo(section_sizes, raw_symbols)

    if logging.getLogger().isEnabledFor(logging.INFO):
        for line in describe.DescribeSizeInfoCoverage(size_info):
            logging.info(line)
    logging.info('Recorded info for %d symbols', len(size_info.raw_symbols))
    return size_info
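
The examples open the linker map through _OpenMaybeGz, whose implementation is not shown above. A minimal sketch of what such a helper might look like, assuming it only needs to transparently handle gzip-compressed .map files (as the ".map(.gz)" docstring suggests):

import gzip


def _OpenMaybeGz(path):
  # Sketch (assumption, not the actual implementation): open |path| with
  # gzip when it is compressed, otherwise as a regular file. Either return
  # value supports the `with` statement used at the call sites above.
  if path.endswith('.gz'):
    return gzip.open(path, 'rb')
  return open(path, 'rb')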
Example #2
def CreateSectionSizesAndSymbols(
      map_path=None, tool_prefix=None, output_directory=None, elf_path=None,
      apk_path=None, track_string_literals=True, metadata=None,
      apk_elf_result=None, pak_files=None, pak_info_file=None,
      knobs=SectionSizeKnobs()):
  """Creates sections sizes and symbols for a SizeInfo.

  Args:
    map_path: Path to the linker .map(.gz) file to parse.
    elf_path: Path to the corresponding unstripped ELF file. Used to find symbol
        aliases and inlined functions. Can be None.
    tool_prefix: Prefix for c++filt & nm (required).
    output_directory: Build output directory. If None, source_paths and symbol
        alias information will not be recorded.
    track_string_literals: Whether to break down "** merge string" sections into
        smaller symbols (requires output_directory).
  """
  source_mapper = None
  elf_object_paths = None
  if output_directory:
    # Start by finding the elf_object_paths, so that nm can run on them while
    # the linker .map is being parsed.
    logging.info('Parsing ninja files.')
    source_mapper, elf_object_paths = ninja_parser.Parse(
        output_directory, elf_path)
    logging.debug('Parsed %d .ninja files.', source_mapper.parsed_file_count)
    assert not elf_path or elf_object_paths, (
        'Failed to find link command in ninja files for ' +
        os.path.relpath(elf_path, output_directory))

  section_sizes, raw_symbols = _ParseElfInfo(
      map_path, elf_path, tool_prefix, output_directory, track_string_literals,
      elf_object_paths)
  elf_overhead_size = _CalculateElfOverhead(section_sizes, elf_path)

  pak_symbols_by_id = None
  if apk_path:
    pak_symbols_by_id = _FindPakSymbolsFromApk(apk_path, output_directory,
                                               knobs)
    section_sizes, elf_overhead_size = _ParseApkElfSectionSize(
        section_sizes, metadata, apk_elf_result)
    raw_symbols.extend(_ParseApkOtherSymbols(section_sizes, apk_path))
  elif pak_files and pak_info_file:
    pak_symbols_by_id = _FindPakSymbolsFromFiles(
        pak_files, pak_info_file, output_directory)

  if elf_path:
    elf_overhead_symbol = models.Symbol(
        models.SECTION_OTHER, elf_overhead_size, full_name='Overhead: ELF file')
    prev = section_sizes.setdefault(models.SECTION_OTHER, 0)
    section_sizes[models.SECTION_OTHER] = prev + elf_overhead_size
    raw_symbols.append(elf_overhead_symbol)

  if pak_symbols_by_id:
    object_paths = (p for p in source_mapper.IterAllPaths() if p.endswith('.o'))
    pak_raw_symbols = _ParsePakSymbols(
        section_sizes, object_paths, output_directory, pak_symbols_by_id)
    raw_symbols.extend(pak_raw_symbols)

  _ExtractSourcePathsAndNormalizeObjectPaths(raw_symbols, source_mapper)
  logging.info('Converting excessive aliases into shared-path symbols')
  _CompactLargeAliasesIntoSharedSymbols(raw_symbols, knobs)
  logging.debug('Connecting nm aliases')
  _ConnectNmAliases(raw_symbols)
  return section_sizes, raw_symbols
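
Unlike Example #1, this version returns the raw pieces instead of building the SizeInfo itself. A hedged usage sketch of a caller, with placeholder paths and the models.SizeInfo constructor borrowed from Example #1 (the real constructor may take additional arguments):

# Hypothetical caller; every path below is a placeholder.
section_sizes, raw_symbols = CreateSectionSizesAndSymbols(
    map_path='out/Release/lib.unstripped/libchrome.so.map.gz',
    elf_path='out/Release/lib.unstripped/libchrome.so',
    tool_prefix='path/to/toolchain-prefix-',
    output_directory='out/Release')
# Example #1 constructs the SizeInfo from the same two values.
size_info = models.SizeInfo(section_sizes, raw_symbols)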
Example #3
def CreateSectionSizesAndSymbols(
    map_path, elf_path, tool_prefix, output_directory,
    track_string_literals=True):
  """Creates sections sizes and symbols for a SizeInfo.

  Args:
    map_path: Path to the linker .map(.gz) file to parse.
    elf_path: Path to the corresponding unstripped ELF file. Used to find symbol
        aliases and inlined functions. Can be None.
    tool_prefix: Prefix for c++filt & nm (required).
    output_directory: Build output directory. If None, source_paths and symbol
        alias information will not be recorded.
    track_string_literals: Whether to break down "** merge string" sections into
        smaller symbols (requires output_directory).
  """
  source_mapper = None
  if output_directory:
    # Start by finding the elf_object_paths, so that nm can run on them while
    # the linker .map is being parsed.
    logging.info('Parsing ninja files.')
    source_mapper, elf_object_paths = ninja_parser.Parse(
        output_directory, elf_path)
    logging.debug('Parsed %d .ninja files.', source_mapper.parsed_file_count)
    assert not elf_path or elf_object_paths, (
        'Failed to find link command in ninja files for ' +
        os.path.relpath(elf_path, output_directory))

  if elf_path:
    # Run nm on the elf file to retrieve the list of symbol names per-address.
    # This list is required because the .map file contains only a single name
    # for each address, yet multiple symbols are often coalesced when they are
    # identical. This coalescing happens mainly for small symbols and for C++
    # templates. Such symbols make up ~500kb of libchrome.so on Android.
    elf_nm_result = nm.CollectAliasesByAddressAsync(elf_path, tool_prefix)

    # Run nm on all .o/.a files to retrieve the symbol names within them.
    # The list is used to detect when multiple .o files contain the same symbol
    # (e.g. inline functions), and to update the object_path / source_path
    # fields accordingly.
    # Looking in object files is required because the .map file chooses a
    # single path for these symbols.
    # Rather than record all paths for each symbol, set the paths to be the
    # common ancestor of all paths.
    if output_directory:
      bulk_analyzer = nm.BulkObjectFileAnalyzer(tool_prefix, output_directory)
      bulk_analyzer.AnalyzePaths(elf_object_paths)

  logging.info('Parsing Linker Map')
  with _OpenMaybeGz(map_path) as map_file:
    section_sizes, raw_symbols = (
        linker_map_parser.MapFileParser().Parse(map_file))

  if elf_path:
    logging.debug('Validating section sizes')
    elf_section_sizes = _SectionSizesFromElf(elf_path, tool_prefix)
    for k, v in elf_section_sizes.iteritems():
      if v != section_sizes.get(k):
        logging.error('ELF file and .map file do not agree on section sizes.')
        logging.error('.map file: %r', section_sizes)
        logging.error('readelf: %r', elf_section_sizes)
        sys.exit(1)

  if elf_path and output_directory:
    missed_object_paths = _DiscoverMissedObjectPaths(
        raw_symbols, elf_object_paths)
    bulk_analyzer.AnalyzePaths(missed_object_paths)
    bulk_analyzer.SortPaths()
    if track_string_literals:
      merge_string_syms = [s for s in raw_symbols if
                           s.full_name == '** merge strings' or
                           s.full_name == '** lld merge strings']
      # A bug in supersize is more likely than an ELF that contains no string
      # literals at all.
      assert merge_string_syms
      string_positions = [(s.address, s.size) for s in merge_string_syms]
      bulk_analyzer.AnalyzeStringLiterals(elf_path, string_positions)

  logging.info('Stripping linker prefixes from symbol names')
  _StripLinkerAddedSymbolPrefixes(raw_symbols)
  # Map file for some reason doesn't unmangle all names.
  # Unmangle prints its own log statement.
  _UnmangleRemainingSymbols(raw_symbols, tool_prefix)

  if elf_path:
    logging.info(
        'Adding symbols removed by identical code folding (as reported by nm)')
    # This normally does not block (it's finished by this time).
    names_by_address = elf_nm_result.get()
    _AddNmAliases(raw_symbols, names_by_address)

    if output_directory:
      object_paths_by_name = bulk_analyzer.GetSymbolNames()
      logging.debug('Fetched path information for %d symbols from %d files',
                    len(object_paths_by_name),
                    len(elf_object_paths) + len(missed_object_paths))

      # For aliases, this provides path information where there wasn't any.
      logging.info('Creating aliases for symbols shared by multiple paths')
      raw_symbols = _AssignNmAliasPathsAndCreatePathAliases(
          raw_symbols, object_paths_by_name)

      if track_string_literals:
        logging.info('Waiting for string literal extraction to complete.')
        list_of_positions_by_object_path = bulk_analyzer.GetStringPositions()
      bulk_analyzer.Close()

      if track_string_literals:
        logging.info('Deconstructing ** merge strings into literals')
        replacements = _CreateMergeStringsReplacements(merge_string_syms,
            list_of_positions_by_object_path)
        for merge_sym, literal_syms in itertools.izip(
            merge_string_syms, replacements):
          # Don't replace if no literals were found.
          if literal_syms:
            # Re-find the symbols since aliases cause their indices to change.
            idx = raw_symbols.index(merge_sym)
            # This assignment is a bit slow (causes array to be shifted), but
            # is fast enough since len(merge_string_syms) < 10.
            raw_symbols[idx:idx + 1] = literal_syms

  _ExtractSourcePathsAndNormalizeObjectPaths(raw_symbols, source_mapper)
  logging.info('Converting excessive aliases into shared-path symbols')
  _CompactLargeAliasesIntoSharedSymbols(raw_symbols)
  logging.debug('Connecting nm aliases')
  _ConnectNmAliases(raw_symbols)
  return section_sizes, raw_symbols
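
The merge-string handling above splices literal symbols into raw_symbols with a one-element slice assignment. A self-contained illustration of that splice, with plain strings standing in for Symbol objects:

# Stand-ins for symbols; strings are used instead of models.Symbol objects.
raw = ['sym_a', '** merge strings', 'sym_b']
literals = ['"hello"', '"world"']

# Re-find the symbol, then replace the one-element slice with the literals.
idx = raw.index('** merge strings')
raw[idx:idx + 1] = literals
assert raw == ['sym_a', '"hello"', '"world"', 'sym_b']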