Example #1
def _CreateSizeInfo(aliases=None, containers=None):
    build_config = {}
    metadata = {}
    section_sizes = {'.text': 100, '.bss': 40}
    if not containers:
        containers = [
            models.Container('',
                             metadata=metadata,
                             section_sizes=section_sizes)
        ]
    models.BaseContainer.AssignShortNames(containers)
    TEXT = models.SECTION_TEXT
    symbols = [
        _MakeSym(models.SECTION_DEX_METHOD, 10, 'a', 'com.Foo#bar()'),
        _MakeSym(TEXT, 20, 'a', '.Lfoo'),
        _MakeSym(TEXT, 30, 'b'),
        _MakeSym(TEXT, 40, 'b'),
        _MakeSym(TEXT, 50, 'b'),
        _MakeSym(TEXT, 60, ''),
    ]
    for s in symbols:
        s.container = containers[0]
    if aliases:
        for tup in aliases:
            syms = symbols[tup[0]:tup[1]]
            for sym in syms:
                sym.aliases = syms
    return models.SizeInfo(build_config, containers, symbols)
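A minimal standalone sketch of the alias-assignment loop above; Sym is a hypothetical stand-in for models.Symbol. Each (start, end) tuple in |aliases| makes every symbol in that slice share a single alias list:

class Sym:  # Hypothetical stand-in for models.Symbol.
    def __init__(self, name):
        self.name = name
        self.aliases = None

symbols = [Sym(n) for n in ('dex', '.Lfoo', 'b1', 'b2', 'b3', '')]
for start, end in [(1, 3)]:  # Same shape as _CreateSizeInfo(aliases=...).
    syms = symbols[start:end]
    for sym in syms:
        sym.aliases = syms  # All members point at the same list.

assert symbols[1].aliases is symbols[2].aliases
assert symbols[0].aliases is None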
Example #2
def _CreateSizeInfo(aliases=None):
    build_config = {}
    metadata = {}
    section_sizes = {'.text': 100, '.bss': 40}
    containers = [
        models.Container(name='',
                         metadata=metadata,
                         section_sizes=section_sizes)
    ]
    TEXT = models.SECTION_TEXT
    symbols = [
        _MakeSym(models.SECTION_DEX_METHOD, 10, 'a', 'com.Foo#bar()'),
        _MakeSym(TEXT, 20, 'a', '.Lfoo'),
        _MakeSym(TEXT, 30, 'b'),
        _MakeSym(TEXT, 40, 'b'),
        _MakeSym(TEXT, 50, 'b'),
        _MakeSym(TEXT, 60, ''),
    ]
    # For simplicity, not associating |symbols| with |containers|.
    if aliases:
        for tup in aliases:
            syms = symbols[tup[0]:tup[1]]
            for sym in syms:
                sym.aliases = syms
    return models.SizeInfo(build_config, containers, symbols)
Example #3
def Analyze(path, lazy_paths=None):
    """Returns a SizeInfo for the given |path|.

    Args:
      path: Can be a .size file, or a .map(.gz). If the latter, then lazy_paths
          must be provided as well.
    """
    if path.endswith('.size'):
        logging.debug('Loading results from: %s', path)
        size_info = file_format.LoadSizeInfo(path)
        # Recompute derived values (padding and function names).
        logging.info('Calculating padding')
        _RemoveDuplicatesAndCalculatePadding(size_info.symbols)
        logging.info('Deriving signatures')
        # Re-parse out function parameters.
        _NormalizeNames(size_info.symbols)
        return size_info
    elif not path.endswith('.map') and not path.endswith('.map.gz'):
        raise Exception('Expected input to be a .map or a .size')
    else:
        # output_directory needed for source file information.
        lazy_paths.VerifyOutputDirectory()
        # tool_prefix needed for c++filt.
        lazy_paths.VerifyToolPrefix()

        with _OpenMaybeGz(path) as map_file:
            section_sizes, symbols = linker_map_parser.MapFileParser().Parse(
                map_file)
        size_info = models.SizeInfo(section_sizes, models.SymbolGroup(symbols))

        # Map file for some reason doesn't unmangle all names.
        logging.info('Calculating padding')
        _RemoveDuplicatesAndCalculatePadding(size_info.symbols)
        # Unmangle prints its own log statement.
        _UnmangleRemainingSymbols(size_info.symbols, lazy_paths.tool_prefix)
        logging.info('Extracting source paths from .ninja files')
        all_found = _ExtractSourcePaths(size_info.symbols,
                                        lazy_paths.output_directory)
        assert all_found, (
            'One or more source file paths could not be found. Likely caused by '
            '.ninja files being generated at a different time than the .map file.'
        )
        # Resolve paths prints its own log statement.
        logging.info('Normalizing names')
        _NormalizeNames(size_info.symbols)
        logging.info('Normalizing paths')
        _NormalizeObjectPaths(size_info.symbols)

    if logging.getLogger().isEnabledFor(logging.INFO):
        for line in describe.DescribeSizeInfoCoverage(size_info):
            logging.info(line)
    logging.info('Finished analyzing %d symbols', len(size_info.symbols))
    return size_info
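A hedged usage sketch for this version of Analyze(); lazy_paths stands for whatever helper object the surrounding module builds with VerifyOutputDirectory() and VerifyToolPrefix(), so only the two call shapes are shown:

# Hypothetical calls; the paths and the lazy_paths helper are illustrative.
size_info = Analyze('chrome.size')  # .size files need no extra paths.
size_info = Analyze('chrome.map.gz', lazy_paths=lazy_paths)  # .map(.gz) does.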
Example #4
def _CreateSizeInfo(aliases=None):
    section_sizes = {'.text': 100, '.bss': 40}
    TEXT = models.SECTION_TEXT
    symbols = [
        _MakeSym(models.SECTION_DEX_METHOD, 10, 'a', 'com.Foo#bar()'),
        _MakeSym(TEXT, 20, 'a', '.Lfoo'),
        _MakeSym(TEXT, 30, 'b'),
        _MakeSym(TEXT, 40, 'b'),
        _MakeSym(TEXT, 50, 'b'),
        _MakeSym(TEXT, 60, ''),
    ]
    if aliases:
        for tup in aliases:
            syms = symbols[tup[0]:tup[1]]
            for sym in syms:
                sym.aliases = syms
    return models.SizeInfo(section_sizes, symbols)
Example #5
def CreateSizeInfo(
    section_sizes, raw_symbols, metadata=None, normalize_names=True):
  """Performs operations on all symbols and creates a SizeInfo object."""
  # Calculating padding is not strictly required, but it is useful to check for
  # large padding and log a warning.
  logging.info('Calculating padding')
  _CalculatePadding(raw_symbols)

  # Do not call _NormalizeNames() during archive since that method tends to need
  # tweaks over time. Calling it only when loading .size files allows for more
  # flexibility.
  if normalize_names:
    _NormalizeNames(raw_symbols)

  raw_symbols.sort(key=lambda s: (
      s.IsPak(), s.IsBss(), s.section_name, s.address))
  logging.info('Processed %d symbols', len(raw_symbols))
  return models.SizeInfo(section_sizes, raw_symbols, metadata=metadata)
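A standalone sketch of the sort key above; the attribute names are assumptions replacing the real IsPak()/IsBss() methods. Since False < True in Python, pak symbols sort after everything else, .bss after the remaining sections, with ties broken by section name and address:

from collections import namedtuple

# Hypothetical stand-in for models.Symbol; real code calls s.IsPak()/s.IsBss().
Sym = namedtuple('Sym', 'is_pak is_bss section_name address')
syms = [
    Sym(False, True, '.bss', 0x10),
    Sym(True, False, '.pak.nontranslated', 0x00),
    Sym(False, False, '.text', 0x30),
    Sym(False, False, '.text', 0x20),
]
syms.sort(key=lambda s: (s.is_pak, s.is_bss, s.section_name, s.address))
assert [s.section_name for s in syms] == [
    '.text', '.text', '.bss', '.pak.nontranslated']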
Example #6
def Analyze(path, output_directory=None, tool_prefix=''):
    if path.endswith('.size'):
        logging.debug('Loading results from: %s', path)
        size_info = file_format.LoadSizeInfo(path)
        # Recompute derived values (padding and function names).
        logging.info('Calculating padding')
        _RemoveDuplicatesAndCalculatePadding(size_info.symbols)
        logging.info('Deriving signatures')
        # Re-parse out function parameters.
        _NormalizeNames(size_info.symbols)
        return size_info
    elif not path.endswith('.map') and not path.endswith('.map.gz'):
        raise Exception('Expected input to be a .map or a .size')
    else:
        # Verify tool_prefix early.
        output_directory, tool_prefix = (_DetectToolPrefix(
            tool_prefix, path, output_directory))

        with _OpenMaybeGz(path) as map_file:
            section_sizes, symbols = linker_map_parser.MapFileParser().Parse(
                map_file)
        timestamp = datetime.datetime.utcfromtimestamp(os.path.getmtime(path))
        size_info = models.SizeInfo(section_sizes,
                                    models.SymbolGroup(symbols),
                                    timestamp=timestamp)

        # Map file for some reason doesn't unmangle all names.
        logging.info('Calculating padding')
        _RemoveDuplicatesAndCalculatePadding(size_info.symbols)
        # Unmangle prints its own log statement.
        _UnmangleRemainingSymbols(size_info.symbols, tool_prefix)
        logging.info('Extracting source paths from .ninja files')
        _ExtractSourcePaths(size_info.symbols, output_directory)
        # Resolve paths prints its own log statement.
        logging.info('Normalizing names')
        _NormalizeNames(size_info.symbols)
        logging.info('Normalizing paths')
        _NormalizeObjectPaths(size_info.symbols)

    if logging.getLogger().isEnabledFor(logging.INFO):
        for line in describe.DescribeSizeInfoCoverage(size_info):
            logging.info(line)
    logging.info('Finished analyzing %d symbols', len(size_info.symbols))
    return size_info
Example #7
def _LoadSizeInfoFromFile(file_obj, size_path):
    """Loads a size_info from the given file.

    See _SaveSizeInfoToFile() for details on the .size file format.

    Args:
      file_obj: File to read; should be a GzipFile.
    """
    lines = iter(file_obj)
    _ReadLine(lines)  # Line 0: Created by supersize header
    actual_version = _ReadLine(lines)
    assert actual_version == _SERIALIZATION_VERSION, (
        'Version mismatch. Need to write some upgrade code.')
    # JSON metadata
    json_len = int(_ReadLine(lines))
    json_str = file_obj.read(json_len)

    headers = json.loads(json_str)
    section_sizes = headers['section_sizes']
    metadata = headers.get('metadata')
    has_components = headers.get('has_components', False)
    lines = iter(file_obj)  # Re-create the line iterator after the raw read().
    _ReadLine(lines)  # Eat the newline that follows the JSON header.

    # Path list
    num_path_tuples = int(_ReadLine(lines))  # Line 4 - number of paths in list
    # Read the path list values and store for later
    path_tuples = [
        _ReadValuesFromLine(lines, split='\t') for _ in xrange(num_path_tuples)
    ]

    # Component list
    if has_components:
        num_components = int(_ReadLine(lines))  # number of components in list
        components = [_ReadLine(lines) for _ in xrange(num_components)]

    # Symbol counts by section.
    section_names = _ReadValuesFromLine(lines, split='\t')
    section_counts = [int(c) for c in _ReadValuesFromLine(lines, split='\t')]

    # Addresses, sizes, path indices, component indices
    def read_numeric(delta=False):
        """Read numeric values, where each line corresponds to a symbol group.

    The values in each line are space seperated.
    If |delta| is True, the numbers are read as a value to add to the sum of the
    prior values in the line, or as the amount to change by.
    """
        ret = []
        delta_multiplier = int(delta)
        for _ in section_counts:
            value = 0
            fields = []
            for f in _ReadValuesFromLine(lines, split=' '):
                value = value * delta_multiplier + int(f)
                fields.append(value)
            ret.append(fields)
        return ret

    addresses = read_numeric(delta=True)
    sizes = read_numeric(delta=False)
    path_indices = read_numeric(delta=True)
    if has_components:
        component_indices = read_numeric(delta=True)
    else:
        component_indices = [None] * len(section_names)

    raw_symbols = [None] * sum(section_counts)
    symbol_idx = 0
    for (cur_section_name, cur_section_count, cur_addresses, cur_sizes,
         cur_path_indices, cur_component_indices) in itertools.izip(
             section_names, section_counts, addresses, sizes, path_indices,
             component_indices):
        alias_counter = 0
        for i in xrange(cur_section_count):
            parts = _ReadValuesFromLine(lines, split='\t')
            full_name = parts[0]
            flags_part = None
            aliases_part = None

            # aliases_part or flags_part may have been omitted.
            if len(parts) == 3:
                # full_name  aliases_part  flags_part
                aliases_part = parts[1]
                flags_part = parts[2]
            elif len(parts) == 2:
                if parts[1][0] == '0':
                    # full_name  aliases_part
                    aliases_part = parts[1]
                else:
                    # full_name  flags_part
                    flags_part = parts[1]

            # Use a bit less RAM by using the same instance for this common string.
            if full_name == models.STRING_LITERAL_NAME:
                full_name = models.STRING_LITERAL_NAME
            flags = int(flags_part, 16) if flags_part else 0
            num_aliases = int(aliases_part, 16) if aliases_part else 0

            # Skip the constructor to avoid default value checks
            new_sym = models.Symbol.__new__(models.Symbol)
            new_sym.section_name = cur_section_name
            new_sym.full_name = full_name
            new_sym.address = cur_addresses[i]
            new_sym.size = cur_sizes[i]
            paths = path_tuples[cur_path_indices[i]]
            new_sym.object_path, new_sym.source_path = paths
            component = components[
                cur_component_indices[i]] if has_components else ''
            new_sym.component = component
            new_sym.flags = flags
            # Derived
            new_sym.padding = 0
            new_sym.template_name = ''
            new_sym.name = ''

            if num_aliases:
                assert alias_counter == 0
                new_sym.aliases = [new_sym]
                alias_counter = num_aliases - 1
            elif alias_counter > 0:
                new_sym.aliases = raw_symbols[symbol_idx - 1].aliases
                new_sym.aliases.append(new_sym)
                alias_counter -= 1
            else:
                new_sym.aliases = None

            raw_symbols[symbol_idx] = new_sym
            symbol_idx += 1

    return models.SizeInfo(section_sizes,
                           raw_symbols,
                           metadata=metadata,
                           size_path=size_path)
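A standalone sketch of the delta encoding that read_numeric(delta=True) undoes above: each stored field is the difference from the previous decoded value, and delta_multiplier (1 or 0) selects between a running sum and independent values:

values = [100, 120, 150, 150]  # e.g. symbol addresses for one section.
deltas = [values[0]] + [b - a for a, b in zip(values, values[1:])]
line = ' '.join(str(d) for d in deltas)  # '100 20 30 0'

decoded, value = [], 0
for f in line.split(' '):  # Mirrors read_numeric with delta_multiplier == 1.
    value = value * 1 + int(f)
    decoded.append(value)
assert decoded == values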
Example #8
def _LoadSizeInfoFromFile(file_obj, size_path):
    """Loads a size_info from the given file.

    See _SaveSizeInfoToFile() for details on the .size file format.

    Args:
      file_obj: File to read; should be a GzipFile.
    """
    # Split lines on '\n', since '\r' can appear in some lines!
    lines = io.TextIOWrapper(file_obj, newline='\n')
    header_line = _ReadLine(lines).encode('ascii')
    assert header_line == _COMMON_HEADER[:-1], 'was ' + str(header_line)
    header_line = _ReadLine(lines).encode('ascii')
    if header_line == _SIZE_HEADER_SINGLE_CONTAINER[:-1]:
        has_multi_containers = False
    elif header_line == _SIZE_HEADER_MULTI_CONTAINER[:-1]:
        has_multi_containers = True
    else:
        raise ValueError('Version mismatch. Need to write some upgrade code.')

    # JSON header fields
    json_len = int(_ReadLine(lines))
    json_str = lines.read(json_len)

    fields = json.loads(json_str)
    assert ('containers' in fields) == has_multi_containers
    assert ('build_config' in fields) == has_multi_containers
    assert ('metadata' not in fields) == has_multi_containers
    assert ('section_sizes' not in fields) == has_multi_containers

    containers = []
    if has_multi_containers:  # New format.
        build_config = fields['build_config']
        for cfield in fields['containers']:
            c = models.Container(name=cfield['name'],
                                 metadata=cfield['metadata'],
                                 section_sizes=cfield['section_sizes'])
            containers.append(c)
    else:  # Old format.
        build_config = {}
        metadata = fields.get('metadata')
        if metadata:
            for key in models.BUILD_CONFIG_KEYS:
                if key in metadata:
                    build_config[key] = metadata[key]
                    del metadata[key]
        section_sizes = fields['section_sizes']
        containers.append(
            models.Container(name='',
                             metadata=metadata,
                             section_sizes=section_sizes))
    models.Container.AssignShortNames(containers)

    has_components = fields.get('has_components', False)
    has_padding = fields.get('has_padding', False)

    # Eat empty line.
    _ReadLine(lines)

    # Path list.
    num_path_tuples = int(_ReadLine(lines))  # Number of paths in list.
    # Read the path list values and store for later.
    path_tuples = [
        _ReadValuesFromLine(lines, split='\t') for _ in range(num_path_tuples)
    ]

    if num_path_tuples == 0:
        logging.warning('File contains no symbols: %s', size_path)
        return models.SizeInfo(build_config,
                               containers, [],
                               size_path=size_path)

    # Component list.
    if has_components:
        num_components = int(_ReadLine(lines))  # Number of components in list.
        components = [_ReadLine(lines) for _ in range(num_components)]

    # Symbol counts by "segments", defined as (container, section) tuples.
    segment_names = _ReadValuesFromLine(lines, split='\t')
    symbol_counts = [int(c) for c in _ReadValuesFromLine(lines, split='\t')]

    # Addresses, sizes, paddings, path indices, component indices.
    def read_numeric(delta=False):
        """Read numeric values, where each line corresponds to a symbol group.

    The values in each line are space separated.
    If |delta| is True, the numbers are read as a value to add to the sum of the
    prior values in the line, or as the amount to change by.
    """
        ret = []
        delta_multiplier = int(delta)
        for _ in symbol_counts:
            value = 0
            fields = []
            for f in _ReadValuesFromLine(lines, split=' '):
                value = value * delta_multiplier + int(f)
                fields.append(value)
            ret.append(fields)
        return ret

    addresses = read_numeric(delta=True)
    sizes = read_numeric(delta=False)
    if has_padding:
        paddings = read_numeric(delta=False)
    else:
        paddings = [None] * len(segment_names)
    path_indices = read_numeric(delta=True)
    if has_components:
        component_indices = read_numeric(delta=True)
    else:
        component_indices = [None] * len(segment_names)

    raw_symbols = [None] * sum(symbol_counts)
    symbol_idx = 0
    for (cur_segment_name, cur_symbol_count, cur_addresses, cur_sizes,
         cur_paddings, cur_path_indices,
         cur_component_indices) in zip(segment_names, symbol_counts, addresses,
                                       sizes, paddings, path_indices,
                                       component_indices):
        if has_multi_containers:
            # Extract '<cur_container_idx_str>cur_section_name'.
            assert cur_segment_name.startswith('<')
            cur_container_idx_str, cur_section_name = (
                cur_segment_name[1:].split('>', 1))
            cur_container = containers[int(cur_container_idx_str)]
        else:
            cur_section_name = cur_segment_name
            cur_container = containers[0]
        alias_counter = 0
        for i in range(cur_symbol_count):
            parts = _ReadValuesFromLine(lines, split='\t')
            full_name = parts[0]
            flags_part = None
            aliases_part = None

            # aliases_part or flags_part may have been omitted.
            if len(parts) == 3:
                # full_name  aliases_part  flags_part
                aliases_part = parts[1]
                flags_part = parts[2]
            elif len(parts) == 2:
                if parts[1][0] == '0':
                    # full_name  aliases_part
                    aliases_part = parts[1]
                else:
                    # full_name  flags_part
                    flags_part = parts[1]

            # Use a bit less RAM by using the same instance for this common string.
            if full_name == models.STRING_LITERAL_NAME:
                full_name = models.STRING_LITERAL_NAME
            flags = int(flags_part, 16) if flags_part else 0
            num_aliases = int(aliases_part, 16) if aliases_part else 0

            # Skip the constructor to avoid default value checks.
            new_sym = models.Symbol.__new__(models.Symbol)
            new_sym.container = cur_container
            new_sym.section_name = cur_section_name
            new_sym.full_name = full_name
            new_sym.address = cur_addresses[i]
            new_sym.size = cur_sizes[i]
            paths = path_tuples[cur_path_indices[i]]
            new_sym.object_path, new_sym.source_path = paths
            component = components[
                cur_component_indices[i]] if has_components else ''
            new_sym.component = component
            new_sym.flags = flags
            # Derived.
            if cur_paddings:
                new_sym.padding = cur_paddings[i]
                if not new_sym.IsOverhead():
                    new_sym.size += new_sym.padding
            else:
                new_sym.padding = 0  # Computed below.
            new_sym.template_name = ''
            new_sym.name = ''

            if num_aliases:
                assert alias_counter == 0
                new_sym.aliases = [new_sym]
                alias_counter = num_aliases - 1
            elif alias_counter > 0:
                new_sym.aliases = raw_symbols[symbol_idx - 1].aliases
                new_sym.aliases.append(new_sym)
                alias_counter -= 1
            else:
                new_sym.aliases = None

            raw_symbols[symbol_idx] = new_sym
            symbol_idx += 1

    if not has_padding:
        CalculatePadding(raw_symbols)

    return models.SizeInfo(build_config,
                           containers,
                           raw_symbols,
                           size_path=size_path)
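A tiny sketch of the '<container_idx>section_name' segment encoding that the multi-container branch above parses; encode_segment is a hypothetical helper mirroring what the writer side would emit:

def encode_segment(container_idx, section_name):
    # Hypothetical writer-side counterpart of the parsing above.
    return '<%d>%s' % (container_idx, section_name)

def decode_segment(segment_name):
    assert segment_name.startswith('<')
    idx_str, section_name = segment_name[1:].split('>', 1)
    return int(idx_str), section_name

assert decode_segment(encode_segment(2, '.text')) == (2, '.text')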
Example #9
def _LoadSizeInfoFromFile(file_obj):
  """Loads a size_info from the given file."""
  lines = iter(file_obj)
  next(lines)  # Comment line.
  actual_version = next(lines)[:-1]
  assert actual_version == _SERIALIZATION_VERSION, (
      'Version mismatch. Need to write some upgrade code.')
  json_len = int(next(lines))
  json_str = file_obj.read(json_len)
  headers = json.loads(json_str)
  section_sizes = headers['section_sizes']
  metadata = headers.get('metadata')
  lines = iter(file_obj)
  next(lines)  # newline after closing } of json.

  num_path_tuples = int(next(lines))
  path_tuples = [None] * num_path_tuples
  for i in xrange(num_path_tuples):
    path_tuples[i] = next(lines)[:-1].split('\t')

  section_names = next(lines)[:-1].split('\t')
  section_counts = [int(c) for c in next(lines)[:-1].split('\t')]

  def read_numeric(delta=False):
    ret = []
    delta_multiplier = int(delta)
    for _ in section_counts:
      value = 0
      fields = next(lines).split(' ')
      for i, f in enumerate(fields):
        value = value * delta_multiplier + int(f)
        fields[i] = value
      ret.append(fields)
    return ret

  addresses = read_numeric(delta=True)
  sizes = read_numeric(delta=False)
  path_indices = read_numeric(delta=True)

  symbol_list = [None] * sum(section_counts)
  symbol_idx = 0
  for section_index, cur_section_name in enumerate(section_names):
    for i in xrange(section_counts[section_index]):
      line = next(lines)[:-1]
      is_anonymous = line.endswith('\t1')
      name = line[:-2] if is_anonymous else line

      new_sym = models.Symbol.__new__(models.Symbol)
      new_sym.section_name = cur_section_name
      new_sym.address = addresses[section_index][i]
      new_sym.size = sizes[section_index][i]
      new_sym.name = name
      paths = path_tuples[path_indices[section_index][i]]
      new_sym.object_path = paths[0]
      new_sym.source_path = paths[1]
      new_sym.is_anonymous = is_anonymous
      new_sym.padding = 0  # Derived
      new_sym.full_name = None  # Derived
      symbol_list[symbol_idx] = new_sym
      symbol_idx += 1

  symbols = models.SymbolGroup(symbol_list)
  return models.SizeInfo(section_sizes, symbols, metadata=metadata)
Example #10
def CreateSizeInfo(map_path,
                   elf_path,
                   tool_prefix,
                   output_directory,
                   normalize_names=True):
    """Creates a SizeInfo.

    Args:
      map_path: Path to the linker .map(.gz) file to parse.
      elf_path: Path to the corresponding unstripped ELF file. Used to find
          symbol aliases and inlined functions. Can be None.
      tool_prefix: Prefix for c++filt & nm (required).
      output_directory: Build output directory. If None, source_paths and
          symbol alias information will not be recorded.
    """
    source_mapper = None
    if output_directory:
        # Start by finding the elf_object_paths, so that nm can run on them while
        # the linker .map is being parsed.
        logging.info('Parsing ninja files.')
        source_mapper, elf_object_paths = ninja_parser.Parse(
            output_directory, elf_path)
        logging.debug('Parsed %d .ninja files.',
                      source_mapper.parsed_file_count)
        assert not elf_path or elf_object_paths, (
            'Failed to find link command in ninja files for ' +
            os.path.relpath(elf_path, output_directory))

    if elf_path:
        # Run nm on the elf file to retrieve the list of symbol names per-address.
        # This list is required because the .map file contains only a single name
        # for each address, yet multiple symbols are often coalesced when they are
        # identical. This coalescing happens mainly for small symbols and for C++
        # templates. Such symbols make up ~500kb of libchrome.so on Android.
        elf_nm_result = nm.CollectAliasesByAddressAsync(elf_path, tool_prefix)

        # Run nm on all .o/.a files to retrieve the symbol names within them.
        # The list is used to detect when multiple .o files contain the same symbol
        # (e.g. inline functions), and to update the object_path / source_path
        # fields accordingly.
        # Looking in object files is required because the .map file chooses a
        # single path for these symbols.
        # Rather than record all paths for each symbol, set the paths to be the
        # common ancestor of all paths.
        if output_directory:
            bulk_analyzer = nm.BulkObjectFileAnalyzer(tool_prefix,
                                                      output_directory)
            bulk_analyzer.AnalyzePaths(elf_object_paths)

    logging.info('Parsing Linker Map')
    with _OpenMaybeGz(map_path) as map_file:
        section_sizes, raw_symbols = (
            linker_map_parser.MapFileParser().Parse(map_file))

    if elf_path:
        logging.debug('Validating section sizes')
        elf_section_sizes = _SectionSizesFromElf(elf_path, tool_prefix)
        for k, v in elf_section_sizes.iteritems():
            if v != section_sizes.get(k):
                logging.error(
                    'ELF file and .map file do not agree on section sizes.')
                logging.error('.map file: %r', section_sizes)
                logging.error('readelf: %r', elf_section_sizes)
                sys.exit(1)

    if elf_path and output_directory:
        missed_object_paths = _DiscoverMissedObjectPaths(
            raw_symbols, elf_object_paths)
        bulk_analyzer.AnalyzePaths(missed_object_paths)
        bulk_analyzer.Close()

    if source_mapper:
        logging.info('Looking up source paths from ninja files')
        _ExtractSourcePaths(raw_symbols, source_mapper)
        assert source_mapper.unmatched_paths_count == 0, (
            'One or more source file paths could not be found. Likely caused by '
            '.ninja files being generated at a different time than the .map file.'
        )

    logging.info('Stripping linker prefixes from symbol names')
    _StripLinkerAddedSymbolPrefixes(raw_symbols)
    # Map file for some reason doesn't unmangle all names.
    # Unmangle prints its own log statement.
    _UnmangleRemainingSymbols(raw_symbols, tool_prefix)

    if elf_path:
        logging.info('Adding aliased symbols, as reported by nm')
        # This normally does not block (it's finished by this time).
        aliases_by_address = elf_nm_result.get()
        _AddSymbolAliases(raw_symbols, aliases_by_address)

        if output_directory:
            # For aliases, this provides path information where there wasn't any.
            logging.info('Computing ancestor paths for inline functions and '
                         'normalizing object paths')

            object_paths_by_name = bulk_analyzer.Get()
            logging.debug(
                'Fetched path information for %d symbols from %d files',
                len(object_paths_by_name),
                len(elf_object_paths) + len(missed_object_paths))
            _ComputeAncestorPathsAndNormalizeObjectPaths(
                raw_symbols, object_paths_by_name, source_mapper)

    if not elf_path or not output_directory:
        logging.info('Normalizing object paths.')
        for symbol in raw_symbols:
            symbol.object_path = _NormalizeObjectPath(symbol.object_path)

    # Calculating padding is not strictly required, but it is useful to check
    # for large padding and log a warning.
    logging.info('Calculating padding')
    _CalculatePadding(raw_symbols)

    # Do not call _NormalizeNames() during archive since that method tends to need
    # tweaks over time. Calling it only when loading .size files allows for more
    # flexibility.
    if normalize_names:
        _NormalizeNames(raw_symbols)

    logging.info('Processed %d symbols', len(raw_symbols))
    size_info = models.SizeInfo(section_sizes, raw_symbols)

    if logging.getLogger().isEnabledFor(logging.INFO):
        for line in describe.DescribeSizeInfoCoverage(size_info):
            logging.info(line)
    logging.info('Recorded info for %d symbols', len(size_info.raw_symbols))
    return size_info
Example #11
def _LoadSizeInfoFromFile(file_obj, size_path):
  """Loads a size_info from the given file."""
  lines = iter(file_obj)
  next(lines)  # Comment line.
  actual_version = next(lines)[:-1]
  assert actual_version == _SERIALIZATION_VERSION, (
      'Version mismatch. Need to write some upgrade code.')
  json_len = int(next(lines))
  json_str = file_obj.read(json_len)
  headers = json.loads(json_str)
  section_sizes = headers['section_sizes']
  metadata = headers.get('metadata')
  lines = iter(file_obj)
  next(lines)  # newline after closing } of json.

  num_path_tuples = int(next(lines))
  path_tuples = [None] * num_path_tuples
  for i in xrange(num_path_tuples):
    path_tuples[i] = next(lines)[:-1].split('\t')

  section_names = next(lines)[:-1].split('\t')
  section_counts = [int(c) for c in next(lines)[:-1].split('\t')]

  def read_numeric(delta=False):
    ret = []
    delta_multiplier = int(delta)
    for _ in section_counts:
      value = 0
      fields = next(lines).split(' ')
      for i, f in enumerate(fields):
        value = value * delta_multiplier + int(f)
        fields[i] = value
      ret.append(fields)
    return ret

  addresses = read_numeric(delta=True)
  sizes = read_numeric(delta=False)
  path_indices = read_numeric(delta=True)

  raw_symbols = [None] * sum(section_counts)
  symbol_idx = 0
  for section_index, cur_section_name in enumerate(section_names):
    alias_counter = 0
    for i in xrange(section_counts[section_index]):
      parts = next(lines)[:-1].split('\t')
      flags_part = None
      aliases_part = None

      if len(parts) == 3:
        aliases_part = parts[1]
        flags_part = parts[2]
      elif len(parts) == 2:
        if parts[1][0] == '0':
          aliases_part = parts[1]
        else:
          flags_part = parts[1]

      full_name = parts[0]
      # Use a bit less RAM by using the same instance for this common string.
      if full_name == models.STRING_LITERAL_NAME:
        full_name = models.STRING_LITERAL_NAME
      flags = int(flags_part, 16) if flags_part else 0
      num_aliases = int(aliases_part, 16) if aliases_part else 0

      new_sym = models.Symbol.__new__(models.Symbol)
      new_sym.section_name = cur_section_name
      new_sym.address = addresses[section_index][i]
      new_sym.size = sizes[section_index][i]
      new_sym.full_name = full_name
      paths = path_tuples[path_indices[section_index][i]]
      new_sym.object_path = paths[0]
      new_sym.source_path = paths[1]
      new_sym.flags = flags
      new_sym.padding = 0  # Derived
      new_sym.template_name = ''  # Derived
      new_sym.name = ''  # Derived

      if num_aliases:
        assert alias_counter == 0
        new_sym.aliases = [new_sym]
        alias_counter = num_aliases - 1
      elif alias_counter > 0:
        new_sym.aliases = raw_symbols[symbol_idx - 1].aliases
        new_sym.aliases.append(new_sym)
        alias_counter -= 1
      else:
        new_sym.aliases = None

      raw_symbols[symbol_idx] = new_sym
      symbol_idx += 1

  return models.SizeInfo(section_sizes, raw_symbols, metadata=metadata,
                         size_path=size_path)
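A standalone sketch of the alias run-length decoding shared by the loaders above: the first symbol of a group carries the group size in num_aliases, and the next num_aliases - 1 symbols append themselves to the same shared list (Sym is a hypothetical stand-in for models.Symbol):

class Sym:  # Hypothetical stand-in for models.Symbol.
  def __init__(self, name):
    self.name = name
    self.aliases = None

# (name, num_aliases) pairs; a nonzero count starts a group of that size.
encoded = [('a', 0), ('b', 3), ('c', 0), ('d', 0), ('e', 0)]
decoded = []
alias_counter = 0
for name, num_aliases in encoded:
  sym = Sym(name)
  if num_aliases:
    assert alias_counter == 0
    sym.aliases = [sym]  # Start a new shared list.
    alias_counter = num_aliases - 1
  elif alias_counter > 0:
    sym.aliases = decoded[-1].aliases  # Join the previous symbol's group.
    sym.aliases.append(sym)
    alias_counter -= 1
  decoded.append(sym)

assert [s.name for s in decoded[1].aliases] == ['b', 'c', 'd']
assert decoded[4].aliases is None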
Example #12
def _LoadSizeInfoFromFile(file_obj, size_path):
    """Loads a size_info from the given file.

    See _SaveSizeInfoToFile() for details on the .size file format.

    Args:
      file_obj: File to read; should be a GzipFile.
    """
    # Split lines on '\n', since '\r' can appear in some lines!
    lines = io.TextIOWrapper(file_obj, newline='\n')
    _ReadLine(lines)  # Line 0: Created by supersize header
    actual_version = _ReadLine(lines)
    assert actual_version == _SERIALIZATION_VERSION, (
        'Version mismatch. Need to write some upgrade code.')
    # JSON header fields
    json_len = int(_ReadLine(lines))
    json_str = lines.read(json_len)

    fields = json.loads(json_str)

    has_multi_containers = False

    containers = []
    if has_multi_containers:  # New format.
        raise ValueError('Multiple containers not yet supported.')
    else:
        # Parse old format, but separate data into build_config and metadata.
        build_config = {}
        metadata = fields.get('metadata')
        if metadata:
            for key in models.BUILD_CONFIG_KEYS:
                if key in metadata:
                    build_config[key] = metadata[key]
                    del metadata[key]
        section_sizes = fields['section_sizes']
        containers.append(
            models.Container(name='',
                             metadata=metadata,
                             section_sizes=section_sizes))

    has_components = fields.get('has_components', False)
    has_padding = fields.get('has_padding', False)

    # Eat empty line.
    _ReadLine(lines)

    # Path list
    num_path_tuples = int(_ReadLine(lines))  # Number of paths in list
    # Read the path list values and store for later
    path_tuples = [
        _ReadValuesFromLine(lines, split='\t') for _ in range(num_path_tuples)
    ]

    # Component list
    if has_components:
        num_components = int(_ReadLine(lines))  # number of components in list
        components = [_ReadLine(lines) for _ in range(num_components)]

    # Symbol counts by section.
    section_names = _ReadValuesFromLine(lines, split='\t')
    symbol_counts = [int(c) for c in _ReadValuesFromLine(lines, split='\t')]

    # Addresses, sizes, paddings, path indices, component indices
    def read_numeric(delta=False):
        """Read numeric values, where each line corresponds to a symbol group.

    The values in each line are space separated.
    If |delta| is True, the numbers are read as a value to add to the sum of the
    prior values in the line, or as the amount to change by.
    """
        ret = []
        delta_multiplier = int(delta)
        for _ in symbol_counts:
            value = 0
            fields = []
            for f in _ReadValuesFromLine(lines, split=' '):
                value = value * delta_multiplier + int(f)
                fields.append(value)
            ret.append(fields)
        return ret

    addresses = read_numeric(delta=True)
    sizes = read_numeric(delta=False)
    if has_padding:
        paddings = read_numeric(delta=False)
    else:
        paddings = [None] * len(section_names)
    path_indices = read_numeric(delta=True)
    if has_components:
        component_indices = read_numeric(delta=True)
    else:
        component_indices = [None] * len(section_names)

    raw_symbols = [None] * sum(symbol_counts)
    symbol_idx = 0
    for (cur_section_name, cur_symbol_count, cur_addresses, cur_sizes,
         cur_paddings, cur_path_indices,
         cur_component_indices) in zip(section_names, symbol_counts, addresses,
                                       sizes, paddings, path_indices,
                                       component_indices):
        if has_multi_containers:
            raise ValueError('Multiple containers not yet supported.')
        else:
            cur_container = containers[0]
        alias_counter = 0
        for i in range(cur_symbol_count):
            parts = _ReadValuesFromLine(lines, split='\t')
            full_name = parts[0]
            flags_part = None
            aliases_part = None

            # aliases_part or flags_part may have been omitted.
            if len(parts) == 3:
                # full_name  aliases_part  flags_part
                aliases_part = parts[1]
                flags_part = parts[2]
            elif len(parts) == 2:
                if parts[1][0] == '0':
                    # full_name  aliases_part
                    aliases_part = parts[1]
                else:
                    # full_name  flags_part
                    flags_part = parts[1]

            # Use a bit less RAM by using the same instance for this common string.
            if full_name == models.STRING_LITERAL_NAME:
                full_name = models.STRING_LITERAL_NAME
            flags = int(flags_part, 16) if flags_part else 0
            num_aliases = int(aliases_part, 16) if aliases_part else 0

            # Skip the constructor to avoid default value checks
            new_sym = models.Symbol.__new__(models.Symbol)
            new_sym.container = cur_container
            new_sym.section_name = cur_section_name
            new_sym.full_name = full_name
            new_sym.address = cur_addresses[i]
            new_sym.size = cur_sizes[i]
            paths = path_tuples[cur_path_indices[i]]
            new_sym.object_path, new_sym.source_path = paths
            component = components[
                cur_component_indices[i]] if has_components else ''
            new_sym.component = component
            new_sym.flags = flags
            # Derived
            if cur_paddings:
                new_sym.padding = cur_paddings[i]
                new_sym.size += new_sym.padding
            else:
                # This will be computed during CreateSizeInfo()
                new_sym.padding = 0
            new_sym.template_name = ''
            new_sym.name = ''

            if num_aliases:
                assert alias_counter == 0
                new_sym.aliases = [new_sym]
                alias_counter = num_aliases - 1
            elif alias_counter > 0:
                new_sym.aliases = raw_symbols[symbol_idx - 1].aliases
                new_sym.aliases.append(new_sym)
                alias_counter -= 1
            else:
                new_sym.aliases = None

            raw_symbols[symbol_idx] = new_sym
            symbol_idx += 1

    if not has_padding:
        CalculatePadding(raw_symbols)

    return models.SizeInfo(build_config,
                           containers,
                           raw_symbols,
                           size_path=size_path)
Example #13
def CreateSizeInfo(map_path,
                   elf_path,
                   tool_prefix,
                   output_directory,
                   normalize_names=True,
                   track_string_literals=True):
    """Creates a SizeInfo.

    Args:
      map_path: Path to the linker .map(.gz) file to parse.
      elf_path: Path to the corresponding unstripped ELF file. Used to find
          symbol aliases and inlined functions. Can be None.
      tool_prefix: Prefix for c++filt & nm (required).
      output_directory: Build output directory. If None, source_paths and
          symbol alias information will not be recorded.
      normalize_names: Whether to normalize symbol names.
      track_string_literals: Whether to break down "** merge strings" sections
          into smaller symbols (requires output_directory).
    """
    source_mapper = None
    if output_directory:
        # Start by finding the elf_object_paths, so that nm can run on them while
        # the linker .map is being parsed.
        logging.info('Parsing ninja files.')
        source_mapper, elf_object_paths = ninja_parser.Parse(
            output_directory, elf_path)
        logging.debug('Parsed %d .ninja files.',
                      source_mapper.parsed_file_count)
        assert not elf_path or elf_object_paths, (
            'Failed to find link command in ninja files for ' +
            os.path.relpath(elf_path, output_directory))

    if elf_path:
        # Run nm on the elf file to retrieve the list of symbol names per-address.
        # This list is required because the .map file contains only a single name
        # for each address, yet multiple symbols are often coalesced when they are
        # identical. This coalescing happens mainly for small symbols and for C++
        # templates. Such symbols make up ~500kb of libchrome.so on Android.
        elf_nm_result = nm.CollectAliasesByAddressAsync(elf_path, tool_prefix)

        # Run nm on all .o/.a files to retrieve the symbol names within them.
        # The list is used to detect when multiple .o files contain the same symbol
        # (e.g. inline functions), and to update the object_path / source_path
        # fields accordingly.
        # Looking in object files is required because the .map file chooses a
        # single path for these symbols.
        # Rather than record all paths for each symbol, set the paths to be the
        # common ancestor of all paths.
        if output_directory:
            bulk_analyzer = nm.BulkObjectFileAnalyzer(tool_prefix,
                                                      output_directory)
            bulk_analyzer.AnalyzePaths(elf_object_paths)

    logging.info('Parsing Linker Map')
    with _OpenMaybeGz(map_path) as map_file:
        section_sizes, raw_symbols = (
            linker_map_parser.MapFileParser().Parse(map_file))

    if elf_path:
        logging.debug('Validating section sizes')
        elf_section_sizes = _SectionSizesFromElf(elf_path, tool_prefix)
        for k, v in elf_section_sizes.iteritems():
            if v != section_sizes.get(k):
                logging.error(
                    'ELF file and .map file do not agree on section sizes.')
                logging.error('.map file: %r', section_sizes)
                logging.error('readelf: %r', elf_section_sizes)
                sys.exit(1)

    if elf_path and output_directory:
        missed_object_paths = _DiscoverMissedObjectPaths(
            raw_symbols, elf_object_paths)
        bulk_analyzer.AnalyzePaths(missed_object_paths)
        bulk_analyzer.SortPaths()
        if track_string_literals:
            merge_string_syms = [
                s for s in raw_symbols if s.full_name == '** merge strings'
                or s.full_name == '** lld merge strings'
            ]
            # It is more likely that there is a bug in supersize than that an
            # ELF contains no string literals at all.
            assert merge_string_syms
            string_positions = [(s.address, s.size) for s in merge_string_syms]
            bulk_analyzer.AnalyzeStringLiterals(elf_path, string_positions)

    logging.info('Stripping linker prefixes from symbol names')
    _StripLinkerAddedSymbolPrefixes(raw_symbols)
    # Map file for some reason doesn't unmangle all names.
    # Unmangle prints its own log statement.
    _UnmangleRemainingSymbols(raw_symbols, tool_prefix)

    if elf_path:
        logging.info(
            'Adding symbols removed by identical code folding (as reported by nm)'
        )
        # This normally does not block (it's finished by this time).
        names_by_address = elf_nm_result.get()
        _AddNmAliases(raw_symbols, names_by_address)

        if output_directory:
            object_paths_by_name = bulk_analyzer.GetSymbolNames()
            logging.debug(
                'Fetched path information for %d symbols from %d files',
                len(object_paths_by_name),
                len(elf_object_paths) + len(missed_object_paths))

            # For aliases, this provides path information where there wasn't any.
            logging.info(
                'Creating aliases for symbols shared by multiple paths')
            raw_symbols = _AssignNmAliasPathsAndCreatePathAliases(
                raw_symbols, object_paths_by_name)

            if track_string_literals:
                logging.info(
                    'Waiting for string literal extraction to complete.')
                list_of_positions_by_object_path = (
                    bulk_analyzer.GetStringPositions())
            bulk_analyzer.Close()

            if track_string_literals:
                logging.info('Deconstructing ** merge strings into literals')
                replacements = _CreateMergeStringsReplacements(
                    merge_string_syms, list_of_positions_by_object_path)
                for merge_sym, literal_syms in itertools.izip(
                        merge_string_syms, replacements):
                    # Don't replace if no literals were found.
                    if literal_syms:
                        # Re-find the symbols since aliases cause their indices to change.
                        idx = raw_symbols.index(merge_sym)
                        # This assignment is a bit slow (causes array to be shifted), but
                        # is fast enough since len(merge_string_syms) < 10.
                        raw_symbols[idx:idx + 1] = literal_syms

    _ExtractSourcePathsAndNormalizeObjectPaths(raw_symbols, source_mapper)
    logging.info('Converting excessive aliases into shared-path symbols')
    _CompactLargeAliasesIntoSharedSymbols(raw_symbols)
    logging.debug('Connecting nm aliases')
    _ConnectNmAliases(raw_symbols)

    # Calculating padding is not strictly required, but it is useful to check
    # for large padding and log a warning.
    logging.info('Calculating padding')
    _CalculatePadding(raw_symbols)

    # Do not call _NormalizeNames() during archive since that method tends to need
    # tweaks over time. Calling it only when loading .size files allows for more
    # flexibility.
    if normalize_names:
        _NormalizeNames(raw_symbols)

    logging.info('Processed %d symbols', len(raw_symbols))
    size_info = models.SizeInfo(section_sizes, raw_symbols)

    if logging.getLogger().isEnabledFor(logging.INFO):
        for line in describe.DescribeSizeInfoCoverage(size_info):
            logging.info(line)
    logging.info('Recorded info for %d symbols', len(size_info.raw_symbols))
    return size_info
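A standalone sketch of the slice-assignment splice used above to deconstruct a '** merge strings' symbol into its literals; the strings here are illustrative stand-ins for Symbol objects:

raw_symbols = ['a', '** merge strings', 'b']
literal_syms = ['"foo"', '"bar"']

idx = raw_symbols.index('** merge strings')  # Re-find, as in the code above.
raw_symbols[idx:idx + 1] = literal_syms  # One splice shifts the tail once.
assert raw_symbols == ['a', '"foo"', '"bar"', 'b']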