示例#1
0
 def _collect_addrs(self):
     """Scan every perf.data file and register addresses with addr2line.

     Samples rejected by self._filter_sample are skipped. For the remaining
     samples, the symbol of the sample itself and the symbol of every
     callchain entry are considered; each symbol accepted by
     self._filter_symbol contributes two addresses to self.addr2line:
     its vaddr_in_file and its symbol_addr.
     """
     for record_path in self.config['perf_data_list']:
         report = ReportLib()
         report.SetRecordFile(record_path)
         if self.symfs_dir:
             report.SetSymfs(self.symfs_dir)
         if self.kallsyms:
             report.SetKallsymsFile(self.kallsyms)
         while True:
             current = report.GetNextSample()
             if current is None:
                 break
             if not self._filter_sample(current):
                 continue
             # Sample symbol first, then the callchain entries in order.
             frames = [report.GetSymbolOfCurrentSample()]
             chain = report.GetCallChainOfCurrentSample()
             frames.extend(chain.entries[idx].symbol
                           for idx in range(chain.nr))
             for frame in frames:
                 if not self._filter_symbol(frame):
                     continue
                 dso = frame.dso_name
                 func_start = frame.symbol_addr
                 for addr in (frame.vaddr_in_file, func_start):
                     self.addr2line.add_addr(dso, func_start, addr)
         # All samples of this file were consumed.
         report.Close()
示例#2
0
    def __init__(self, config):
        """Set up the report library, the sample filters and empty tables.

        config is a dict-like object. Keys read here: 'perf_data_path',
        'show_art_frames', 'comm_filters', 'pid_filters', 'tid_filters',
        'dso_filters' (all optional), plus 'max_chain_length' and
        'ndk_path' (required). config['binary_cache_dir'] is overwritten:
        it becomes 'binary_cache' when that directory exists, else None.
        """
        self.config = config
        self.lib = ReportLib()

        cache_dir = 'binary_cache'
        if os.path.isdir(cache_dir):
            config['binary_cache_dir'] = cache_dir
            self.lib.SetSymfs(cache_dir)
        else:
            config['binary_cache_dir'] = None

        record_path = config.get('perf_data_path')
        if record_path:
            self.lib.SetRecordFile(record_path)

        kallsyms_path = 'binary_cache/kallsyms'
        if os.path.isfile(kallsyms_path):
            self.lib.SetKallsymsFile(kallsyms_path)
        if config.get('show_art_frames'):
            self.lib.ShowArtFrames()

        # Each filter is a set, or None when the option is absent or empty.
        comm_names = config.get('comm_filters')
        self.comm_filter = set(comm_names) if comm_names else None
        pids = config.get('pid_filters')
        self.pid_filter = {int(pid) for pid in pids} if pids else None
        tids = config.get('tid_filters')
        self.tid_filter = {int(tid) for tid in tids} if tids else None
        dso_names = config.get('dso_filters')
        self.dso_filter = set(dso_names) if dso_names else None
        self.max_chain_length = config['max_chain_length']

        self.profile = profile_pb2.Profile()
        # The profile string table starts with the empty string at index 0.
        self.profile.string_table.append('')
        self.string_table = {}
        self.sample_types = {}
        self.sample_map, self.sample_list = {}, []
        self.location_map, self.location_list = {}, []
        self.mapping_map, self.mapping_list = {}, []
        self.function_map, self.function_list = {}, []

        # Map from dso_name in perf.data to (binary path, build_id).
        self.binary_map = {}
        self.read_elf = ReadElf(self.config['ndk_path'])
    def load_record_file(self, record_file):
        """Read all samples of record_file and aggregate them into the profile.

        A fresh ReportLib is created for the file and stored in self.lib;
        it is closed and self.lib is reset to None once the last sample has
        been read. Samples rejected by self._filter_report_sample are
        dropped. For accepted samples, one location is added for the sample
        itself and one for each of the last self.max_chain_length callchain
        entries, each subject to self._filter_symbol. Samples that end up
        with no location are discarded.
        """
        self.lib = ReportLib()
        self.lib.SetRecordFile(record_file)

        if self.config['binary_cache_dir']:
            self.lib.SetSymfs(self.config['binary_cache_dir'])
            kallsyms = os.path.join(self.config['binary_cache_dir'],
                                    'kallsyms')
            if os.path.isfile(kallsyms):
                self.lib.SetKallsymsFile(kallsyms)

        if self.config.get('show_art_frames'):
            self.lib.ShowArtFrames()

        # Process all samples in perf.data, aggregate samples.
        while True:
            report_sample = self.lib.GetNextSample()
            if report_sample is None:
                self.lib.Close()
                self.lib = None
                break
            event = self.lib.GetEventOfCurrentSample()
            symbol = self.lib.GetSymbolOfCurrentSample()
            callchain = self.lib.GetCallChainOfCurrentSample()

            if not self._filter_report_sample(report_sample):
                continue

            # Two consecutive value slots per event type: sample count,
            # then accumulated period.
            sample_type_id = self.get_sample_type_id(event.name)
            sample = Sample()
            sample.add_value(sample_type_id, 1)
            sample.add_value(sample_type_id + 1, report_sample.period)
            if self._filter_symbol(symbol):
                location_id = self.get_location_id(report_sample.ip, symbol)
                sample.add_location_id(location_id)
            first_entry = max(0, callchain.nr - self.max_chain_length)
            for i in range(first_entry, callchain.nr):
                entry = callchain.entries[i]
                # Filter each caller frame by its own symbol; testing the
                # leaf symbol here would keep or drop the whole chain based
                # on the leaf's dso alone.
                if self._filter_symbol(entry.symbol):
                    location_id = self.get_location_id(entry.ip, entry.symbol)
                    sample.add_location_id(location_id)
            if sample.location_ids:
                self.add_sample(sample)
示例#4
0
    def __init__(self, config):
        """Set up the report library, the sample filters and empty tables.

        config is a dict-like object. Optional keys read here:
        'perf_data_path', 'comm_filters', 'pid_filters', 'tid_filters' and
        'dso_filters'. config['binary_cache_dir'] is overwritten: it becomes
        'binary_cache' when that directory exists, else None.
        """
        self.config = config
        self.lib = ReportLib()

        cache_dir = 'binary_cache'
        if os.path.isdir(cache_dir):
            config['binary_cache_dir'] = cache_dir
            self.lib.SetSymfs(cache_dir)
        else:
            config['binary_cache_dir'] = None

        record_path = config.get('perf_data_path')
        if record_path:
            self.lib.SetRecordFile(record_path)

        kallsyms_path = 'binary_cache/kallsyms'
        if os.path.isfile(kallsyms_path):
            self.lib.SetKallsymsFile(kallsyms_path)

        # Each filter is a set, or None when the option is absent or empty.
        comm_names = config.get('comm_filters')
        self.comm_filter = set(comm_names) if comm_names else None
        pids = config.get('pid_filters')
        self.pid_filter = {int(pid) for pid in pids} if pids else None
        tids = config.get('tid_filters')
        self.tid_filter = {int(tid) for tid in tids} if tids else None
        dso_names = config.get('dso_filters')
        self.dso_filter = set(dso_names) if dso_names else None

        self.profile = profile_pb2.Profile()
        # The profile string table starts with the empty string at index 0.
        self.profile.string_table.append('')
        self.string_table = {}
        self.sample_types = {}
        self.sample_map, self.sample_list = {}, []
        self.location_map, self.location_list = {}, []
        self.mapping_map, self.mapping_list = {}, []
        self.function_map, self.function_list = {}, []
示例#5
0
    def _collect_used_binaries(self, perf_data_path):
        """Record every binary referenced by perf_data_path in self.binaries.

        self.binaries becomes a dict mapping each dso name seen in a sample
        or callchain entry to the value of GetBuildIdForPath for that name.
        Names for which is_jit_symfile returns true are left out.
        """
        report = ReportLib()
        report.SetRecordFile(perf_data_path)
        report.SetLogSeverity('error')
        build_ids = {}
        while True:
            if report.GetNextSample() is None:
                break
            frames = [report.GetSymbolOfCurrentSample()]
            chain = report.GetCallChainOfCurrentSample()
            frames.extend(chain.entries[idx].symbol
                          for idx in range(chain.nr))

            for frame in frames:
                name = frame.dso_name
                # Look each binary up only once, and never for JIT symfiles.
                if name in build_ids or is_jit_symfile(name):
                    continue
                build_ids[name] = report.GetBuildIdForPath(name)
        report.Close()
        self.binaries = build_ids
示例#6
0
 def _generate_periods(self):
     """Read each perf.data file and accumulate periods from its samples.

     Samples rejected by self._filter_sample are skipped; every other
     sample is passed, together with the open ReportLib, to
     self._generate_periods_for_sample.
     """
     for record_path in self.config['perf_data_list']:
         report = ReportLib()
         report.SetRecordFile(record_path)
         if self.symfs_dir:
             report.SetSymfs(self.symfs_dir)
         if self.kallsyms:
             report.SetKallsymsFile(self.kallsyms)
         current = report.GetNextSample()
         while current is not None:
             if self._filter_sample(current):
                 self._generate_periods_for_sample(report, current)
             current = report.GetNextSample()
         report.Close()
示例#7
0
    def load_record_file(self, record_file, show_art_frames):
        """Load record_file and fold its samples into events/processes/threads.

        Stores the file's meta info, record command line and arch on self,
        bumps the sample and event counters of the matching event, process
        and thread for every sample, and hands each thread a callstack of
        (lib_id, func_id, vaddr_in_file) tuples capped at
        MAX_CALLSTACK_LENGTH entries. Afterwards every thread of every
        event has update_subtree_event_count() called on it.
        """
        report = ReportLib()
        report.SetRecordFile(record_file)
        # If not showing ip for unknown symbols, the percent of the unknown
        # symbol may be accumulated to very big, and ranks first in the
        # sample table.
        report.ShowIpForUnknownSymbol()
        if show_art_frames:
            report.ShowArtFrames()
        if self.binary_cache_path:
            report.SetSymfs(self.binary_cache_path)
        self.meta_info = report.MetaInfo()
        self.cmdline = report.GetRecordCmd()
        self.arch = report.GetArch()
        while True:
            raw_sample = report.GetNextSample()
            if not raw_sample:
                break
            raw_event = report.GetEventOfCurrentSample()
            leaf = report.GetSymbolOfCurrentSample()
            chain = report.GetCallChainOfCurrentSample()
            period = raw_sample.period

            event = self._get_event(raw_event.name)
            self.total_samples += 1
            event.sample_count += 1
            event.event_count += period
            process = event.get_process(raw_sample.pid)
            process.event_count += period
            thread = process.get_thread(raw_sample.tid,
                                        raw_sample.thread_comm)
            thread.event_count += period
            thread.sample_count += 1

            # Sample symbol first, then the callchain entries in order.
            frames = [leaf]
            frames.extend(chain.entries[idx].symbol
                          for idx in range(chain.nr))
            callstack = []
            for frame in frames:
                lib_id = self.libs.get_lib_id(frame.dso_name)
                func_id = self.functions.get_func_id(lib_id, frame)
                callstack.append((lib_id, func_id, frame.vaddr_in_file))
            # Drop everything beyond the allowed callstack length.
            del callstack[MAX_CALLSTACK_LENGTH:]
            thread.add_callstack(period, callstack, self.build_addr_hit_map)
        report.Close()

        for event in self.events.values():
            for thread in event.threads:
                thread.update_subtree_event_count()
示例#8
0
def report_sample(record_file, symfs_dir, kallsyms_file, show_tracing_data):
    """Read record_file and print every sample it contains.

    Args:
        record_file: path passed to ReportLib.SetRecordFile; skipped if None.
        symfs_dir: path passed to ReportLib.SetSymfs; skipped if None.
        kallsyms_file: path passed to ReportLib.SetKallsymsFile; skipped if
            None.
        show_tracing_data: when true, also print the key/value pairs from
            GetTracingDataOfCurrentSample() for samples that have any.

    Each sample is printed as a header line (comm, tid, cpu, time, period,
    event name), one line for the sample's ip, one line per callchain
    entry, and a trailing blank line.
    """
    lib = ReportLib()

    lib.ShowIpForUnknownSymbol()
    if symfs_dir is not None:
        lib.SetSymfs(symfs_dir)
    if record_file is not None:
        lib.SetRecordFile(record_file)
    if kallsyms_file is not None:
        lib.SetKallsymsFile(kallsyms_file)

    while True:
        sample = lib.GetNextSample()
        if sample is None:
            lib.Close()
            break
        event = lib.GetEventOfCurrentSample()
        symbol = lib.GetSymbolOfCurrentSample()
        callchain = lib.GetCallChainOfCurrentSample()

        # sample.time is split as sec * 1000000000 + remainder. Use integer
        # division: with "/" Python 3 yields a float second count and the
        # remainder (hence usec) collapses to roughly zero.
        sec, remainder = divmod(sample.time, 1000000000)
        usec = remainder // 1000
        # Zero-pad the fraction so that e.g. 5 usec prints as ".000005"
        # rather than ".5".
        print('%s\t%d [%03d] %d.%06d:\t\t%d %s:' %
              (sample.thread_comm, sample.tid, sample.cpu, sec, usec,
               sample.period, event.name))
        print('%16x\t%s (%s)' %
              (sample.ip, symbol.symbol_name, symbol.dso_name))
        for i in range(callchain.nr):
            entry = callchain.entries[i]
            print('%16x\t%s (%s)' %
                  (entry.ip, entry.symbol.symbol_name, entry.symbol.dso_name))
        if show_tracing_data:
            data = lib.GetTracingDataOfCurrentSample()
            if data:
                print('\ttracing data:')
                for key, value in data.items():
                    print('\t\t%s : %s' % (key, value))
        print('')
 def setUp(self):
     """Open testdata/perf_with_symbols.data in a fresh ReportLib."""
     report = ReportLib()
     self.report_lib = report
     report.SetRecordFile(os.path.join('testdata', 'perf_with_symbols.data'))
class TestReportLib(unittest.TestCase):
    """Checks ReportLib against the recorded profiles under testdata/."""

    def setUp(self):
        """Open testdata/perf_with_symbols.data in a fresh ReportLib."""
        report = ReportLib()
        self.report_lib = report
        report.SetRecordFile(
            os.path.join('testdata', 'perf_with_symbols.data'))

    def tearDown(self):
        """Close the ReportLib created by setUp."""
        self.report_lib.Close()

    def test_build_id(self):
        """The build id of /data/t2 is reported with a 0x prefix."""
        expected = '0x70f1fe24500fc8b0d9eb477199ca1ca21acca4de'
        self.assertEqual(self.report_lib.GetBuildIdForPath('/data/t2'),
                         expected)

    def test_symbol(self):
        """func2(int, int) shows up with the expected address and length."""
        lib = self.report_lib
        found_func2 = False
        while lib.GetNextSample():
            # The returned sample is not inspected here; the call is kept so
            # the same library calls are made as before.
            lib.GetCurrentSample()
            symbol = lib.GetSymbolOfCurrentSample()
            if symbol.symbol_name != 'func2(int, int)':
                continue
            found_func2 = True
            self.assertEqual(symbol.symbol_addr, 0x4004ed)
            self.assertEqual(symbol.symbol_len, 0x14)
        self.assertTrue(found_func2)

    def test_sample(self):
        """One known sample carries the expected pid/tid/comm/cpu/period."""
        lib = self.report_lib
        found_sample = False
        while lib.GetNextSample():
            sample = lib.GetCurrentSample()
            if not (sample.ip == 0x4004ff and sample.time == 7637889424953):
                continue
            found_sample = True
            self.assertEqual(sample.pid, 15926)
            self.assertEqual(sample.tid, 15926)
            self.assertEqual(sample.thread_comm, 't2')
            self.assertEqual(sample.cpu, 5)
            self.assertEqual(sample.period, 694614)
            self.assertEqual(lib.GetEventOfCurrentSample().name, 'cpu-cycles')
            self.assertEqual(lib.GetCallChainOfCurrentSample().nr, 0)
        self.assertTrue(found_sample)

    def test_meta_info(self):
        """Meta info of the trace-offcpu recording has the expected entries."""
        record_path = os.path.join('testdata', 'perf_with_trace_offcpu.data')
        self.report_lib.SetRecordFile(record_path)
        meta_info = self.report_lib.MetaInfo()
        self.assertIn("simpleperf_version", meta_info)
        self.assertEqual(meta_info["system_wide_collection"], "false")
        self.assertEqual(meta_info["trace_offcpu"], "true")
        self.assertEqual(meta_info["event_type_info"],
                         "cpu-cycles,0,0\nsched:sched_switch,2,47")
        self.assertIn("product_props", meta_info)

    def test_event_name_from_meta_info(self):
        """Both event names of the tracepoint recording are reported."""
        record_path = os.path.join('testdata',
                                   'perf_with_tracepoint_event.data')
        self.report_lib.SetRecordFile(record_path)
        event_names = set()
        while self.report_lib.GetNextSample():
            event_names.add(self.report_lib.GetEventOfCurrentSample().name)
        self.assertIn('sched:sched_switch', event_names)
        self.assertIn('cpu-cycles', event_names)

    def test_record_cmd(self):
        """The record command line is read back verbatim."""
        record_path = os.path.join('testdata', 'perf_with_trace_offcpu.data')
        self.report_lib.SetRecordFile(record_path)
        expected = ("/data/local/tmp/simpleperf record --trace-offcpu "
                    "--duration 2 -g ./simpleperf_runtest_run_and_sleep64")
        self.assertEqual(self.report_lib.GetRecordCmd(), expected)

    def test_offcpu(self):
        """More than 30% of the total period involves SleepFunction."""
        lib = self.report_lib
        lib.SetRecordFile(
            os.path.join('testdata', 'perf_with_trace_offcpu.data'))
        sleep_function_name = "SleepFunction(unsigned long long)"
        total_period = 0
        sleep_function_period = 0
        while lib.GetNextSample():
            sample = lib.GetCurrentSample()
            total_period += sample.period
            leaf_name = lib.GetSymbolOfCurrentSample().symbol_name
            if leaf_name == sleep_function_name:
                # The event name is not checked for these samples.
                sleep_function_period += sample.period
                continue
            callchain = lib.GetCallChainOfCurrentSample()
            # Count the sample once if any caller is the sleep function.
            if any(callchain.entries[idx].symbol.symbol_name ==
                   sleep_function_name for idx in range(callchain.nr)):
                sleep_function_period += sample.period
            self.assertEqual(lib.GetEventOfCurrentSample().name,
                             'cpu-cycles')
        sleep_percentage = float(sleep_function_period) / total_period
        self.assertGreater(sleep_percentage, 0.30)
示例#11
0
def parse_samples(process, args):
    """Load the samples of args.record_file into process.

    Reads args.record_file, args.symfs, args.kallsyms and
    args.min_callchain_percentage. Fills process.cmd, the three
    'ro.product.*' entries of process.props (when the recording has a
    "product_props" meta entry), adds one callchain per sample to the
    matching thread, and finally trims every thread's flamegraph.
    """
    record_file, symfs_dir, kallsyms_file = (args.record_file, args.symfs,
                                             args.kallsyms)

    report = ReportLib()
    report.ShowIpForUnknownSymbol()
    if symfs_dir:
        report.SetSymfs(symfs_dir)
    if record_file:
        report.SetRecordFile(record_file)
    if kallsyms_file:
        report.SetKallsymsFile(kallsyms_file)

    process.cmd = report.GetRecordCmd()
    product_props = report.MetaInfo().get("product_props")
    if product_props:
        # The value holds manufacturer, model and name separated by ':'.
        fields = product_props.split(':')
        prop_names = ('ro.product.manufacturer', 'ro.product.model',
                      'ro.product.name')
        for index, prop_name in enumerate(prop_names):
            process.props[prop_name] = fields[index]

    while True:
        sample = report.GetNextSample()
        if sample is None:
            break
        symbol = report.GetSymbolOfCurrentSample()
        callchain = report.GetCallChainOfCurrentSample()
        thread = process.get_thread(sample.tid, sample.pid)
        thread.add_callchain(callchain, symbol, sample)
        process.num_samples += 1
    report.Close()

    if process.pid == 0:
        # Take name and pid from the first thread whose tid equals its pid.
        for candidate in process.threads.values():
            if candidate.tid == candidate.pid:
                process.name = candidate.name
                process.pid = candidate.pid
                break

    for thread in process.threads.values():
        threshold = thread.event_count * args.min_callchain_percentage * 0.01
        thread.flamegraph.trim_callchain(threshold)

    log_info("Parsed %s callchains." % process.num_samples)
示例#12
0
def parse_samples(process, args, sample_filter_fn):
    """Load the samples of args.record_file into process.

        process: Process object
        args: arguments
        sample_filter_fn: if not None, is called with (sample, symbol,
                          callchain) and may modify them; samples for which
                          it returns false are left out.
    """
    record_file, symfs_dir, kallsyms_file = (args.record_file, args.symfs,
                                             args.kallsyms)

    report = ReportLib()
    report.ShowIpForUnknownSymbol()
    if symfs_dir:
        report.SetSymfs(symfs_dir)
    if record_file:
        report.SetRecordFile(record_file)
    if kallsyms_file:
        report.SetKallsymsFile(kallsyms_file)

    process.cmd = report.GetRecordCmd()
    product_props = report.MetaInfo().get("product_props")
    if product_props:
        # The value holds manufacturer, model and name separated by ':'.
        fields = product_props.split(':')
        prop_names = ('ro.product.manufacturer', 'ro.product.model',
                      'ro.product.name')
        for index, prop_name in enumerate(prop_names):
            process.props[prop_name] = fields[index]

    traced_offcpu = report.MetaInfo().get('trace_offcpu') == 'true'
    process.props['trace_offcpu'] = traced_offcpu
    if traced_offcpu and args.one_flamegraph:
        log_exit(
            "It doesn't make sense to report with --one-flamegraph for perf.data "
            + "recorded with --trace-offcpu.")

    while True:
        sample = report.GetNextSample()
        if sample is None:
            break
        symbol = report.GetSymbolOfCurrentSample()
        callchain = report.GetCallChainOfCurrentSample()
        # Without a filter every sample is kept.
        if not sample_filter_fn or sample_filter_fn(sample, symbol,
                                                    callchain):
            process.add_sample(sample, symbol, callchain)
    report.Close()

    if process.pid == 0:
        # Take name and pid from the first thread whose tid equals its pid.
        for candidate in process.threads.values():
            if candidate.tid == candidate.pid:
                process.name = candidate.name
                process.pid = candidate.pid
                break

    for thread in process.threads.values():
        threshold = thread.num_events * args.min_callchain_percentage * 0.01
        thread.flamegraph.trim_callchain(threshold)

    log_info("Parsed %s callchains." % process.num_samples)
示例#13
0
class PprofProfileGenerator(object):
    """Build a profile_pb2.Profile message from samples read via ReportLib.

    Samples, locations, mappings and functions are first collected in
    de-duplicating tables (a *_map keyed by each object's .key plus a
    *_list in id order) and are written into the Profile message at the end
    of gen(). Ids of locations, mappings and functions start from 1.
    """

    def __init__(self, config):
        """Set up the report library, the sample filters and empty tables.

        config is a dict-like object. Optional keys read here:
        'perf_data_path', 'comm_filters', 'pid_filters', 'tid_filters' and
        'dso_filters'. config['binary_cache_dir'] is overwritten: it becomes
        'binary_cache' when that directory exists, else None.
        """
        self.config = config
        self.lib = ReportLib()

        config['binary_cache_dir'] = 'binary_cache'
        if not os.path.isdir(config['binary_cache_dir']):
            config['binary_cache_dir'] = None
        else:
            self.lib.SetSymfs(config['binary_cache_dir'])
        if config.get('perf_data_path'):
            self.lib.SetRecordFile(config['perf_data_path'])
        kallsyms = 'binary_cache/kallsyms'
        if os.path.isfile(kallsyms):
            self.lib.SetKallsymsFile(kallsyms)
        # Each filter is a set, or None when the option is absent or empty.
        self.comm_filter = set(config['comm_filters']) if config.get('comm_filters') else None
        if config.get('pid_filters'):
            self.pid_filter = {int(x) for x in config['pid_filters']}
        else:
            self.pid_filter = None
        if config.get('tid_filters'):
            self.tid_filter = {int(x) for x in config['tid_filters']}
        else:
            self.tid_filter = None
        self.dso_filter = set(config['dso_filters']) if config.get('dso_filters') else None
        self.profile = profile_pb2.Profile()
        # The profile string table starts with the empty string at index 0.
        self.profile.string_table.append('')
        self.string_table = {}
        self.sample_types = {}
        self.sample_map = {}
        self.sample_list = []
        self.location_map = {}
        self.location_list = []
        self.mapping_map = {}
        self.mapping_list = []
        self.function_map = {}
        self.function_list = []

    def gen(self):
        """Read all samples, add line info, and return the filled Profile."""
        # 1. Process all samples in perf.data, aggregate samples.
        while True:
            report_sample = self.lib.GetNextSample()
            if report_sample is None:
                self.lib.Close()
                break
            event = self.lib.GetEventOfCurrentSample()
            symbol = self.lib.GetSymbolOfCurrentSample()
            callchain = self.lib.GetCallChainOfCurrentSample()

            if not self._filter_report_sample(report_sample):
                continue

            # Two consecutive value slots per event type: sample count,
            # then accumulated period.
            sample_type_id = self.get_sample_type_id(event.name)
            sample = Sample()
            sample.add_value(sample_type_id, 1)
            sample.add_value(sample_type_id + 1, report_sample.period)
            if self._filter_symbol(symbol):
                location_id = self.get_location_id(symbol.vaddr_in_file, symbol)
                sample.add_location_id(location_id)
            for i in range(callchain.nr):
                entry = callchain.entries[i]
                # Filter each caller frame by its own symbol; testing the
                # leaf symbol here would keep or drop the whole chain based
                # on the leaf's dso alone.
                if self._filter_symbol(entry.symbol):
                    location_id = self.get_location_id(entry.ip, entry.symbol)
                    sample.add_location_id(location_id)
            if sample.location_ids:
                self.add_sample(sample)

        # 2. Generate line info for locations and functions.
        self.gen_source_lines()

        # 3. Produce samples/locations/functions in profile
        for sample in self.sample_list:
            self.gen_profile_sample(sample)
        for mapping in self.mapping_list:
            self.gen_profile_mapping(mapping)
        for location in self.location_list:
            self.gen_profile_location(location)
        for function in self.function_list:
            self.gen_profile_function(function)

        return self.profile

    def _filter_report_sample(self, sample):
        """Return true if the sample can be used.

        The comm, pid and tid filters are independent: a filter that is
        None or empty accepts everything, and a sample must pass every
        active filter. (Previously the pid and tid checks were nested under
        the comm check and were skipped when no comm filter was given.)
        """
        if self.comm_filter and sample.thread_comm not in self.comm_filter:
            return False
        if self.pid_filter and sample.pid not in self.pid_filter:
            return False
        if self.tid_filter and sample.tid not in self.tid_filter:
            return False
        return True

    def _filter_symbol(self, symbol):
        """Return true if there is no dso filter or symbol's dso is in it."""
        return not self.dso_filter or symbol.dso_name in self.dso_filter

    def get_string_id(self, str_value):
        """Return the string table index of str_value, adding it if new.

        Empty or None values map to index 0.
        """
        if not str_value:
            return 0
        str_id = self.string_table.get(str_value)
        if str_id is not None:
            return str_id
        # Index 0 of the profile's table is taken by '', so the n-th
        # distinct string gets index n.
        str_id = len(self.string_table) + 1
        self.string_table[str_value] = str_id
        self.profile.string_table.append(str_value)
        return str_id

    def get_string(self, str_id):
        """Return the string stored at index str_id of the string table."""
        return self.profile.string_table[str_id]

    def get_sample_type_id(self, name):
        """Return the index of the first of the two sample types for event name.

        On first use of an event name, two sample types are appended to the
        profile: 'event_<name>_samples' and 'event_<name>_count', both with
        unit 'count'. The returned index refers to the first; the second is
        at index + 1.
        """
        sample_type_id = self.sample_types.get(name)
        if sample_type_id is not None:
            return sample_type_id
        sample_type_id = len(self.profile.sample_type)
        sample_type = self.profile.sample_type.add()
        sample_type.type = self.get_string_id('event_' + name + '_samples')
        sample_type.unit = self.get_string_id('count')
        sample_type = self.profile.sample_type.add()
        sample_type.type = self.get_string_id('event_' + name + '_count')
        sample_type.unit = self.get_string_id('count')
        self.sample_types[name] = sample_type_id
        return sample_type_id

    def get_location_id(self, ip, symbol):
        """Return the id of the location for (ip, symbol), creating it if new."""
        mapping_id = self.get_mapping_id(symbol.mapping[0], symbol.dso_name)
        location = Location(mapping_id, ip, symbol.vaddr_in_file)
        function_id = self.get_function_id(symbol.symbol_name, symbol.dso_name,
                                           symbol.symbol_addr)
        if function_id:
            # Add Line only when it has a valid function id, see http://b/36988814.
            # Default line info only contains the function name
            line = Line()
            line.function_id = function_id
            location.lines.append(line)

        exist_location = self.location_map.get(location.key)
        if exist_location:
            return exist_location.id
        # location_id starts from 1
        location.id = len(self.location_list) + 1
        self.location_list.append(location)
        self.location_map[location.key] = location
        return location.id

    def get_mapping_id(self, report_mapping, filename):
        """Return the id of the mapping for report_mapping, creating it if new.

        The build id of filename is looked up through the report library; a
        leading "0x" is stripped before it is stored in the string table.
        """
        filename_id = self.get_string_id(filename)
        build_id = self.lib.GetBuildIdForPath(filename)
        if build_id and build_id[0:2] == "0x":
            build_id = build_id[2:]
        build_id_id = self.get_string_id(build_id)
        mapping = Mapping(report_mapping.start, report_mapping.end,
                          report_mapping.pgoff, filename_id, build_id_id)
        exist_mapping = self.mapping_map.get(mapping.key)
        if exist_mapping:
            return exist_mapping.id
        # mapping_id starts from 1
        mapping.id = len(self.mapping_list) + 1
        self.mapping_list.append(mapping)
        self.mapping_map[mapping.key] = mapping
        return mapping.id

    def get_mapping(self, mapping_id):
        """Return the mapping with the given id, or None for ids <= 0."""
        return self.mapping_list[mapping_id - 1] if mapping_id > 0 else None

    def get_function_id(self, name, dso_name, vaddr_in_file):
        """Return the id of the function, creating it if new.

        Functions named 'unknown' are not stored and get id 0.
        """
        if name == 'unknown':
            return 0
        function = Function(self.get_string_id(name), self.get_string_id(dso_name), vaddr_in_file)
        exist_function = self.function_map.get(function.key)
        if exist_function:
            return exist_function.id
        # function_id starts from 1
        function.id = len(self.function_list) + 1
        self.function_list.append(function)
        self.function_map[function.key] = function
        return function.id

    def get_function(self, function_id):
        """Return the function with the given id, or None for ids <= 0."""
        return self.function_list[function_id - 1] if function_id > 0 else None

    def add_sample(self, sample):
        """Store sample, merging its values into an existing sample with the same key."""
        exist_sample = self.sample_map.get(sample.key)
        if exist_sample:
            exist_sample.add_values(sample.values)
        else:
            self.sample_list.append(sample)
            self.sample_map[sample.key] = sample

    def gen_source_lines(self):
        """Attach source file and line info to locations and functions.

        Does nothing (besides logging) when config has no binary_cache_dir
        or when find_tool_path cannot find addr2line.
        """
        # 1. Create Addr2line instance
        if not self.config.get('binary_cache_dir'):
            log_info("Can't generate line information because binary_cache is missing.")
            return
        if not find_tool_path('addr2line', self.config['ndk_path']):
            log_info("Can't generate line information because can't find addr2line.")
            return
        addr2line = Addr2Nearestline(self.config['ndk_path'], self.config['binary_cache_dir'], True)

        # 2. Put all needed addresses to it.
        for location in self.location_list:
            mapping = self.get_mapping(location.mapping_id)
            dso_name = self.get_string(mapping.filename_id)
            if location.lines:
                function = self.get_function(location.lines[0].function_id)
                addr2line.add_addr(dso_name, function.vaddr_in_dso, location.vaddr_in_dso)
        for function in self.function_list:
            dso_name = self.get_string(function.dso_name_id)
            addr2line.add_addr(dso_name, function.vaddr_in_dso, function.vaddr_in_dso)

        # 3. Generate source lines.
        addr2line.convert_addrs_to_lines()

        # 4. Annotate locations and functions.
        for location in self.location_list:
            if not location.lines:
                continue
            mapping = self.get_mapping(location.mapping_id)
            dso_name = self.get_string(mapping.filename_id)
            dso = addr2line.get_dso(dso_name)
            if not dso:
                continue
            sources = addr2line.get_addr_source(dso, location.vaddr_in_dso)
            if not sources:
                continue
            for (source_id, source) in enumerate(sources):
                source_file, source_line, function_name = source
                function_id = self.get_function_id(function_name, dso_name, 0)
                if function_id == 0:
                    continue
                if source_id == 0:
                    # Clear default line info
                    location.lines = []
                location.lines.append(self.add_line(source_file, source_line, function_id))

        for function in self.function_list:
            dso_name = self.get_string(function.dso_name_id)
            if function.vaddr_in_dso:
                dso = addr2line.get_dso(dso_name)
                if not dso:
                    continue
                sources = addr2line.get_addr_source(dso, function.vaddr_in_dso)
                if sources:
                    source_file, source_line, _ = sources[0]
                    function.source_filename_id = self.get_string_id(source_file)
                    function.start_line = source_line

    def add_line(self, source_file, source_line, function_id):
        """Return a new Line for function_id at source_line.

        Also records source_file as the source file of that function.
        """
        line = Line()
        function = self.get_function(function_id)
        function.source_filename_id = self.get_string_id(source_file)
        line.function_id = function_id
        line.line = source_line
        return line

    def gen_profile_sample(self, sample):
        """Append sample to the profile, with 0 for value slots it did not set."""
        profile_sample = self.profile.sample.add()
        profile_sample.location_id.extend(sample.location_ids)
        # Every event type owns two value slots (see get_sample_type_id).
        sample_type_count = len(self.sample_types) * 2
        values = [0] * sample_type_count
        for sample_type_id in sample.values:
            values[sample_type_id] = sample.values[sample_type_id]
        profile_sample.value.extend(values)

    def gen_profile_mapping(self, mapping):
        """Append mapping to the profile.

        Line numbers and inline frames are flagged as available only when
        config has a binary_cache_dir.
        """
        profile_mapping = self.profile.mapping.add()
        profile_mapping.id = mapping.id
        profile_mapping.memory_start = mapping.memory_start
        profile_mapping.memory_limit = mapping.memory_limit
        profile_mapping.file_offset = mapping.file_offset
        profile_mapping.filename = mapping.filename_id
        profile_mapping.build_id = mapping.build_id_id
        profile_mapping.has_filenames = True
        profile_mapping.has_functions = True
        if self.config.get('binary_cache_dir'):
            profile_mapping.has_line_numbers = True
            profile_mapping.has_inline_frames = True
        else:
            profile_mapping.has_line_numbers = False
            profile_mapping.has_inline_frames = False

    def gen_profile_location(self, location):
        """Append location and all of its lines to the profile."""
        profile_location = self.profile.location.add()
        profile_location.id = location.id
        profile_location.mapping_id = location.mapping_id
        profile_location.address = location.address
        for source_line in location.lines:
            line = profile_location.line.add()
            line.function_id = source_line.function_id
            line.line = source_line.line

    def gen_profile_function(self, function):
        """Append function to the profile; name and system_name share one string."""
        profile_function = self.profile.function.add()
        profile_function.id = function.id
        profile_function.name = function.name_id
        profile_function.system_name = function.name_id
        profile_function.filename = function.source_filename_id
        profile_function.start_line = function.start_line
示例#14
0
class PprofProfileGenerator(object):
    def __init__(self, config):
        """Configure the report library and create the empty aggregation tables.

        Keys read from config: 'perf_data_path', 'show_art_frames',
        'comm_filters', 'pid_filters', 'tid_filters' and 'dso_filters' are
        optional (read with .get); 'max_chain_length' and 'ndk_path' are
        indexed directly and must be present.

        Note that config['binary_cache_dir'] is overwritten: it becomes
        'binary_cache' when that directory exists, otherwise None.
        """
        self.config = config
        self.lib = ReportLib()

        cache_dir = 'binary_cache'
        if os.path.isdir(cache_dir):
            config['binary_cache_dir'] = cache_dir
            self.lib.SetSymfs(cache_dir)
        else:
            config['binary_cache_dir'] = None

        record_file = config.get('perf_data_path')
        if record_file:
            self.lib.SetRecordFile(record_file)
        kallsyms_path = 'binary_cache/kallsyms'
        if os.path.isfile(kallsyms_path):
            self.lib.SetKallsymsFile(kallsyms_path)
        if config.get('show_art_frames'):
            self.lib.ShowArtFrames()

        # Each filter is None when the corresponding option is absent or empty.
        comm_names = config.get('comm_filters')
        self.comm_filter = set(comm_names) if comm_names else None
        pids = config.get('pid_filters')
        self.pid_filter = {int(pid) for pid in pids} if pids else None
        tids = config.get('tid_filters')
        self.tid_filter = {int(tid) for tid in tids} if tids else None
        dso_names = config.get('dso_filters')
        self.dso_filter = set(dso_names) if dso_names else None
        self.max_chain_length = config['max_chain_length']

        # Index 0 of the profile's string table is the empty string.
        self.profile = profile_pb2.Profile()
        self.profile.string_table.append('')
        self.string_table = {}
        self.sample_types = {}

        # Each *_map deduplicates by key; each *_list keeps creation order.
        self.sample_map, self.sample_list = {}, []
        self.location_map, self.location_list = {}, []
        self.mapping_map, self.mapping_list = {}, []
        self.function_map, self.function_list = {}, []

        # Map from dso_name in perf.data to (binary path, build_id).
        self.binary_map = {}
        self.read_elf = ReadElf(self.config['ndk_path'])

    def gen(self):
        """Build and return the pprof profile for the configured perf.data.

        Reads every sample from the report library, aggregates the ones that
        pass the comm/pid/tid filters, resolves source line information and
        finally emits samples, mappings, locations and functions into
        self.profile.

        Returns:
            self.profile, populated with the generated records.
        """
        # 1. Process all samples in perf.data, aggregate samples.
        while True:
            report_sample = self.lib.GetNextSample()
            if report_sample is None:
                self.lib.Close()
                break
            event = self.lib.GetEventOfCurrentSample()
            symbol = self.lib.GetSymbolOfCurrentSample()
            callchain = self.lib.GetCallChainOfCurrentSample()

            if not self._filter_report_sample(report_sample):
                continue

            # Each event owns two consecutive value slots: sample count, then
            # accumulated period.
            sample_type_id = self.get_sample_type_id(event.name)
            sample = Sample()
            sample.add_value(sample_type_id, 1)
            sample.add_value(sample_type_id + 1, report_sample.period)
            if self._filter_symbol(symbol):
                location_id = self.get_location_id(report_sample.ip, symbol)
                sample.add_location_id(location_id)
            # Only the last max_chain_length callchain entries are used.
            first_entry = max(0, callchain.nr - self.max_chain_length)
            for i in range(first_entry, callchain.nr):
                entry = callchain.entries[i]
                # Filter each frame by its own symbol. Testing the leaf
                # symbol here would be loop-invariant and would let frames
                # from unselected dsos through.
                if self._filter_symbol(entry.symbol):
                    location_id = self.get_location_id(entry.ip, entry.symbol)
                    sample.add_location_id(location_id)
            # Samples whose frames were all filtered out are dropped.
            if sample.location_ids:
                self.add_sample(sample)

        # 2. Generate line info for locations and functions.
        self.gen_source_lines()

        # 3. Produce samples/locations/functions in profile
        for sample in self.sample_list:
            self.gen_profile_sample(sample)
        for mapping in self.mapping_list:
            self.gen_profile_mapping(mapping)
        for location in self.location_list:
            self.gen_profile_location(location)
        for function in self.function_list:
            self.gen_profile_function(function)

        return self.profile

    def _filter_report_sample(self, sample):
        """Return true if the sample can be used."""
        if self.comm_filter:
            if sample.thread_comm not in self.comm_filter:
                return False
        if self.pid_filter:
            if sample.pid not in self.pid_filter:
                return False
        if self.tid_filter:
            if sample.tid not in self.tid_filter:
                return False
        return True

    def _filter_symbol(self, symbol):
        if not self.dso_filter or symbol.dso_name in self.dso_filter:
            return True
        return False

    def get_string_id(self, str_value):
        if not str_value:
            return 0
        str_id = self.string_table.get(str_value)
        if str_id is not None:
            return str_id
        str_id = len(self.string_table) + 1
        self.string_table[str_value] = str_id
        self.profile.string_table.append(str_value)
        return str_id

    def get_string(self, str_id):
        return self.profile.string_table[str_id]

    def get_sample_type_id(self, name):
        sample_type_id = self.sample_types.get(name)
        if sample_type_id is not None:
            return sample_type_id
        sample_type_id = len(self.profile.sample_type)
        sample_type = self.profile.sample_type.add()
        sample_type.type = self.get_string_id('event_' + name + '_samples')
        sample_type.unit = self.get_string_id('count')
        sample_type = self.profile.sample_type.add()
        sample_type.type = self.get_string_id('event_' + name + '_count')
        sample_type.unit = self.get_string_id('count')
        self.sample_types[name] = sample_type_id
        return sample_type_id

    def get_location_id(self, ip, symbol):
        """Return the id of the location for (ip, symbol), registering it if new.

        Location ids start from 1. The mapping and the function that the
        symbol belongs to are looked up (and registered when new) as part of
        this call.
        """
        binary_path, build_id = self.get_binary(symbol.dso_name)
        mapping_id = self.get_mapping_id(symbol.mapping[0], binary_path,
                                         build_id)
        candidate = Location(mapping_id, ip, symbol.vaddr_in_file)
        function_id = self.get_function_id(symbol.symbol_name, binary_path,
                                           symbol.symbol_addr)
        if function_id:
            # Add Line only when it has a valid function id, see http://b/36988814.
            # This default line info carries just the function, no line number.
            default_line = Line()
            default_line.function_id = function_id
            candidate.lines.append(default_line)

        registered = self.location_map.get(candidate.key)
        if not registered:
            candidate.id = len(self.location_list) + 1
            self.location_list.append(candidate)
            self.location_map[candidate.key] = candidate
            registered = candidate
        return registered.id

    def get_mapping_id(self, report_mapping, filename, build_id):
        """Return the id of the mapping described by the arguments, registering it if new.

        Mapping ids start from 1. filename and build_id are interned in the
        string table.
        """
        filename_id = self.get_string_id(filename)
        build_id_id = self.get_string_id(build_id)
        candidate = Mapping(report_mapping.start, report_mapping.end,
                            report_mapping.pgoff, filename_id, build_id_id)
        registered = self.mapping_map.get(candidate.key)
        if not registered:
            candidate.id = len(self.mapping_list) + 1
            self.mapping_list.append(candidate)
            self.mapping_map[candidate.key] = candidate
            registered = candidate
        return registered.id

    def get_binary(self, dso_name):
        """ Return (binary_path, build_id) for a given dso_name. """
        value = self.binary_map.get(dso_name)
        if value:
            return value

        binary_path = dso_name
        build_id = ''

        # The build ids in perf.data are padded to 20 bytes, but pprof needs without padding.
        # So read build id from the binary in binary_cache, and check it with build id in
        # perf.data.
        build_id_in_perf_data = self.lib.GetBuildIdForPath(dso_name)
        # Try elf_path in binary cache.
        elf_path = find_real_dso_path(dso_name,
                                      self.config['binary_cache_dir'])
        if elf_path:
            elf_build_id = self.read_elf.get_build_id(elf_path, False)
            if build_id_in_perf_data:
                match = build_id_in_perf_data == self.read_elf.pad_build_id(
                    elf_build_id)
            else:
                # odex files generated by ART on Android O don't contain build id.
                match = not elf_build_id
            if match:
                build_id = elf_build_id
                binary_path = elf_path

        # When there is no matching elf_path, try converting build_id in perf.data.
        if not build_id and build_id_in_perf_data.startswith('0x'):
            # Fallback to the way used by TrimZeroesFromBuildIDString() in quipper.
            build_id = build_id_in_perf_data[2:]  # remove '0x'
            padding = '0' * 8
            while build_id.endswith(padding):
                build_id = build_id[:-len(padding)]

        self.binary_map[dso_name] = (binary_path, build_id)
        return (binary_path, build_id)

    def get_mapping(self, mapping_id):
        return self.mapping_list[mapping_id - 1] if mapping_id > 0 else None

    def get_function_id(self, name, dso_name, vaddr_in_file):
        if name == 'unknown':
            return 0
        function = Function(self.get_string_id(name),
                            self.get_string_id(dso_name), vaddr_in_file)
        exist_function = self.function_map.get(function.key)
        if exist_function:
            return exist_function.id
        # function_id starts from 1
        function.id = len(self.function_list) + 1
        self.function_list.append(function)
        self.function_map[function.key] = function
        return function.id

    def get_function(self, function_id):
        return self.function_list[function_id - 1] if function_id > 0 else None

    def add_sample(self, sample):
        exist_sample = self.sample_map.get(sample.key)
        if exist_sample:
            exist_sample.add_values(sample.values)
        else:
            self.sample_list.append(sample)
            self.sample_map[sample.key] = sample

    def gen_source_lines(self):
        """Attach source file and line information to locations and functions.

        Does nothing (beyond logging) when config has no 'binary_cache_dir'
        or when llvm-symbolizer can't be found. Otherwise every location that
        has line info and every function is handed to Addr2Nearestline, and
        the results are written back: location.lines is replaced by the
        resolved entries, and functions get source_filename_id / start_line.

        Side effect: resolving locations calls get_function_id(), which may
        append new functions to self.function_list.
        """
        # 1. Create Addr2line instance
        if not self.config.get('binary_cache_dir'):
            log_info(
                "Can't generate line information because binary_cache is missing."
            )
            return
        if not find_tool_path('llvm-symbolizer', self.config['ndk_path']):
            log_info(
                "Can't generate line information because can't find llvm-symbolizer."
            )
            return
        # We have changed dso names to paths in binary_cache in self.get_binary(). So no need to
        # pass binary_cache_dir to addr2line.
        addr2line = Addr2Nearestline(self.config['ndk_path'], None, True)

        # 2. Put all needed addresses to it.
        for location in self.location_list:
            mapping = self.get_mapping(location.mapping_id)
            dso_name = self.get_string(mapping.filename_id)
            # Locations without default line info have no function to anchor
            # the lookup, so they are not submitted.
            if location.lines:
                function = self.get_function(location.lines[0].function_id)
                addr2line.add_addr(dso_name, function.vaddr_in_dso,
                                   location.vaddr_in_dso)
        for function in self.function_list:
            dso_name = self.get_string(function.dso_name_id)
            # The function's own start address is submitted as the address to
            # resolve.
            addr2line.add_addr(dso_name, function.vaddr_in_dso,
                               function.vaddr_in_dso)

        # 3. Generate source lines.
        addr2line.convert_addrs_to_lines()

        # 4. Annotate locations and functions.
        for location in self.location_list:
            if not location.lines:
                continue
            mapping = self.get_mapping(location.mapping_id)
            dso_name = self.get_string(mapping.filename_id)
            dso = addr2line.get_dso(dso_name)
            if not dso:
                continue
            sources = addr2line.get_addr_source(dso, location.vaddr_in_dso)
            if not sources:
                continue
            for (source_id, source) in enumerate(sources):
                source_file, source_line, function_name = source
                # May register a new function (with address 0) in
                # self.function_list; returns 0 for the name 'unknown'.
                function_id = self.get_function_id(function_name, dso_name, 0)
                if function_id == 0:
                    continue
                # NOTE(review): the default line is cleared only when the
                # first source entry is usable; if that entry is skipped
                # above, later entries are appended after the default line.
                # Confirm this is intended.
                if source_id == 0:
                    # Clear default line info
                    location.lines = []
                location.lines.append(
                    self.add_line(source_file, source_line, function_id))

        for function in self.function_list:
            dso_name = self.get_string(function.dso_name_id)
            # Functions with a falsy vaddr_in_dso are left untouched here.
            if function.vaddr_in_dso:
                dso = addr2line.get_dso(dso_name)
                if not dso:
                    continue
                sources = addr2line.get_addr_source(dso, function.vaddr_in_dso)
                if sources:
                    # Only the first source entry is used for the function.
                    source_file, source_line, _ = sources[0]
                    function.source_filename_id = self.get_string_id(
                        source_file)
                    function.start_line = source_line

    def add_line(self, source_file, source_line, function_id):
        """Build a Line for (function_id, source_line).

        Also records source_file as the source filename of the function
        identified by function_id.
        """
        owner = self.get_function(function_id)
        owner.source_filename_id = self.get_string_id(source_file)
        new_line = Line()
        new_line.function_id = function_id
        new_line.line = source_line
        return new_line

    def gen_profile_sample(self, sample):
        profile_sample = self.profile.sample.add()
        profile_sample.location_id.extend(sample.location_ids)
        sample_type_count = len(self.sample_types) * 2
        values = [0] * sample_type_count
        for sample_type_id in sample.values:
            values[sample_type_id] = sample.values[sample_type_id]
        profile_sample.value.extend(values)

    def gen_profile_mapping(self, mapping):
        profile_mapping = self.profile.mapping.add()
        profile_mapping.id = mapping.id
        profile_mapping.memory_start = mapping.memory_start
        profile_mapping.memory_limit = mapping.memory_limit
        profile_mapping.file_offset = mapping.file_offset
        profile_mapping.filename = mapping.filename_id
        profile_mapping.build_id = mapping.build_id_id
        profile_mapping.has_filenames = True
        profile_mapping.has_functions = True
        if self.config.get('binary_cache_dir'):
            profile_mapping.has_line_numbers = True
            profile_mapping.has_inline_frames = True
        else:
            profile_mapping.has_line_numbers = False
            profile_mapping.has_inline_frames = False

    def gen_profile_location(self, location):
        profile_location = self.profile.location.add()
        profile_location.id = location.id
        profile_location.mapping_id = location.mapping_id
        profile_location.address = location.address
        for i in range(len(location.lines)):
            line = profile_location.line.add()
            line.function_id = location.lines[i].function_id
            line.line = location.lines[i].line

    def gen_profile_function(self, function):
        profile_function = self.profile.function.add()
        profile_function.id = function.id
        profile_function.name = function.name_id
        profile_function.system_name = function.name_id
        profile_function.filename = function.source_filename_id
        profile_function.start_line = function.start_line