def _collect_addrs(self):
    """Read perf.data, collect all addresses we need to convert to source file:line.

    Every file in self.config['perf_data_list'] is scanned. For each sample accepted
    by self._filter_sample, the sampled symbol and all call chain symbols accepted by
    self._filter_symbol are registered with self.addr2line twice: once with the hit
    address (vaddr_in_file) and once with the symbol start address (symbol_addr).
    """
    for record_path in self.config['perf_data_list']:
        report = ReportLib()
        report.SetRecordFile(record_path)
        if self.symfs_dir:
            report.SetSymfs(self.symfs_dir)
        if self.kallsyms:
            report.SetKallsymsFile(self.kallsyms)

        current = report.GetNextSample()
        while current is not None:
            if self._filter_sample(current):
                # Sampled symbol first, then the call chain symbols in order.
                frames = [report.GetSymbolOfCurrentSample()]
                chain = report.GetCallChainOfCurrentSample()
                frames.extend(chain.entries[idx].symbol for idx in range(chain.nr))
                for frame in frames:
                    if not self._filter_symbol(frame):
                        continue
                    dso = frame.dso_name
                    start = frame.symbol_addr
                    self.addr2line.add_addr(dso, start, frame.vaddr_in_file)
                    self.addr2line.add_addr(dso, start, start)
            current = report.GetNextSample()
        # GetNextSample() returned None: this file has no more samples.
        report.Close()
def __init__(self, config):
    """Create the ReportLib reader, the sample filters and the empty profile tables.

    config is kept as self.config and modified in place: config['binary_cache_dir']
    becomes 'binary_cache' when that directory exists and None otherwise.
    """
    self.config = config
    self.lib = ReportLib()

    cache_dir = 'binary_cache'
    if os.path.isdir(cache_dir):
        config['binary_cache_dir'] = cache_dir
        self.lib.SetSymfs(cache_dir)
    else:
        config['binary_cache_dir'] = None

    record_path = config.get('perf_data_path')
    if record_path:
        self.lib.SetRecordFile(record_path)

    kallsyms = 'binary_cache/kallsyms'
    if os.path.isfile(kallsyms):
        self.lib.SetKallsymsFile(kallsyms)
    if config.get('show_art_frames'):
        self.lib.ShowArtFrames()

    # Each filter is None when the matching option is missing or empty.
    comm_filters = config.get('comm_filters')
    self.comm_filter = set(comm_filters) if comm_filters else None
    pid_filters = config.get('pid_filters')
    self.pid_filter = {int(pid) for pid in pid_filters} if pid_filters else None
    tid_filters = config.get('tid_filters')
    self.tid_filter = {int(tid) for tid in tid_filters} if tid_filters else None
    dso_filters = config.get('dso_filters')
    self.dso_filter = set(dso_filters) if dso_filters else None
    self.max_chain_length = config['max_chain_length']

    self.profile = profile_pb2.Profile()
    # Index 0 of the string table holds the empty string.
    self.profile.string_table.append('')
    self.string_table = {}
    self.sample_types = {}
    self.sample_map = {}
    self.sample_list = []
    self.location_map = {}
    self.location_list = []
    self.mapping_map = {}
    self.mapping_list = []
    self.function_map = {}
    self.function_list = []

    # Map from dso_name in perf.data to (binary path, build_id).
    self.binary_map = {}
    self.read_elf = ReadElf(self.config['ndk_path'])
def load_record_file(self, record_file):
    """Read all samples of record_file and aggregate them into this generator.

    A fresh ReportLib is stored in self.lib while reading; once the file is
    exhausted it is closed and self.lib is reset to None.
    """
    self.lib = ReportLib()
    self.lib.SetRecordFile(record_file)

    cache_dir = self.config['binary_cache_dir']
    if cache_dir:
        self.lib.SetSymfs(cache_dir)
        kallsyms = os.path.join(cache_dir, 'kallsyms')
        if os.path.isfile(kallsyms):
            self.lib.SetKallsymsFile(kallsyms)
    if self.config.get('show_art_frames'):
        self.lib.ShowArtFrames()

    # Process all samples in perf.data, aggregate samples.
    while True:
        report_sample = self.lib.GetNextSample()
        if report_sample is None:
            break
        event = self.lib.GetEventOfCurrentSample()
        symbol = self.lib.GetSymbolOfCurrentSample()
        callchain = self.lib.GetCallChainOfCurrentSample()
        if not self._filter_report_sample(report_sample):
            continue

        type_id = self.get_sample_type_id(event.name)
        sample = Sample()
        # Slot type_id counts samples, slot type_id + 1 accumulates the period.
        sample.add_value(type_id, 1)
        sample.add_value(type_id + 1, report_sample.period)

        # NOTE(review): the dso filter is evaluated on the sampled symbol only, for
        # the leaf and for every call chain frame alike - confirm that filtering
        # each frame on entry.symbol was not intended.
        if self._filter_symbol(symbol):
            sample.add_location_id(self.get_location_id(report_sample.ip, symbol))
            # Only the last max_chain_length entries of the call chain are kept.
            first = max(0, callchain.nr - self.max_chain_length)
            for idx in range(first, callchain.nr):
                entry = callchain.entries[idx]
                sample.add_location_id(self.get_location_id(entry.ip, entry.symbol))

        if sample.location_ids:
            self.add_sample(sample)

    self.lib.Close()
    self.lib = None
def __init__(self, config):
    """Create the ReportLib reader, the sample filters and the empty profile tables.

    config is kept as self.config and modified in place: config['binary_cache_dir']
    becomes 'binary_cache' when that directory exists and None otherwise.
    """
    self.config = config
    self.lib = ReportLib()

    cache_dir = 'binary_cache'
    if os.path.isdir(cache_dir):
        config['binary_cache_dir'] = cache_dir
        self.lib.SetSymfs(cache_dir)
    else:
        config['binary_cache_dir'] = None

    record_path = config.get('perf_data_path')
    if record_path:
        self.lib.SetRecordFile(record_path)

    kallsyms = 'binary_cache/kallsyms'
    if os.path.isfile(kallsyms):
        self.lib.SetKallsymsFile(kallsyms)

    # Each filter is None when the matching option is missing or empty.
    comm_filters = config.get('comm_filters')
    self.comm_filter = set(comm_filters) if comm_filters else None
    pid_filters = config.get('pid_filters')
    self.pid_filter = {int(pid) for pid in pid_filters} if pid_filters else None
    tid_filters = config.get('tid_filters')
    self.tid_filter = {int(tid) for tid in tid_filters} if tid_filters else None
    dso_filters = config.get('dso_filters')
    self.dso_filter = set(dso_filters) if dso_filters else None

    self.profile = profile_pb2.Profile()
    # Index 0 of the string table holds the empty string.
    self.profile.string_table.append('')
    self.string_table = {}
    self.sample_types = {}
    self.sample_map = {}
    self.sample_list = []
    self.location_map = {}
    self.location_list = []
    self.mapping_map = {}
    self.mapping_list = []
    self.function_map = {}
    self.function_list = []
def _collect_used_binaries(self, perf_data_path):
    """Record in self.binaries every binary referenced by samples in perf_data_path.

    self.binaries becomes a dict mapping each dso name to the value returned by
    ReportLib.GetBuildIdForPath() for it. Names for which is_jit_symfile() is true
    are left out.
    """
    build_ids = {}
    report = ReportLib()
    report.SetRecordFile(perf_data_path)
    report.SetLogSeverity('error')

    while report.GetNextSample() is not None:
        # Sampled symbol first, then the call chain symbols in order.
        frames = [report.GetSymbolOfCurrentSample()]
        chain = report.GetCallChainOfCurrentSample()
        frames.extend(chain.entries[idx].symbol for idx in range(chain.nr))
        for frame in frames:
            name = frame.dso_name
            # The build id is looked up only the first time a name is seen.
            if name in build_ids or is_jit_symfile(name):
                continue
            build_ids[name] = report.GetBuildIdForPath(name)
    report.Close()

    self.binaries = build_ids
def _generate_periods(self):
    """Read perf.data, collect Period for all types:
    binaries, source files, functions, lines.

    Each file in self.config['perf_data_list'] is read in turn; samples rejected by
    self._filter_sample are skipped, the others are handed to
    self._generate_periods_for_sample together with the reader.
    """
    for record_path in self.config['perf_data_list']:
        report = ReportLib()
        report.SetRecordFile(record_path)
        if self.symfs_dir:
            report.SetSymfs(self.symfs_dir)
        if self.kallsyms:
            report.SetKallsymsFile(self.kallsyms)

        current = report.GetNextSample()
        while current is not None:
            if self._filter_sample(current):
                self._generate_periods_for_sample(report, current)
            current = report.GetNextSample()
        report.Close()
def load_record_file(self, record_file, show_art_frames):
    """Read every sample of record_file and accumulate it into this object.

    Stores the file's meta info, record command and arch, then for each sample
    updates the counters of its event, process and thread and adds its call stack
    (sampled symbol first, truncated to MAX_CALLSTACK_LENGTH frames) to the thread.
    Finally update_subtree_event_count() is called on every thread of every event.

    Args:
        record_file: path passed to ReportLib.SetRecordFile().
        show_art_frames: when true, ReportLib.ShowArtFrames() is enabled.
    """
    lib = ReportLib()
    lib.SetRecordFile(record_file)
    # If not showing ip for unknown symbols, the percent of the unknown symbol may be
    # accumulated to very big, and ranks first in the sample table.
    lib.ShowIpForUnknownSymbol()
    if show_art_frames:
        lib.ShowArtFrames()
    if self.binary_cache_path:
        lib.SetSymfs(self.binary_cache_path)
    self.meta_info = lib.MetaInfo()
    self.cmdline = lib.GetRecordCmd()
    self.arch = lib.GetArch()

    while True:
        raw_sample = lib.GetNextSample()
        if not raw_sample:
            lib.Close()
            break
        raw_event = lib.GetEventOfCurrentSample()
        leaf_symbol = lib.GetSymbolOfCurrentSample()
        callchain = lib.GetCallChainOfCurrentSample()
        period = raw_sample.period

        sample_event = self._get_event(raw_event.name)
        self.total_samples += 1
        sample_event.sample_count += 1
        sample_event.event_count += period
        process = sample_event.get_process(raw_sample.pid)
        process.event_count += period
        thread = process.get_thread(raw_sample.tid, raw_sample.thread_comm)
        thread.event_count += period
        thread.sample_count += 1

        frames = [leaf_symbol]
        frames.extend(callchain.entries[i].symbol for i in range(callchain.nr))
        callstack = []
        for frame in frames:
            lib_id = self.libs.get_lib_id(frame.dso_name)
            func_id = self.functions.get_func_id(lib_id, frame)
            callstack.append((lib_id, func_id, frame.vaddr_in_file))
        # Drop everything past the first MAX_CALLSTACK_LENGTH frames (no-op if shorter).
        del callstack[MAX_CALLSTACK_LENGTH:]
        thread.add_callstack(period, callstack, self.build_addr_hit_map)

    for recorded_event in self.events.values():
        for thread in recorded_event.threads:
            thread.update_subtree_event_count()
def report_sample(record_file, symfs_dir, kallsyms_file, show_tracing_data):
    """Read record_file and print every sample together with its call chain.

    Args:
        record_file: path passed to ReportLib.SetRecordFile(); not set when None.
        symfs_dir: directory passed to ReportLib.SetSymfs(); not set when None.
        kallsyms_file: path passed to ReportLib.SetKallsymsFile(); not set when None.
        show_tracing_data: when true, also print the key/value pairs returned by
            ReportLib.GetTracingDataOfCurrentSample() for each sample.
    """
    lib = ReportLib()

    lib.ShowIpForUnknownSymbol()
    if symfs_dir is not None:
        lib.SetSymfs(symfs_dir)
    if record_file is not None:
        lib.SetRecordFile(record_file)
    if kallsyms_file is not None:
        lib.SetKallsymsFile(kallsyms_file)

    while True:
        sample = lib.GetNextSample()
        if sample is None:
            lib.Close()
            break
        event = lib.GetEventOfCurrentSample()
        symbol = lib.GetSymbolOfCurrentSample()
        callchain = lib.GetCallChainOfCurrentSample()

        # Split the nanosecond timestamp with integer arithmetic: true division
        # would make sec a float and the microsecond part always 0.
        sec, nsec = divmod(sample.time, 1000000000)
        usec = nsec // 1000
        # %06d keeps the fractional part positional (5 us prints as .000005, not .5).
        print('%s\t%d [%03d] %d.%06d:\t\t%d %s:' % (sample.thread_comm,
                                                    sample.tid, sample.cpu, sec,
                                                    usec, sample.period, event.name))
        print('%16x\t%s (%s)' % (sample.ip, symbol.symbol_name, symbol.dso_name))
        for i in range(callchain.nr):
            entry = callchain.entries[i]
            print('%16x\t%s (%s)' % (entry.ip, entry.symbol.symbol_name,
                                     entry.symbol.dso_name))
        if show_tracing_data:
            data = lib.GetTracingDataOfCurrentSample()
            if data:
                print('\ttracing data:')
                for key, value in data.items():
                    print('\t\t%s : %s' % (key, value))
        # Blank line between samples.
        print('')
def setUp(self):
    """Create self.report_lib reading testdata/perf_with_symbols.data."""
    record_path = os.path.join('testdata', 'perf_with_symbols.data')
    self.report_lib = ReportLib()
    self.report_lib.SetRecordFile(record_path)
class TestReportLib(unittest.TestCase):
    """Tests of ReportLib against the recorded perf data files under testdata/."""

    def setUp(self):
        """Open testdata/perf_with_symbols.data; some tests switch to another file."""
        self.report_lib = ReportLib()
        self.report_lib.SetRecordFile(
            os.path.join('testdata', 'perf_with_symbols.data'))

    def tearDown(self):
        """Close the reader created in setUp."""
        self.report_lib.Close()

    def test_build_id(self):
        """GetBuildIdForPath() returns the build id recorded for /data/t2."""
        build_id = self.report_lib.GetBuildIdForPath('/data/t2')
        self.assertEqual(build_id, '0x70f1fe24500fc8b0d9eb477199ca1ca21acca4de')

    def test_symbol(self):
        """A sample in func2(int, int) reports the expected address and length."""
        found_func2 = False
        while self.report_lib.GetNextSample():
            symbol = self.report_lib.GetSymbolOfCurrentSample()
            if symbol.symbol_name == 'func2(int, int)':
                found_func2 = True
                self.assertEqual(symbol.symbol_addr, 0x4004ed)
                self.assertEqual(symbol.symbol_len, 0x14)
        self.assertTrue(found_func2)

    def test_sample(self):
        """The sample at a known ip/time carries the expected fields."""
        found_sample = False
        while self.report_lib.GetNextSample():
            sample = self.report_lib.GetCurrentSample()
            if sample.ip == 0x4004ff and sample.time == 7637889424953:
                found_sample = True
                self.assertEqual(sample.pid, 15926)
                self.assertEqual(sample.tid, 15926)
                self.assertEqual(sample.thread_comm, 't2')
                self.assertEqual(sample.cpu, 5)
                self.assertEqual(sample.period, 694614)
                event = self.report_lib.GetEventOfCurrentSample()
                self.assertEqual(event.name, 'cpu-cycles')
                callchain = self.report_lib.GetCallChainOfCurrentSample()
                self.assertEqual(callchain.nr, 0)
        self.assertTrue(found_sample)

    def test_meta_info(self):
        """MetaInfo() exposes the entries recorded with --trace-offcpu."""
        self.report_lib.SetRecordFile(
            os.path.join('testdata', 'perf_with_trace_offcpu.data'))
        meta_info = self.report_lib.MetaInfo()
        # assertIn reports the missing key and the container when it fails.
        self.assertIn("simpleperf_version", meta_info)
        self.assertEqual(meta_info["system_wide_collection"], "false")
        self.assertEqual(meta_info["trace_offcpu"], "true")
        self.assertEqual(meta_info["event_type_info"],
                         "cpu-cycles,0,0\nsched:sched_switch,2,47")
        self.assertIn("product_props", meta_info)

    def test_event_name_from_meta_info(self):
        """Both the tracepoint event and cpu-cycles show up as event names."""
        self.report_lib.SetRecordFile(
            os.path.join('testdata', 'perf_with_tracepoint_event.data'))
        event_names = set()
        while self.report_lib.GetNextSample():
            event_names.add(self.report_lib.GetEventOfCurrentSample().name)
        self.assertIn('sched:sched_switch', event_names)
        self.assertIn('cpu-cycles', event_names)

    def test_record_cmd(self):
        """GetRecordCmd() returns the command line stored in the record file."""
        self.report_lib.SetRecordFile(
            os.path.join('testdata', 'perf_with_trace_offcpu.data'))
        self.assertEqual(
            self.report_lib.GetRecordCmd(),
            "/data/local/tmp/simpleperf record --trace-offcpu --duration 2 -g ./simpleperf_runtest_run_and_sleep64"
        )

    def test_offcpu(self):
        """More than 30% of the total period is attributed to the sleep function."""
        self.report_lib.SetRecordFile(
            os.path.join('testdata', 'perf_with_trace_offcpu.data'))
        total_period = 0
        sleep_function_period = 0
        sleep_function_name = "SleepFunction(unsigned long long)"
        while self.report_lib.GetNextSample():
            sample = self.report_lib.GetCurrentSample()
            total_period += sample.period
            leaf_name = self.report_lib.GetSymbolOfCurrentSample().symbol_name
            if leaf_name == sleep_function_name:
                sleep_function_period += sample.period
                continue
            # Otherwise count the sample if the function appears in its call chain.
            callchain = self.report_lib.GetCallChainOfCurrentSample()
            for i in range(callchain.nr):
                if callchain.entries[i].symbol.symbol_name == sleep_function_name:
                    sleep_function_period += sample.period
                    break
            self.assertEqual(self.report_lib.GetEventOfCurrentSample().name,
                             'cpu-cycles')
        sleep_percentage = float(sleep_function_period) / total_period
        self.assertGreater(sleep_percentage, 0.30)
def parse_samples(process, args):
    """Read the samples of args.record_file into process.

    Fills process.cmd and, when the file's meta info has "product_props", the
    ro.product.* entries of process.props. Every sample's call chain is added to the
    thread it belongs to and process.num_samples is incremented. Afterwards each
    thread's flamegraph is trimmed using args.min_callchain_percentage.

    process: Process object
    args: arguments (record_file, symfs, kallsyms, min_callchain_percentage are read)
    """
    lib = ReportLib()
    lib.ShowIpForUnknownSymbol()
    if args.symfs:
        lib.SetSymfs(args.symfs)
    if args.record_file:
        lib.SetRecordFile(args.record_file)
    if args.kallsyms:
        lib.SetKallsymsFile(args.kallsyms)

    process.cmd = lib.GetRecordCmd()
    product_props = lib.MetaInfo().get("product_props")
    if product_props:
        # product_props is split on ':' into manufacturer, model and name.
        fields = product_props.split(':')
        process.props['ro.product.manufacturer'] = fields[0]
        process.props['ro.product.model'] = fields[1]
        process.props['ro.product.name'] = fields[2]

    sample = lib.GetNextSample()
    while sample is not None:
        symbol = lib.GetSymbolOfCurrentSample()
        callchain = lib.GetCallChainOfCurrentSample()
        owner = process.get_thread(sample.tid, sample.pid)
        owner.add_callchain(callchain, symbol, sample)
        process.num_samples += 1
        sample = lib.GetNextSample()
    lib.Close()

    if process.pid == 0:
        # Take name and pid from the first thread whose tid equals its pid.
        main_threads = [t for t in process.threads.values() if t.tid == t.pid]
        if main_threads:
            main_thread = main_threads[0]
            process.name = main_thread.name
            process.pid = main_thread.pid

    percentage = args.min_callchain_percentage
    for thread in process.threads.values():
        min_event_count = thread.event_count * percentage * 0.01
        thread.flamegraph.trim_callchain(min_event_count)

    log_info("Parsed %s callchains." % process.num_samples)
def parse_samples(process, args, sample_filter_fn):
    """Read samples from record file.

    Fills process.cmd and process.props from the file's record command and meta
    info, adds every accepted sample to process and finally trims each thread's
    flamegraph using args.min_callchain_percentage.

    process: Process object
    args: arguments
    sample_filter_fn: if not None, is used to modify and filter samples.
                      It returns false for samples should be filtered out.
    """
    lib = ReportLib()
    lib.ShowIpForUnknownSymbol()
    if args.symfs:
        lib.SetSymfs(args.symfs)
    if args.record_file:
        lib.SetRecordFile(args.record_file)
    if args.kallsyms:
        lib.SetKallsymsFile(args.kallsyms)

    process.cmd = lib.GetRecordCmd()
    product_props = lib.MetaInfo().get("product_props")
    if product_props:
        # product_props is split on ':' into manufacturer, model and name.
        fields = product_props.split(':')
        process.props['ro.product.manufacturer'] = fields[0]
        process.props['ro.product.model'] = fields[1]
        process.props['ro.product.name'] = fields[2]

    traced_offcpu = lib.MetaInfo().get('trace_offcpu') == 'true'
    process.props['trace_offcpu'] = traced_offcpu
    if traced_offcpu and args.one_flamegraph:
        log_exit("It doesn't make sense to report with --one-flamegraph for perf.data " +
                 "recorded with --trace-offcpu.""")

    sample = lib.GetNextSample()
    while sample is not None:
        symbol = lib.GetSymbolOfCurrentSample()
        callchain = lib.GetCallChainOfCurrentSample()
        if not sample_filter_fn or sample_filter_fn(sample, symbol, callchain):
            process.add_sample(sample, symbol, callchain)
        sample = lib.GetNextSample()
    lib.Close()

    if process.pid == 0:
        # Take name and pid from the first thread whose tid equals its pid.
        main_threads = [t for t in process.threads.values() if t.tid == t.pid]
        if main_threads:
            main_thread = main_threads[0]
            process.name = main_thread.name
            process.pid = main_thread.pid

    percentage = args.min_callchain_percentage
    for thread in process.threads.values():
        min_event_count = thread.num_events * percentage * 0.01
        thread.flamegraph.trim_callchain(min_event_count)

    log_info("Parsed %s callchains." % process.num_samples)
class PprofProfileGenerator(object):
    """Build a profile_pb2.Profile from the samples read through ReportLib.

    Strings, samples, locations, mappings and functions are de-duplicated through
    the *_map / string_table dictionaries, while the matching *_list keeps them in
    creation order. Mapping, location and function ids start from 1; string id 0 is
    the empty string.
    """

    def __init__(self, config):
        """Create the ReportLib reader, the sample filters and the empty tables.

        config is kept as self.config and modified in place:
        config['binary_cache_dir'] becomes 'binary_cache' when that directory exists
        and None otherwise.
        """
        self.config = config
        self.lib = ReportLib()

        cache_dir = 'binary_cache'
        if os.path.isdir(cache_dir):
            config['binary_cache_dir'] = cache_dir
            self.lib.SetSymfs(cache_dir)
        else:
            config['binary_cache_dir'] = None

        record_path = config.get('perf_data_path')
        if record_path:
            self.lib.SetRecordFile(record_path)

        kallsyms = 'binary_cache/kallsyms'
        if os.path.isfile(kallsyms):
            self.lib.SetKallsymsFile(kallsyms)

        # Each filter is None when the matching option is missing or empty.
        comm_filters = config.get('comm_filters')
        self.comm_filter = set(comm_filters) if comm_filters else None
        pid_filters = config.get('pid_filters')
        self.pid_filter = {int(pid) for pid in pid_filters} if pid_filters else None
        tid_filters = config.get('tid_filters')
        self.tid_filter = {int(tid) for tid in tid_filters} if tid_filters else None
        dso_filters = config.get('dso_filters')
        self.dso_filter = set(dso_filters) if dso_filters else None

        self.profile = profile_pb2.Profile()
        # Index 0 of the string table holds the empty string.
        self.profile.string_table.append('')
        self.string_table = {}
        self.sample_types = {}
        self.sample_map = {}
        self.sample_list = []
        self.location_map = {}
        self.location_list = []
        self.mapping_map = {}
        self.mapping_list = []
        self.function_map = {}
        self.function_list = []

    def gen(self):
        """Read all samples, add line info and return the filled profile."""
        # 1. Process all samples in perf.data, aggregate samples.
        while True:
            report_sample = self.lib.GetNextSample()
            if report_sample is None:
                break
            event = self.lib.GetEventOfCurrentSample()
            symbol = self.lib.GetSymbolOfCurrentSample()
            callchain = self.lib.GetCallChainOfCurrentSample()
            if not self._filter_report_sample(report_sample):
                continue

            type_id = self.get_sample_type_id(event.name)
            sample = Sample()
            # Slot type_id counts samples, slot type_id + 1 accumulates the period.
            sample.add_value(type_id, 1)
            sample.add_value(type_id + 1, report_sample.period)

            # NOTE(review): the dso filter is evaluated on the sampled symbol only,
            # for the leaf and for every call chain frame alike - confirm that
            # filtering each frame on entry.symbol was not intended.
            if self._filter_symbol(symbol):
                sample.add_location_id(
                    self.get_location_id(symbol.vaddr_in_file, symbol))
                for idx in range(callchain.nr):
                    entry = callchain.entries[idx]
                    sample.add_location_id(
                        self.get_location_id(entry.ip, entry.symbol))

            if sample.location_ids:
                self.add_sample(sample)
        self.lib.Close()

        # 2. Generate line info for locations and functions.
        self.gen_source_lines()

        # 3. Produce samples/locations/functions in profile
        for sample in self.sample_list:
            self.gen_profile_sample(sample)
        for mapping in self.mapping_list:
            self.gen_profile_mapping(mapping)
        for location in self.location_list:
            self.gen_profile_location(location)
        for function in self.function_list:
            self.gen_profile_function(function)

        return self.profile

    def _filter_report_sample(self, sample):
        """Return true if the sample can be used."""
        if self.comm_filter and sample.thread_comm not in self.comm_filter:
            return False
        if self.pid_filter and sample.pid not in self.pid_filter:
            return False
        if self.tid_filter and sample.tid not in self.tid_filter:
            return False
        return True

    def _filter_symbol(self, symbol):
        """Return True when no dso filter is set or symbol.dso_name is in it."""
        return not self.dso_filter or symbol.dso_name in self.dso_filter

    def get_string_id(self, str_value):
        """Return the string table index of str_value, adding it on first use.

        Empty or None values map to index 0.
        """
        if not str_value:
            return 0
        known_id = self.string_table.get(str_value)
        if known_id is None:
            known_id = len(self.string_table) + 1
            self.string_table[str_value] = known_id
            self.profile.string_table.append(str_value)
        return known_id

    def get_string(self, str_id):
        """Return the string stored at index str_id of the profile string table."""
        return self.profile.string_table[str_id]

    def get_sample_type_id(self, name):
        """Return the index of the first of the two sample types of event `name`.

        On first use two sample types are added to the profile: the sample count
        at the returned index and the event count right after it.
        """
        known_id = self.sample_types.get(name)
        if known_id is not None:
            return known_id
        first_id = len(self.profile.sample_type)
        for suffix in ('_samples', '_count'):
            sample_type = self.profile.sample_type.add()
            sample_type.type = self.get_string_id('event_' + name + suffix)
            sample_type.unit = self.get_string_id('count')
        self.sample_types[name] = first_id
        return first_id

    def get_location_id(self, ip, symbol):
        """Return the id of the Location for (ip, symbol), creating it if needed."""
        mapping_id = self.get_mapping_id(symbol.mapping[0], symbol.dso_name)
        candidate = Location(mapping_id, ip, symbol.vaddr_in_file)
        function_id = self.get_function_id(symbol.symbol_name, symbol.dso_name,
                                           symbol.symbol_addr)
        if function_id:
            # Add Line only when it has a valid function id, see http://b/36988814.
            # Default line info only contains the function name
            default_line = Line()
            default_line.function_id = function_id
            candidate.lines.append(default_line)

        known = self.location_map.get(candidate.key)
        if known:
            return known.id
        # location_id starts from 1
        candidate.id = len(self.location_list) + 1
        self.location_list.append(candidate)
        self.location_map[candidate.key] = candidate
        return candidate.id

    def get_mapping_id(self, report_mapping, filename):
        """Return the id of the Mapping for report_mapping, creating it if needed."""
        filename_id = self.get_string_id(filename)
        build_id = self.lib.GetBuildIdForPath(filename)
        if build_id and build_id[0:2] == "0x":
            # Strip the leading "0x".
            build_id = build_id[2:]
        build_id_id = self.get_string_id(build_id)
        candidate = Mapping(report_mapping.start, report_mapping.end,
                            report_mapping.pgoff, filename_id, build_id_id)
        known = self.mapping_map.get(candidate.key)
        if known:
            return known.id
        # mapping_id starts from 1
        candidate.id = len(self.mapping_list) + 1
        self.mapping_list.append(candidate)
        self.mapping_map[candidate.key] = candidate
        return candidate.id

    def get_mapping(self, mapping_id):
        """Return the Mapping with the given id, or None for ids <= 0."""
        if mapping_id > 0:
            return self.mapping_list[mapping_id - 1]
        return None

    def get_function_id(self, name, dso_name, vaddr_in_file):
        """Return the id of the Function, creating it if needed.

        Returns 0 when name is 'unknown'.
        """
        if name == 'unknown':
            return 0
        candidate = Function(self.get_string_id(name), self.get_string_id(dso_name),
                             vaddr_in_file)
        known = self.function_map.get(candidate.key)
        if known:
            return known.id
        # function_id starts from 1
        candidate.id = len(self.function_list) + 1
        self.function_list.append(candidate)
        self.function_map[candidate.key] = candidate
        return candidate.id

    def get_function(self, function_id):
        """Return the Function with the given id, or None for ids <= 0."""
        if function_id > 0:
            return self.function_list[function_id - 1]
        return None

    def add_sample(self, sample):
        """Merge sample into an existing sample with the same key, or store it."""
        known = self.sample_map.get(sample.key)
        if known:
            known.add_values(sample.values)
            return
        self.sample_list.append(sample)
        self.sample_map[sample.key] = sample

    def gen_source_lines(self):
        """Annotate locations and functions with source file and line info.

        Does nothing but log a message when binary_cache or addr2line is missing.
        """
        # 1. Create Addr2line instance
        if not self.config.get('binary_cache_dir'):
            log_info("Can't generate line information because binary_cache is missing.")
            return
        if not find_tool_path('addr2line', self.config['ndk_path']):
            log_info("Can't generate line information because can't find addr2line.")
            return
        addr2line = Addr2Nearestline(self.config['ndk_path'],
                                     self.config['binary_cache_dir'], True)

        # 2. Put all needed addresses to it.
        for location in self.location_list:
            mapping = self.get_mapping(location.mapping_id)
            dso_name = self.get_string(mapping.filename_id)
            if location.lines:
                owner = self.get_function(location.lines[0].function_id)
                addr2line.add_addr(dso_name, owner.vaddr_in_dso, location.vaddr_in_dso)
        for function in self.function_list:
            dso_name = self.get_string(function.dso_name_id)
            start = function.vaddr_in_dso
            addr2line.add_addr(dso_name, start, start)

        # 3. Generate source lines.
        addr2line.convert_addrs_to_lines()

        # 4. Annotate locations and functions.
        for location in self.location_list:
            if not location.lines:
                continue
            mapping = self.get_mapping(location.mapping_id)
            dso_name = self.get_string(mapping.filename_id)
            dso = addr2line.get_dso(dso_name)
            if not dso:
                continue
            sources = addr2line.get_addr_source(dso, location.vaddr_in_dso)
            if not sources:
                continue
            for index, (src_file, src_line, func_name) in enumerate(sources):
                func_id = self.get_function_id(func_name, dso_name, 0)
                if func_id == 0:
                    continue
                if index == 0:
                    # Clear default line info
                    location.lines = []
                location.lines.append(self.add_line(src_file, src_line, func_id))

        for function in self.function_list:
            dso_name = self.get_string(function.dso_name_id)
            if not function.vaddr_in_dso:
                continue
            dso = addr2line.get_dso(dso_name)
            if not dso:
                continue
            sources = addr2line.get_addr_source(dso, function.vaddr_in_dso)
            if sources:
                src_file, src_line, _ = sources[0]
                function.source_filename_id = self.get_string_id(src_file)
                function.start_line = src_line

    def add_line(self, source_file, source_line, function_id):
        """Return a Line for function_id and record source_file on that function."""
        function = self.get_function(function_id)
        function.source_filename_id = self.get_string_id(source_file)
        line = Line()
        line.function_id = function_id
        line.line = source_line
        return line

    def gen_profile_sample(self, sample):
        """Append sample to the profile, with 0 for sample types it has no value for."""
        profile_sample = self.profile.sample.add()
        profile_sample.location_id.extend(sample.location_ids)
        # Every event name owns two consecutive value slots.
        values = [0] * (len(self.sample_types) * 2)
        for type_id in sample.values:
            values[type_id] = sample.values[type_id]
        profile_sample.value.extend(values)

    def gen_profile_mapping(self, mapping):
        """Append mapping to the profile."""
        profile_mapping = self.profile.mapping.add()
        profile_mapping.id = mapping.id
        profile_mapping.memory_start = mapping.memory_start
        profile_mapping.memory_limit = mapping.memory_limit
        profile_mapping.file_offset = mapping.file_offset
        profile_mapping.filename = mapping.filename_id
        profile_mapping.build_id = mapping.build_id_id
        profile_mapping.has_filenames = True
        profile_mapping.has_functions = True
        # Line numbers and inline frames are flagged only when a binary cache is set.
        has_line_info = bool(self.config.get('binary_cache_dir'))
        profile_mapping.has_line_numbers = has_line_info
        profile_mapping.has_inline_frames = has_line_info

    def gen_profile_location(self, location):
        """Append location and its lines to the profile."""
        profile_location = self.profile.location.add()
        profile_location.id = location.id
        profile_location.mapping_id = location.mapping_id
        profile_location.address = location.address
        for source in location.lines:
            line = profile_location.line.add()
            line.function_id = source.function_id
            line.line = source.line

    def gen_profile_function(self, function):
        """Append function to the profile; name_id is used for name and system_name."""
        profile_function = self.profile.function.add()
        profile_function.id = function.id
        profile_function.name = function.name_id
        profile_function.system_name = function.name_id
        profile_function.filename = function.source_filename_id
        profile_function.start_line = function.start_line
class PprofProfileGenerator(object):
    """Build a profile_pb2.Profile from the samples read through ReportLib.

    Strings, samples, locations, mappings and functions are de-duplicated through
    the *_map / string_table dictionaries, while the matching *_list keeps them in
    creation order. Mapping, location and function ids start from 1; string id 0 is
    the empty string.
    """

    def __init__(self, config):
        """Create the ReportLib reader, the sample filters and the empty tables.

        config is kept as self.config and modified in place:
        config['binary_cache_dir'] becomes 'binary_cache' when that directory exists
        and None otherwise.
        """
        self.config = config
        self.lib = ReportLib()

        cache_dir = 'binary_cache'
        if os.path.isdir(cache_dir):
            config['binary_cache_dir'] = cache_dir
            self.lib.SetSymfs(cache_dir)
        else:
            config['binary_cache_dir'] = None

        record_path = config.get('perf_data_path')
        if record_path:
            self.lib.SetRecordFile(record_path)

        kallsyms = 'binary_cache/kallsyms'
        if os.path.isfile(kallsyms):
            self.lib.SetKallsymsFile(kallsyms)
        if config.get('show_art_frames'):
            self.lib.ShowArtFrames()

        # Each filter is None when the matching option is missing or empty.
        comm_filters = config.get('comm_filters')
        self.comm_filter = set(comm_filters) if comm_filters else None
        pid_filters = config.get('pid_filters')
        self.pid_filter = {int(pid) for pid in pid_filters} if pid_filters else None
        tid_filters = config.get('tid_filters')
        self.tid_filter = {int(tid) for tid in tid_filters} if tid_filters else None
        dso_filters = config.get('dso_filters')
        self.dso_filter = set(dso_filters) if dso_filters else None
        self.max_chain_length = config['max_chain_length']

        self.profile = profile_pb2.Profile()
        # Index 0 of the string table holds the empty string.
        self.profile.string_table.append('')
        self.string_table = {}
        self.sample_types = {}
        self.sample_map = {}
        self.sample_list = []
        self.location_map = {}
        self.location_list = []
        self.mapping_map = {}
        self.mapping_list = []
        self.function_map = {}
        self.function_list = []

        # Map from dso_name in perf.data to (binary path, build_id).
        self.binary_map = {}
        self.read_elf = ReadElf(self.config['ndk_path'])

    def gen(self):
        """Read all samples, add line info and return the filled profile."""
        # 1. Process all samples in perf.data, aggregate samples.
        while True:
            report_sample = self.lib.GetNextSample()
            if report_sample is None:
                break
            event = self.lib.GetEventOfCurrentSample()
            symbol = self.lib.GetSymbolOfCurrentSample()
            callchain = self.lib.GetCallChainOfCurrentSample()
            if not self._filter_report_sample(report_sample):
                continue

            type_id = self.get_sample_type_id(event.name)
            sample = Sample()
            # Slot type_id counts samples, slot type_id + 1 accumulates the period.
            sample.add_value(type_id, 1)
            sample.add_value(type_id + 1, report_sample.period)

            # NOTE(review): the dso filter is evaluated on the sampled symbol only,
            # for the leaf and for every call chain frame alike - confirm that
            # filtering each frame on entry.symbol was not intended.
            if self._filter_symbol(symbol):
                sample.add_location_id(
                    self.get_location_id(report_sample.ip, symbol))
                # Only the last max_chain_length entries of the call chain are kept.
                first = max(0, callchain.nr - self.max_chain_length)
                for idx in range(first, callchain.nr):
                    entry = callchain.entries[idx]
                    sample.add_location_id(
                        self.get_location_id(entry.ip, entry.symbol))

            if sample.location_ids:
                self.add_sample(sample)
        self.lib.Close()

        # 2. Generate line info for locations and functions.
        self.gen_source_lines()

        # 3. Produce samples/locations/functions in profile
        for sample in self.sample_list:
            self.gen_profile_sample(sample)
        for mapping in self.mapping_list:
            self.gen_profile_mapping(mapping)
        for location in self.location_list:
            self.gen_profile_location(location)
        for function in self.function_list:
            self.gen_profile_function(function)

        return self.profile

    def _filter_report_sample(self, sample):
        """Return true if the sample can be used."""
        if self.comm_filter and sample.thread_comm not in self.comm_filter:
            return False
        if self.pid_filter and sample.pid not in self.pid_filter:
            return False
        if self.tid_filter and sample.tid not in self.tid_filter:
            return False
        return True

    def _filter_symbol(self, symbol):
        """Return True when no dso filter is set or symbol.dso_name is in it."""
        return not self.dso_filter or symbol.dso_name in self.dso_filter

    def get_string_id(self, str_value):
        """Return the string table index of str_value, adding it on first use.

        Empty or None values map to index 0.
        """
        if not str_value:
            return 0
        known_id = self.string_table.get(str_value)
        if known_id is None:
            known_id = len(self.string_table) + 1
            self.string_table[str_value] = known_id
            self.profile.string_table.append(str_value)
        return known_id

    def get_string(self, str_id):
        """Return the string stored at index str_id of the profile string table."""
        return self.profile.string_table[str_id]

    def get_sample_type_id(self, name):
        """Return the index of the first of the two sample types of event `name`.

        On first use two sample types are added to the profile: the sample count
        at the returned index and the event count right after it.
        """
        known_id = self.sample_types.get(name)
        if known_id is not None:
            return known_id
        first_id = len(self.profile.sample_type)
        for suffix in ('_samples', '_count'):
            sample_type = self.profile.sample_type.add()
            sample_type.type = self.get_string_id('event_' + name + suffix)
            sample_type.unit = self.get_string_id('count')
        self.sample_types[name] = first_id
        return first_id

    def get_location_id(self, ip, symbol):
        """Return the id of the Location for (ip, symbol), creating it if needed."""
        binary_path, build_id = self.get_binary(symbol.dso_name)
        mapping_id = self.get_mapping_id(symbol.mapping[0], binary_path, build_id)
        candidate = Location(mapping_id, ip, symbol.vaddr_in_file)
        function_id = self.get_function_id(symbol.symbol_name, binary_path,
                                           symbol.symbol_addr)
        if function_id:
            # Add Line only when it has a valid function id, see http://b/36988814.
            # Default line info only contains the function name
            default_line = Line()
            default_line.function_id = function_id
            candidate.lines.append(default_line)

        known = self.location_map.get(candidate.key)
        if known:
            return known.id
        # location_id starts from 1
        candidate.id = len(self.location_list) + 1
        self.location_list.append(candidate)
        self.location_map[candidate.key] = candidate
        return candidate.id

    def get_mapping_id(self, report_mapping, filename, build_id):
        """Return the id of the Mapping for report_mapping, creating it if needed."""
        filename_id = self.get_string_id(filename)
        build_id_id = self.get_string_id(build_id)
        candidate = Mapping(report_mapping.start, report_mapping.end,
                            report_mapping.pgoff, filename_id, build_id_id)
        known = self.mapping_map.get(candidate.key)
        if known:
            return known.id
        # mapping_id starts from 1
        candidate.id = len(self.mapping_list) + 1
        self.mapping_list.append(candidate)
        self.mapping_map[candidate.key] = candidate
        return candidate.id

    def get_binary(self, dso_name):
        """ Return (binary_path, build_id) for a given dso_name.

        Results are cached in self.binary_map. binary_path is dso_name itself unless
        a file with a matching build id is found in the binary cache.
        """
        cached = self.binary_map.get(dso_name)
        if cached:
            return cached

        binary_path = dso_name
        build_id = ''

        # The build ids in perf.data are padded to 20 bytes, but pprof needs without
        # padding. So read build id from the binary in binary_cache, and check it
        # with build id in perf.data.
        recorded_build_id = self.lib.GetBuildIdForPath(dso_name)
        # Try elf_path in binary cache.
        elf_path = find_real_dso_path(dso_name, self.config['binary_cache_dir'])
        if elf_path:
            elf_build_id = self.read_elf.get_build_id(elf_path, False)
            if recorded_build_id:
                matched = recorded_build_id == self.read_elf.pad_build_id(elf_build_id)
            else:
                # odex files generated by ART on Android O don't contain build id.
                matched = not elf_build_id
            if matched:
                build_id = elf_build_id
                binary_path = elf_path

        # When there is no matching elf_path, try converting build_id in perf.data.
        if not build_id and recorded_build_id.startswith('0x'):
            # Fallback to the way used by TrimZeroesFromBuildIDString() in quipper.
            build_id = recorded_build_id[2:]  # remove '0x'
            padding = '0' * 8
            while build_id.endswith(padding):
                build_id = build_id[:-len(padding)]

        result = (binary_path, build_id)
        self.binary_map[dso_name] = result
        return result

    def get_mapping(self, mapping_id):
        """Return the Mapping with the given id, or None for ids <= 0."""
        if mapping_id > 0:
            return self.mapping_list[mapping_id - 1]
        return None

    def get_function_id(self, name, dso_name, vaddr_in_file):
        """Return the id of the Function, creating it if needed.

        Returns 0 when name is 'unknown'.
        """
        if name == 'unknown':
            return 0
        candidate = Function(self.get_string_id(name), self.get_string_id(dso_name),
                             vaddr_in_file)
        known = self.function_map.get(candidate.key)
        if known:
            return known.id
        # function_id starts from 1
        candidate.id = len(self.function_list) + 1
        self.function_list.append(candidate)
        self.function_map[candidate.key] = candidate
        return candidate.id

    def get_function(self, function_id):
        """Return the Function with the given id, or None for ids <= 0."""
        if function_id > 0:
            return self.function_list[function_id - 1]
        return None

    def add_sample(self, sample):
        """Merge sample into an existing sample with the same key, or store it."""
        known = self.sample_map.get(sample.key)
        if known:
            known.add_values(sample.values)
            return
        self.sample_list.append(sample)
        self.sample_map[sample.key] = sample

    def gen_source_lines(self):
        """Annotate locations and functions with source file and line info.

        Does nothing but log a message when binary_cache or llvm-symbolizer is
        missing.
        """
        # 1. Create Addr2line instance
        if not self.config.get('binary_cache_dir'):
            log_info(
                "Can't generate line information because binary_cache is missing."
            )
            return
        if not find_tool_path('llvm-symbolizer', self.config['ndk_path']):
            log_info(
                "Can't generate line information because can't find llvm-symbolizer."
            )
            return
        # We have changed dso names to paths in binary_cache in self.get_binary().
        # So no need to pass binary_cache_dir to addr2line.
        addr2line = Addr2Nearestline(self.config['ndk_path'], None, True)

        # 2. Put all needed addresses to it.
        for location in self.location_list:
            mapping = self.get_mapping(location.mapping_id)
            dso_name = self.get_string(mapping.filename_id)
            if location.lines:
                owner = self.get_function(location.lines[0].function_id)
                addr2line.add_addr(dso_name, owner.vaddr_in_dso, location.vaddr_in_dso)
        for function in self.function_list:
            dso_name = self.get_string(function.dso_name_id)
            start = function.vaddr_in_dso
            addr2line.add_addr(dso_name, start, start)

        # 3. Generate source lines.
        addr2line.convert_addrs_to_lines()

        # 4. Annotate locations and functions.
        for location in self.location_list:
            if not location.lines:
                continue
            mapping = self.get_mapping(location.mapping_id)
            dso_name = self.get_string(mapping.filename_id)
            dso = addr2line.get_dso(dso_name)
            if not dso:
                continue
            sources = addr2line.get_addr_source(dso, location.vaddr_in_dso)
            if not sources:
                continue
            for index, (src_file, src_line, func_name) in enumerate(sources):
                func_id = self.get_function_id(func_name, dso_name, 0)
                if func_id == 0:
                    continue
                if index == 0:
                    # Clear default line info
                    location.lines = []
                location.lines.append(self.add_line(src_file, src_line, func_id))

        for function in self.function_list:
            dso_name = self.get_string(function.dso_name_id)
            if not function.vaddr_in_dso:
                continue
            dso = addr2line.get_dso(dso_name)
            if not dso:
                continue
            sources = addr2line.get_addr_source(dso, function.vaddr_in_dso)
            if sources:
                src_file, src_line, _ = sources[0]
                function.source_filename_id = self.get_string_id(src_file)
                function.start_line = src_line

    def add_line(self, source_file, source_line, function_id):
        """Return a Line for function_id and record source_file on that function."""
        function = self.get_function(function_id)
        function.source_filename_id = self.get_string_id(source_file)
        line = Line()
        line.function_id = function_id
        line.line = source_line
        return line

    def gen_profile_sample(self, sample):
        """Append sample to the profile, with 0 for sample types it has no value for."""
        profile_sample = self.profile.sample.add()
        profile_sample.location_id.extend(sample.location_ids)
        # Every event name owns two consecutive value slots.
        values = [0] * (len(self.sample_types) * 2)
        for type_id in sample.values:
            values[type_id] = sample.values[type_id]
        profile_sample.value.extend(values)

    def gen_profile_mapping(self, mapping):
        """Append mapping to the profile."""
        profile_mapping = self.profile.mapping.add()
        profile_mapping.id = mapping.id
        profile_mapping.memory_start = mapping.memory_start
        profile_mapping.memory_limit = mapping.memory_limit
        profile_mapping.file_offset = mapping.file_offset
        profile_mapping.filename = mapping.filename_id
        profile_mapping.build_id = mapping.build_id_id
        profile_mapping.has_filenames = True
        profile_mapping.has_functions = True
        # Line numbers and inline frames are flagged only when a binary cache is set.
        has_line_info = bool(self.config.get('binary_cache_dir'))
        profile_mapping.has_line_numbers = has_line_info
        profile_mapping.has_inline_frames = has_line_info

    def gen_profile_location(self, location):
        """Append location and its lines to the profile."""
        profile_location = self.profile.location.add()
        profile_location.id = location.id
        profile_location.mapping_id = location.mapping_id
        profile_location.address = location.address
        for source in location.lines:
            line = profile_location.line.add()
            line.function_id = source.function_id
            line.line = source.line

    def gen_profile_function(self, function):
        """Append function to the profile; name_id is used for name and system_name."""
        profile_function = self.profile.function.add()
        profile_function.id = function.id
        profile_function.name = function.name_id
        profile_function.system_name = function.name_id
        profile_function.filename = function.source_filename_id
        profile_function.start_line = function.start_line