class PlotThread(QtCore.QThread):
    """Background thread that polls BPF cache counters and feeds the plot.

    Each perf-buffer event carries the miss/hit figures of one CPU. They are
    gathered in ``self.d`` under the keys ``'miss <cpu>'`` / ``'hit <cpu>'``
    and, once a batch is full, handed to the GUI through ``update_sig``.
    """

    # ``interval`` is a module-level setting that ``run`` reads.
    global interval
    update_sig = QtCore.pyqtSignal(dict)

    def __init__(self):
        super(PlotThread, self).__init__()
        self.d = {}
        self.first = True

    def handle_output(self, cpu, data, size):
        """Perf-buffer callback: record one event, emit when a batch is full.

        A batch is considered full once ``self.d`` holds at least 8 entries
        (presumably 4 CPUs x miss/hit -- TODO confirm). The very first full
        batch is dropped without being emitted.
        """
        sample = self.b['events'].event(data)
        core = str(sample.cpu)
        self.d['miss ' + core] = sample.miss
        self.d['hit ' + core] = sample.hit

        if len(self.d.keys()) < 8:
            return

        if not self.first:
            self.update_sig.emit(self.d)
        else:
            self.first = False
        # Start a fresh dict so the emitted one is not mutated afterwards.
        self.d = {}

    def run(self):
        """Load the BPF program, open the counters and poll forever."""
        cpu_flag = '-DNUM_CPUS=%d' % multiprocessing.cpu_count()
        self.b = BPF(text=prog, cflags=[cpu_flag])

        hardware_counters = (
            ('cache_misses', PerfHWConfig.CACHE_MISSES),
            ('cache_hits', PerfHWConfig.CACHE_REFERENCES),
        )
        for table_name, hw_config in hardware_counters:
            self.b[table_name].open_perf_event(PerfType.HARDWARE, hw_config)

        self.b['events'].open_perf_buffer(self.handle_output)
        # The software CPU clock only serves as a periodic trigger for
        # 'do_perf'; it fires int(1.0 / interval) times per second.
        self.b.attach_perf_event(
            ev_type=PerfType.SOFTWARE,
            ev_config=PerfSWConfig.CPU_CLOCK,
            fn_name='do_perf',
            sample_freq=int(1.0 / interval),
        )

        while True:
            try:
                self.b.perf_buffer_poll()
            except KeyboardInterrupt:
                exit()
usdt = USDT(pid=pid) usdt.enable_probe("hhvm_stack", "on_hhvm_event_hook") print("Enabled tracing on {}\n".format(pid)) usdts.append(usdt) b = BPF(text=bpf_text, usdt_contexts=usdts, cflags=cflags) # Track pids to fire signals on hhvm_pids = b.get_table("hhvm_pids") for pid in args.pids: hhvm_pids[ct.c_int(pid)] = ct.c_int(1) # Collect a stack every 10m cycles b.attach_perf_event( ev_type=PerfType.HARDWARE, ev_config=PerfHWConfig.CPU_CYCLES, fn_name="on_event", sample_period=10000000, ) class HackSymbol: """Struct from strobelight_hhvm_structs.h""" def __init__(self, bcc_obj): self.line = bcc_obj.line encoding = "utf-8" self.file_name = bcc_obj.file_name.decode(encoding) self.class_name = bcc_obj.class_name.decode(encoding) self.function = bcc_obj.function.decode(encoding) def __str__(self): return "{}:{} {}::{}".format(self.file_name, self.line,
class BundleBpf:
    """
    Bundle of one BPF program with its perf events and metric tables.

    The program description supplies the BPF C code, the perf events to
    attach and the metrics (BPF tables) to read back. Collected table entries
    are logged as text tables and handed to the serializer.
    """

    # Column headings of the text report, in print order. Each heading is
    # also the minimum width of its column.
    _COLUMNS = (u"cpu", u"pid", u"name", u"value")

    def __init__(self, program, serializer, hook):
        """Initialize Bundle BPF Perf event class.

        :param program: BPF program description with the keys "code",
            "metrics" and "events".
        :param serializer: Metric serializer.
        :param hook: Process ID.
        :type program: dict
        :type serializer: Serializer
        :type hook: int
        """
        self.obj = None
        self.code = program[u"code"]
        self.metrics = program[u"metrics"]
        self.events = program[u"events"]
        self.api_replies_list = list()
        self.serializer = serializer
        self.hook = hook

        self.obj = BPF(text=self.code)

    def attach(self, duration):
        """
        Attach events to BPF.

        Logs an error and exits the process with
        ``Constants.err_linux_attach`` if attaching raises AttributeError.

        :param duration: Trial duration.
        :type duration: int
        """
        # NOTE(review): ``duration`` is forwarded as the perf event
        # ``sample_period`` -- confirm that this is intended.
        try:
            for event in self.events:
                self.obj.attach_perf_event(
                    ev_type=event[u"type"],
                    ev_config=event[u"name"],
                    fn_name=event[u"target"],
                    sample_period=duration
                )
        except AttributeError:
            getLogger("console_stderr").error(u"Could not attach BPF events!")
            sys.exit(Constants.err_linux_attach)

    def detach(self):
        """
        Detach events from BPF.

        Logs an error and exits the process with
        ``Constants.err_linux_detach`` if detaching raises AttributeError.
        """
        try:
            for event in self.events:
                self.obj.detach_perf_event(
                    ev_type=event[u"type"],
                    ev_config=event[u"name"]
                )
        except AttributeError:
            getLogger("console_stderr").error(u"Could not detach BPF events!")
            sys.exit(Constants.err_linux_detach)

    def fetch_data(self):
        """
        Fetch data by invoking API calls to BPF.

        Every entry of every metric table becomes a dict with the keys
        "name", "value" and "labels". Entries whose "name" label is
        "python3" are skipped. The remaining entries are appended to
        ``self.api_replies_list`` and logged as aligned text tables on the
        "console_stdout" logger.
        """
        self.serializer.create(metrics=self.metrics)

        item_list = []
        for metric_list in self.metrics.values():
            for metric in metric_list:
                table = self.obj.get_table(metric[u"name"])
                for key, val in table.items():
                    labels = {
                        label: getattr(key, label)
                        for label in metric[u"labelnames"]
                    }
                    # The "name" label is read from the table key as bytes.
                    labels[u"name"] = labels[u"name"].decode(u"utf-8")
                    if labels[u"name"] == u"python3":
                        continue
                    item = {
                        u"name": metric[u"name"],
                        u"value": val.value,
                        u"labels": labels,
                    }
                    self.api_replies_list.append(item)
                    item_list.append(item)

        getLogger(u"console_stdout").info(self._format_tables(item_list))

    def process_data(self):
        """
        Post process API replies.

        Passes every collected item to the serializer.
        """
        for item in self.api_replies_list:
            self.serializer.serialize(
                metric=item[u"name"], labels=item[u"labels"], item=item
            )

    @staticmethod
    def _format_row(cells, widths):
        """Left-justify the cells to their column widths, space separated."""
        padded = (cell.ljust(width) for cell, width in zip(cells, widths))
        return u" ".join(padded) + u"\n"

    @classmethod
    def _format_tables(cls, items):
        """Render items as one text table per metric, ordered by name, cpu."""
        ordered = sorted(
            items, key=lambda it: (it[u"name"], it[u"labels"][u"cpu"])
        )
        rows = [
            (
                it[u"name"],
                (
                    str(it[u"labels"][u"cpu"]),
                    str(it[u"labels"][u"pid"]),
                    str(it[u"labels"][u"name"]),
                    str(it[u"value"]),
                ),
            )
            for it in ordered
        ]
        # Widths are shared by all tables so that they line up with each
        # other as well.
        widths = [
            max([len(title)] + [len(cells[idx]) for _, cells in rows])
            for idx, title in enumerate(cls._COLUMNS)
        ]

        chunks = []
        table_name = u""
        for name, cells in rows:
            if name != table_name:
                table_name = name
                chunks.append(f"\n==={table_name}===\n")
                # The header goes through the same formatter as the rows,
                # which keeps headings and values in the same columns.
                chunks.append(cls._format_row(cls._COLUMNS, widths))
            chunks.append(cls._format_row(cells, widths))
        return u"".join(chunks)
############################ TCP/UDP probe Accept ############################### bT.attach_kretprobe(event='inet_csk_accept',fn_name="trace_tcp_accept") bU.attach_kretprobe(event="udp_recvmsg",fn_name="trace_udp_rcv") ########################### UDP probe Connect ################################### bU.attach_kprobe(event="udp_sendmsg",fn_name="trace_connect_entryUDP") bU.attach_kretprobe(event="udp_sendmsg", fn_name="trace_udp_connect") ########################### Pagefault probe ##################################### f = BPF(src_file="source_Procfault.c") f.attach_perf_event( ev_type=PerfType.SOFTWARE, ev_config=PerfSWConfig.PAGE_FAULTS_MIN, fn_name="page_min_flt",sample_period=0,sample_freq=49) ################################################################################# if args.time: print("Running for {} seconds or hit Ctrl-C to end.".format(args.time)) timeRunning = args.time else: print("Running for {} seconds or hit Ctrl-C to end.".format(1000)) timeRunning = 1000 try: sleep(float(timeRunning)) except KeyboardInterrupt: signal.signal(signal.SIGINT, signal_ignore) print(" ")
print(" on CPU#{}".format(args.cpu), end="") if duration < 99999999: print(" for %d secs." % duration) else: print("... Hit Ctrl-C to end.") if debug or args.ebpf: print(bpf_text) if args.ebpf: exit() # initialize BPF & perf_events b = BPF(text=bpf_text) b.attach_perf_event(ev_type=PerfType.SOFTWARE, ev_config=PerfSWConfig.CPU_CLOCK, fn_name="do_perf_event", sample_period=sample_period, sample_freq=sample_freq, cpu=args.cpu) # signal handler def signal_ignore(signal, frame): print() # # Output Report # # collect samples try:
result.ref_cycles = ref; last_sample.insert(&cpu, &result); } } ''' max_cpus = len(utils.get_online_cpus()) b = BPF(text=code, cflags=['-DMAX_CPUS=%s' % str(max_cpus)]) # Cycles and Ref Cycles counters are required to measure frequency. b['cycles'].open_perf_event(PerfType.HARDWARE, PerfHWConfig.CPU_CYCLES) b['ref_cycles'].open_perf_event(PerfType.HARDWARE, PerfHWConfig.REF_CPU_CYCLES) # A dummy perf event which will get triggered at every Sample Frequency. b.attach_perf_event(ev_type=PerfType.SOFTWARE, ev_config=PerfSWConfig.CPU_CLOCK, fn_name='get_perf_counters', sample_freq=option.sample_freq) def print_data(cpu, data, size): e = b["output"].event(data) print "%-4d %-16d %-16d %-16.2f" % (cpu, e.cycles, e.ref_cycles, e.cycles * option.tsc_freq / e.ref_cycles) print "Counters Data" print "%-4s %-16s %-16s %-16s" % ('CPU', 'CLOCK', 'REF-CYCLES', 'FREQ') b['output'].open_perf_buffer(print_data) while True:
if check_runnable_weight_field(): bpf_text = bpf_text.replace('RUNNABLE_WEIGHT_FIELD', 'unsigned long runnable_weight;') else: bpf_text = bpf_text.replace('RUNNABLE_WEIGHT_FIELD', '') if debug or args.ebpf: print(bpf_text) if args.ebpf: exit() # initialize BPF & perf_events b = BPF(text=bpf_text) b.attach_perf_event(ev_type=PerfType.SOFTWARE, ev_config=PerfSWConfig.CPU_CLOCK, fn_name="do_perf_event", sample_period=0, sample_freq=frequency) print("Sampling run queue length... Hit Ctrl-C to end.") # output exiting = 0 if args.interval else 1 dist = b.get_table("dist") while (1): try: sleep(int(args.interval)) except KeyboardInterrupt: exiting = 1 print()
struct data_t data = {.ts = now, .cpu = cpu, .len = len}; events.perf_submit(ctx, &data, sizeof(data)); return 0; } """ # code substitutions if debug: print(bpf_text) # initialize BPF & perf_events b = BPF(text=bpf_text) # TODO: check for HW counters first and use if more accurate b.attach_perf_event(ev_type=PerfType.SOFTWARE, ev_config=PerfSWConfig.TASK_CLOCK, fn_name="do_perf_event", sample_period=0, sample_freq=frequency) if args.csv: if args.timestamp: print("TIME", end=",") print("TIMESTAMP_ns", end=",") print(",".join("CPU" + str(c) for c in xrange(ncpu)), end="") if args.fullcsv: print(",", end="") print(",".join("OFFSET_ns_CPU" + str(c) for c in xrange(ncpu)), end="") print() else: print(("Sampling run queues... Output every %s seconds. " + "Hit Ctrl-C to end.") % args.interval) class Data(ct.Structure):
u64 zero = 0, *val; val = ref_count.lookup_or_init(&key, &zero); (*val) += ctx->sample_period; return 0; } """ if args.ebpf: print(bpf_text) exit() b = BPF(text=bpf_text) b.attach_perf_event( ev_type=PerfType.HARDWARE, ev_config=PerfHWConfig.CACHE_MISSES, fn_name="on_cache_miss", sample_period=args.sample_period) b.attach_perf_event( ev_type=PerfType.HARDWARE, ev_config=PerfHWConfig.CACHE_REFERENCES, fn_name="on_cache_ref", sample_period=args.sample_period) print("Running for {} seconds or hit Ctrl-C to end.".format(args.duration)) try: sleep(float(args.duration)) except KeyboardInterrupt: signal.signal(signal.SIGINT, lambda signal, frame: print()) miss_count = {} for (k, v) in b.get_table('miss_count').items(): miss_count[(k.pid, k.cpu, k.name)] = v.value
class BpfCollector: def __init__(self, topology, debug, power_measure): self.topology = topology self.debug = debug self.power_measure = power_measure bpf_code_path = os.path.dirname(os.path.abspath(__file__)) \ + "/../bpf/bpf_monitor.c" if debug is False: if self.power_measure == True: self.bpf_program = BPF(src_file=bpf_code_path, \ cflags=["-DNUM_CPUS=%d" % multiprocessing.cpu_count(), \ "-DNUM_SOCKETS=%d" % len(self.topology.get_sockets()), \ "-DPERFORMANCE_COUNTERS"]) else: self.bpf_program = BPF(src_file=bpf_code_path, \ cflags=["-DNUM_CPUS=%d" % multiprocessing.cpu_count(), \ "-DNUM_SOCKETS=%d" % len(self.topology.get_sockets())]) else: self.bpf_program = BPF(src_file=bpf_code_path, \ cflags=["-DNUM_CPUS=%d" % multiprocessing.cpu_count(), \ "-DNUM_SOCKETS=%d" % len(self.topology.get_sockets()), \ "-DDEBUG"]) self.processors = self.bpf_program.get_table("processors") self.pids = self.bpf_program.get_table("pids") self.idles = self.bpf_program.get_table("idles") self.bpf_config = self.bpf_program.get_table("conf") self.bpf_global_timestamps = self.bpf_program.get_table( "global_timestamps") self.selector = 0 self.SELECTOR_DIM = 2 self.timeslice = 1000000000 self.timed_capture = False #self.bpf_program["cpu_cycles"].open_perf_event(PerfType.HARDWARE, \ # PerfHWConfig.CPU_CYCLES) # 4 means RAW_TYPE # int("73003c",16) is the hex for UNHALTED_CORE_CYCLES for any thread # int("53003c",16) is the hex for UNHALTED_CORE_CYCLES # int("5300c0",16) is the hex for INSTRUCTION_RETIRED if self.power_measure == True: self.bpf_program["cycles_core"].open_perf_event( 4, int("73003c", 16)) self.bpf_program["cycles_thread"].open_perf_event( 4, int("53003c", 16)) self.bpf_program["instr_thread"].open_perf_event( 4, int("5300c0", 16)) self.bpf_program["cache_misses"].open_perf_event( PerfType.HARDWARE, PerfHWConfig.CACHE_MISSES) self.bpf_program["cache_refs"].open_perf_event( PerfType.HARDWARE, PerfHWConfig.CACHE_REFERENCES) def print_event(self, cpu, data, size): event = 
ct.cast(data, ct.POINTER(ErrorCode)).contents if event.err >= 0: print("core: " + str(cpu) + " topology counters overflow or initialized with pid: " + str(event.err)) elif event.err < -1: # exclude the BPF_PROCEED_WITH_DEBUG_MODE event, since it is used # just to advance computation for the timed capture print("core: " + str(cpu) + " " + str(BPFErrors.error_dict[event.err])) def start_capture(self, timeslice): for key, value in self.topology.get_new_bpf_topology().items(): self.processors[ct.c_ulonglong(key)] = value self.timed_capture = False self.timeslice = timeslice self.bpf_config[ct.c_int(0)] = ct.c_uint( self.selector) # current selector self.bpf_config[ct.c_int(1)] = ct.c_uint(self.selector) # old selector self.bpf_config[ct.c_int(2)] = ct.c_uint(self.timeslice) # timeslice self.bpf_config[ct.c_int(3)] = ct.c_uint(0) # switch count if self.debug == True: self.bpf_program["err"].open_perf_buffer(self.print_event, page_cnt=256) self.bpf_program.attach_tracepoint(tp="sched:sched_switch", \ fn_name="trace_switch") self.bpf_program.attach_tracepoint(tp="sched:sched_process_exit", \ fn_name="trace_exit") def start_timed_capture(self, count=0, frequency=0): if frequency: sample_freq = frequency sample_period = 0 self.timeslice = int((1 / float(frequency)) * 1000000000) elif count: sample_freq = 0 sample_period = count self.timeslice = int(sample_period * 1000000000) else: # If user didn't specify anything, use default 49Hz sampling sample_freq = 49 sample_period = 0 self.timeslice = int((1 / float(frequency)) * 1000000000) self.timed_capture = True for key, value in self.topology.get_new_bpf_topology().items(): self.processors[ct.c_ulonglong(key)] = value self.bpf_config[ct.c_int(0)] = ct.c_uint( self.selector) # current selector self.bpf_config[ct.c_int(1)] = ct.c_uint(self.selector) # old selector self.bpf_config[ct.c_int(2)] = ct.c_uint(self.timeslice) # timeslice self.bpf_config[ct.c_int(3)] = ct.c_uint(0) # switch count if self.debug == True: 
self.bpf_program["err"].open_perf_buffer(self.print_event, page_cnt=256) self.bpf_program.attach_tracepoint(tp="sched:sched_switch", \ fn_name="trace_switch") self.bpf_program.attach_tracepoint(tp="sched:sched_process_exit", \ fn_name="trace_exit") self.bpf_program.attach_perf_event(ev_type=PerfType.SOFTWARE, ev_config=PerfSWConfig.CPU_CLOCK, fn_name="timed_trace", sample_period=sample_period, sample_freq=sample_freq) def stop_capture(self): self.bpf_program.detach_tracepoint(tp="sched:sched_switch") self.bpf_program.detach_tracepoint(tp="sched:sched_process_exit") def get_new_sample(self, sample_controller, rapl_monitor): sample = self._get_new_sample(rapl_monitor) if not self.timed_capture: sample_controller.compute_sleep_time( sample.get_sched_switch_count()) self.timeslice = sample_controller.get_timeslice() self.bpf_config[ct.c_int(2)] = ct.c_uint( self.timeslice) # timeslice if self.debug == True: self.bpf_program.kprobe_poll() return sample def _get_new_sample(self, rapl_monitor): total_execution_time = 0.0 sched_switch_count = self.bpf_config[ct.c_int(3)].value tsmax = 0 # Initialize the weighted cycles for each core to 0 total_weighted_cycles = [] for socket in self.topology.get_sockets(): total_weighted_cycles.append(0) # We use a binary selector so that while userspace is reading events # using selector 0 we write events using selector 1 and vice versa. 
# Here we initialize it to 0 and set the number of slots used for # read/write equal to the number of sockets * the number of selectors read_selector = 0 total_slots_length = len( self.topology.get_sockets()) * self.SELECTOR_DIM # Every time we get a new sample we want to switch the selector we are using if self.selector == 0: self.selector = 1 read_selector = 0 else: self.selector = 0 read_selector = 1 rapl_measurement = [] package_diff = 0 core_diff = 0 dram_diff = 0 if self.power_measure == True: # Get new sample from rapl right before changing selector in eBPF rapl_measurement = rapl_monitor.get_rapl_measure() package_diff = rapl_measurement["package"] core_diff = rapl_measurement["core"] dram_diff = rapl_measurement["dram"] # Propagate the update of the selector to the eBPF program self.bpf_config[ct.c_int(0)] = ct.c_uint(self.selector) pid_dict = {} tsmax = self.bpf_global_timestamps[ct.c_int(read_selector)].value # Add the count of clock cycles for each active process to the total # number of clock cycles of the socket for key, data in self.pids.items(): if data.ts[read_selector] + self.timeslice > tsmax: total_execution_time = total_execution_time + float( data.time_ns[read_selector]) / 1000000 if self.power_measure == True: for multisocket_selector in range(read_selector, total_slots_length, self.SELECTOR_DIM): # Compute the number of total weighted cycles per socket cycles_index = int(multisocket_selector / self.SELECTOR_DIM) if data.ts[read_selector] + self.timeslice > tsmax: total_weighted_cycles[ cycles_index] = total_weighted_cycles[ cycles_index] + data.weighted_cycles[ multisocket_selector] # Add the count of clock cycles for each idle process to the total # number of clock cycles of the socket for key, data in self.idles.items(): if data.ts[read_selector] + self.timeslice > tsmax: total_execution_time = total_execution_time + float( data.time_ns[read_selector]) / 1000000 if self.power_measure == True: for multisocket_selector in 
range(read_selector, total_slots_length, self.SELECTOR_DIM): # Compute the number of total weighted cycles per socket cycles_index = int(multisocket_selector / self.SELECTOR_DIM) if data.ts[read_selector] + self.timeslice > tsmax: total_weighted_cycles[ cycles_index] = total_weighted_cycles[ cycles_index] + data.weighted_cycles[ multisocket_selector] if self.power_measure == True: # Compute package/core/dram power in mW from RAPL samples package_power = [ package_diff[skt].power_milliw() for skt in self.topology.get_sockets() ] core_power = [ core_diff[skt].power_milliw() for skt in self.topology.get_sockets() ] dram_power = [ dram_diff[skt].power_milliw() for skt in self.topology.get_sockets() ] total_power = { "package": sum(package_power), "core": sum(core_power), "dram": sum(dram_power) } else: total_power = {"package": 0, "core": 0, "dram": 0} for key, data in self.pids.items(): proc_info = ProcessInfo(len(self.topology.get_sockets())) proc_info.set_pid(data.pid) proc_info.set_tgid(data.tgid) proc_info.set_comm(data.comm) proc_info.set_cycles(data.cycles[read_selector]) proc_info.set_instruction_retired( data.instruction_retired[read_selector]) proc_info.set_cache_misses(data.cache_misses[read_selector]) proc_info.set_cache_refs(data.cache_refs[read_selector]) proc_info.set_time_ns(data.time_ns[read_selector]) add_proc = False for multisocket_selector in range(read_selector, total_slots_length, self.SELECTOR_DIM): if data.ts[read_selector] + self.timeslice > tsmax: socket_info = SocketProcessItem() socket_info.set_weighted_cycles( data.weighted_cycles[multisocket_selector]) socket_info.set_ts(data.ts[read_selector]) proc_info.set_socket_data( int(multisocket_selector / self.SELECTOR_DIM), socket_info) add_proc = True if add_proc: pid_dict[data.pid] = proc_info if self.power_measure == True: proc_info.set_power( self._get_pid_power(proc_info, total_weighted_cycles, core_power)) else: proc_info.set_power(0) 
proc_info.compute_cpu_usage_millis(float(total_execution_time), multiprocessing.cpu_count()) for key, data in self.idles.items(): proc_info = ProcessInfo(len(self.topology.get_sockets())) proc_info.set_pid(data.pid) proc_info.set_tgid(-1 * (1 + int(key.value))) proc_info.set_comm(data.comm) proc_info.set_cycles(data.cycles[read_selector]) proc_info.set_instruction_retired( data.instruction_retired[read_selector]) proc_info.set_cache_misses(data.cache_misses[read_selector]) proc_info.set_cache_refs(data.cache_refs[read_selector]) proc_info.set_time_ns(data.time_ns[read_selector]) add_proc = False for multisocket_selector in range(read_selector, total_slots_length, self.SELECTOR_DIM): if data.ts[read_selector] + self.timeslice > tsmax: socket_info = SocketProcessItem() socket_info.set_weighted_cycles( data.weighted_cycles[multisocket_selector]) socket_info.set_ts(data.ts[read_selector]) proc_info.set_socket_data( int(multisocket_selector / self.SELECTOR_DIM), socket_info) add_proc = True if add_proc: pid_dict[-1 * (1 + int(key.value))] = proc_info if self.power_measure == True: proc_info.set_power( self._get_pid_power(proc_info, total_weighted_cycles, core_power)) else: proc_info.set_power(0) proc_info.compute_cpu_usage_millis(float(total_execution_time), multiprocessing.cpu_count()) return BpfSample(tsmax, total_execution_time, sched_switch_count, self.timeslice, total_power, pid_dict, self.topology.get_hyperthread_count()) def _get_pid_power(self, pid, total_cycles, core_power): pid_power = 0 for socket in self.topology.get_sockets(): if float(total_cycles[socket]) > 0: pid_power = pid_power + (core_power[socket] * \ (float(pid.get_socket_data(socket).get_weighted_cycles()) \ / float(total_cycles[socket]))) return pid_power
def __init__(self, args, debug=0, frequency=99):
    """Specialise the BPF text, attach the sampler and print reports.

    ``self.bpf_text`` is specialised according to ``args``, the
    ``do_perf_event`` function is attached to the software CPU clock and one
    report is printed per interval. This call does not return normally: it
    ends through ``exit()``, either right after printing the BPF text
    (``args.ebpf``) or from the reporting loop.

    :param args: parsed options; the attributes read here are ``cpus``,
        ``ebpf``, ``count``, ``json``, ``interval``, ``timestamp`` and
        ``runqocc``.
    :param debug: when truthy, print the generated BPF text before loading.
    :param frequency: passed as ``sample_freq`` to ``attach_perf_event``.
    """
    # code substitutions
    if args.cpus:
        # Per-CPU mode: the histogram is keyed by (cpu, slot).
        self.bpf_text = self.bpf_text.replace(
            'STORAGE', 'BPF_HISTOGRAM(dist, cpu_key_t);')
        self.bpf_text = self.bpf_text.replace(
            'STORE', 'cpu_key_t key = {.slot = len}; ' +
            'key.cpu = bpf_get_smp_processor_id(); ' +
            'dist.increment(key);')
    else:
        # System-wide mode: the histogram is keyed by the length only.
        self.bpf_text = self.bpf_text.replace(
            'STORAGE', 'BPF_HISTOGRAM(dist, unsigned int);')
        self.bpf_text = self.bpf_text.replace('STORE', 'dist.increment(len);')

    # presumably probes whether the running kernel has this struct field;
    # verify against check_runnable_weight_field()
    if self.check_runnable_weight_field():
        self.bpf_text = self.bpf_text.replace(
            'RUNNABLE_WEIGHT_FIELD', 'unsigned long runnable_weight;')
    else:
        self.bpf_text = self.bpf_text.replace('RUNNABLE_WEIGHT_FIELD', '')

    if debug or args.ebpf:
        print(self.bpf_text)
        if args.ebpf:
            # Only the generated program was requested.
            exit()

    countdown = int(args.count)
    samples = 0

    # initialize BPF & perf_events
    b = BPF(text=self.bpf_text)
    b.attach_perf_event(ev_type=PerfType.SOFTWARE,
                        ev_config=PerfSWConfig.CPU_CLOCK,
                        fn_name="do_perf_event",
                        sample_period=0, sample_freq=frequency)

    if not args.json:
        print("Sampling run queue length... Hit Ctrl-C to end.")

    # output
    # Without an interval a single report is printed and the loop exits.
    exiting = 0 if args.interval else 1
    dist = b.get_table("dist")
    while (1):
        try:
            sleep(int(args.interval))
        except KeyboardInterrupt:
            # Finish the current report, then leave at the end of the loop.
            exiting = 1

        print()
        if args.timestamp:
            if not args.json:
                print("%-8s\n" % strftime("%H:%M:%S"), end="")

        # NOTE(review): the JSON-mode format strings below do not produce
        # valid JSON (unquoted tag and timestamp, "%" inside a number, two
        # values under "cpu") -- confirm what the consumer expects.
        if args.runqocc:
            if args.cpus:
                # run queue occupancy, per-CPU summary
                idle = {}
                queued = {}
                cpumax = 0
                # Find the highest CPU id so every CPU up to it gets a line.
                for k, v in dist.items():
                    if k.cpu > cpumax:
                        cpumax = k.cpu
                for c in range(0, cpumax + 1):
                    idle[c] = 0
                    queued[c] = 0
                # Slot 0 counts as idle, every other slot as queued.
                for k, v in dist.items():
                    if k.slot == 0:
                        idle[k.cpu] += v.value
                    else:
                        queued[k.cpu] += v.value
                for c in range(0, cpumax + 1):
                    samples = idle[c] + queued[c]
                    if samples:
                        runqocc = float(queued[c]) / samples
                    else:
                        runqocc = 0
                    if not args.json:
                        print("runqocc, CPU %-3d %6.2f%%" % (c,
                              100 * runqocc))
                    else:
                        print(
                            '{"tag": runqocc, "timestamp": %-8s, "cpu": %-3d %6.2f%%}'
                            % (strftime("%H:%M:%S"), c, 100 * runqocc))

            else:
                # run queue occupancy, system-wide summary
                idle = 0
                queued = 0
                # Key 0 counts as idle, every other key as queued.
                for k, v in dist.items():
                    if k.value == 0:
                        idle += v.value
                    else:
                        queued += v.value
                samples = idle + queued
                if samples:
                    runqocc = float(queued) / samples
                else:
                    runqocc = 0
                if not args.json:
                    print("runqocc: %0.2f%%" % (100 * runqocc))
                else:
                    print(
                        '{"tag": runqocc, "timestamp": %-8s, "val": %0.2f%%}'
                        % (strftime("%H:%M:%S"), 100 * runqocc))

        else:
            # run queue length histograms
            # In JSON mode nothing is printed for the histogram.
            if not args.json:
                dist.print_linear_hist("runqlen", "cpu")

        # Start the next interval from an empty histogram.
        dist.clear()

        countdown -= 1
        if exiting or countdown == 0:
            exit()
if (written <= 0) { bpf_trace_printk("error: %d\n", written); return 0; } stack.push(&entry, 0); return 0; } """ b = BPF(text=text) # Not working: problem -> need to use xattr to configure stack trace sampling # (probably should just implement this in pybpf I guess) b.attach_perf_event(ev_type=PerfType.HARDWARE, ev_config=PerfHWConfig.CPU_CYCLES, fn_name='do_perf_event', sample_freq=4000) try: time.sleep(999999) except KeyboardInterrupt: pass while 1: try: print(b["stack"].pop().a) except KeyError: break
ptr = array.lookup(&zero); if (ptr && ptr->ts && ts > ptr->ts + THRESHOLD) { int stackid = stack_traces.get_stackid(ctx, 0); //bpf_trace_printk("profile\n"); counts.increment(stackid); array.update(&zero, &data); } return 0; } """ b = BPF(text=bpf_text) b.attach_perf_event(ev_type=PerfType.HARDWARE, ev_config=PerfSWConfig.CPU_CLOCK, fn_name="do_profile", sample_period=0, sample_freq=10000, cpu=-1) try: # check /sys/kernel/debug/tracing/tracing_on b.trace_print() except KeyboardInterrupt: print() counts = b["counts"] stack_traces = b["stack_traces"] for k, v in sorted(counts.items(), key=lambda counts: counts[1].value): stack = stack_traces.walk(k.value) for frame in stack: print(b.ksym(frame).decode()) print()
struct output_t out = {}; out.cpu = cpu; out.miss = new_miss - *old_miss; out.hit = new_hit - *old_hit; *old_miss = new_miss; *old_hit = new_hit; events.perf_submit(ctx, &out, sizeof(out)); return 0; } """ def handle_output(cpu, data, size): event = b['events'].event(data) print(event.cpu, event.miss, event.hit) b = BPF(text=prog, cflags=['-DNUM_CPUS=%d' % multiprocessing.cpu_count()]) b['cache_misses'].open_perf_event(PerfType.HARDWARE, PerfHWConfig.CACHE_MISSES) b['events'].open_perf_buffer(handle_output) b.attach_perf_event(ev_type=PerfType.SOFTWARE, ev_config=PerfSWConfig.CPU_CLOCK, fn_name='do_perf', sample_freq=1) while True: b.perf_buffer_poll()
struct trace_t trace = { .stack_id = traces.get_stackid(&ctx->regs, BPF_F_USER_STACK) }; cache.increment(trace); return 0; } """ program_pid = int(sys.argv[1]) bpf_source = bpf_source.replace('PROGRAM_PID', str(program_pid)) bpf = BPF(text=bpf_source) bpf.attach_perf_event(ev_type=PerfType.SOFTWARE, ev_config=PerfSWConfig.CPU_CLOCK, fn_name='collect_stack_traces', sample_period=1) exiting = 0 try: sleep(300) except KeyboardInterrupt: exiting = 1 signal.signal(signal.SIGINT, signal_ignore) print("dumping the results") for trace, acc in sorted(bpf['cache'].items(), key=lambda cache: cache[1].value): line = [] if trace.stack_id < 0 and trace.stack_id == -errno.EFAULT: line = ['Unknown stack'] else:
result.tid = bpf_get_current_pid_tgid(); events.perf_submit(ctx, &result, sizeof(result)); return 0; } """ if args.ebpf: print(bpf_text) exit() b = BPF(text=bpf_text) try: b.attach_perf_event(ev_type=PerfType.SOFTWARE, ev_config=PerfSWConfig.PAGE_FAULTS, fn_name="on_cache_miss", sample_period=args.sample_period, cpu=3) # b.attach_perf_event( # ev_type=PerfType.HARDWARE, ev_config=PerfHWConfig.CACHE_REFERENCES, # fn_name="on_cache_ref", sample_period=args.sample_period) except Exception as e: print("Failed to attach to a hardware event. Is this a virtual machine?") print(e) exit() print("Running for {} seconds or hit Ctrl-C to end.".format(args.duration)) print("%-4s %-8s %-14s %-4s" % ("CPU", "TID", "ADDRESS", "H/M"))
"-include", "/usr/src/zfs-" + KVER + "/zfs_config.h", "-I/usr/src/zfs-" + KVER + "/include/", "-I/usr/src/zfs-" + KVER + "/include/spl", "-I/usr/src/zfs-" + KVER + "/include/", "-I/usr/src/zfs-" + KVER + "/include/linux" ]) b.attach_kprobe(event="spa_sync", fn_name="spa_sync_entry") b.attach_kretprobe(event="spa_sync", fn_name="spa_sync_return") b.attach_kprobe(event="dsl_pool_sync", fn_name="dsl_pool_sync_entry") b.attach_kretprobe(event="dsl_pool_sync", fn_name="dsl_pool_sync_return") b.attach_kprobe(event="dmu_tx_delay", fn_name="dmu_tx_delay_entry") b.attach_kprobe(event="trace_zfs_delay__mintime", fn_name="dmu_tx_delay_mintime") b.attach_perf_event(ev_type=PerfType.SOFTWARE, ev_config=PerfSWConfig.CPU_CLOCK, fn_name="get_spa_dirty", sample_freq=10) print_count = 30 # initialize dgrn program object to read zfs_dirty_data_max prog = drgn.program_from_kernel() # loop with callback to print_event b["sync_events"].open_perf_buffer(print_event) while 1: try: b.perf_buffer_poll() except KeyboardInterrupt: exit()
}; if (trace.stack_id >= 0) { cache.increment(trace); } return 0; } """ program_pid = sys.argv[1] bpf_source = bpf_source.replace("PROGRAM_PID", program_pid) bpf = BPF(text=bpf_source) bpf.attach_perf_event(ev_type=PerfType.SOFTWARE, ev_config=PerfSWConfig.CPU_CLOCK, fn_name="collect_stack_traces", cpu=0, sample_freq=99) try: sleep(999999999) except KeyboardInterrupt: signal.signal(signal.SIGINT, signal.SIG_DFL) cache = bpf["cache"] traces = bpf["traces"] bpf.detach_perf_event(ev_type=PerfType.SOFTWARE, ev_config=PerfSWConfig.CPU_CLOCK) bpf.add_module("/usr/lib64/libc-2.30.so")
print("Sampling at %s of %s by %s stack" % (sample_context, thread_context, stack_context), end="") if duration < 99999999: print(" for %d secs." % duration) else: print("... Hit Ctrl-C to end.") if debug or args.ebpf: print(bpf_text) if args.ebpf: exit() # initialize BPF & perf_events b = BPF(text=bpf_text) b.attach_perf_event(ev_type=PerfType.SOFTWARE, ev_config=PerfSWConfig.CPU_CLOCK, fn_name="do_perf_event", sample_period=sample_period, sample_freq=sample_freq) # signal handler def signal_ignore(signal, frame): print() # # Output Report # # collect samples try: sleep(duration) except KeyboardInterrupt: # as cleanup can take some time, trap Ctrl-C:
if args.pid: try: open('/proc/%s/comm' % args.pid).read().strip() except IOError: print("Invalid PID %s" % args.pid) exit() bpf_text = bpf_text.replace('--PID--', args.pid) else: print("Provide pid") exit() b = BPF(text=bpf_text) b.attach_perf_event(ev_type=PerfType.HARDWARE, ev_config=PerfHWConfig.CACHE_MISSES, fn_name="on_cache_miss", sample_period=perf_sample_period) b.attach_perf_event(ev_type=PerfType.HARDWARE, ev_config=PerfHWConfig.CACHE_REFERENCES, fn_name="on_cache_ref", sample_period=perf_sample_period) exiting = 0 while (1): try: sleep(0.1) except KeyboardInterrupt: exiting = 1
from __future__ import print_function
from bcc import BPF, PerfType, PerfSWConfig
import ctypes as ct
import signal
from time import sleep

# Sampling frequency used for both page-fault perf events.
SAMPLE_FREQ = 49
# How long the script keeps sampling, in seconds.
DURATION_SECS = 100


def signal_ignore(signum, frame):
    """SIGINT handler that only prints a newline.

    Installed after the first Ctrl-C so that later ones are absorbed instead
    of raising KeyboardInterrupt.
    """
    print()


# load BPF program
b = BPF(src_file="source.c")

# Major and minor page faults are software perf events; the named constants
# stand for the raw values 1 (type) and 6 / 5 (config).
b.attach_perf_event(
    ev_type=PerfType.SOFTWARE, ev_config=PerfSWConfig.PAGE_FAULTS_MAJ,
    fn_name="page_maj_flt", sample_period=0, sample_freq=SAMPLE_FREQ)
b.attach_perf_event(
    ev_type=PerfType.SOFTWARE, ev_config=PerfSWConfig.PAGE_FAULTS_MIN,
    fn_name="page_min_flt", sample_period=0, sample_freq=SAMPLE_FREQ)

print("Running for {} seconds or hit Ctrl-C to end.".format(DURATION_SECS))

try:
    sleep(float(DURATION_SECS))
except KeyboardInterrupt:
    signal.signal(signal.SIGINT, signal_ignore)

print(" ")