def get_data():
    """Collect per-method (and optionally per-syscall) statistics.

    Reads the module-level ``args`` and ``bpf`` objects.  Every entry of the
    returned list has the shape ``(name, (count, total_ns))``; when
    ``args.latency`` is false the second tuple element is always ``0``.

    Returns:
        The entries sorted in ascending order, by total time when
        ``args.latency`` is set and by call count otherwise.
    """

    def method_label(key):
        # "<class>.<method>", tolerating undecodable bytes.
        return (key.clazz.decode('utf-8', 'replace') + "." +
                key.method.decode('utf-8', 'replace'))

    def syscall_label(key):
        return syscall_name(key.value).decode('utf-8', 'replace')

    # The method tables will be empty when no language was specified for
    # tracing.
    if args.latency:
        data = [(method_label(key), (stat.num_calls, stat.total_ns))
                for key, stat in bpf["times"].items()]
    else:
        data = [(method_label(key), (stat.value, 0))
                for key, stat in bpf["counts"].items()]

    if args.syscalls:
        if args.latency:
            data.extend(
                (syscall_label(key), (stat.num_calls, stat.total_ns))
                for key, stat in bpf["systimes"].items())
        else:
            data.extend(
                (syscall_label(key), (stat.value, 0))
                for key, stat in bpf["syscounts"].items())

    sort_index = 1 if args.latency else 0
    return sorted(data, key=lambda entry: entry[1][sort_index])
def print_syscall_fname_event(cpu, data, size):
    """Print one ``syscall_fname_events`` event and record it in ``out_data``.

    ``cpu`` and ``size`` are accepted but not used by this function.
    """
    event = b["syscall_fname_events"].event(data)
    record = {
        "timestamp": event.timestamp,
        "pid": event.pid,
        "syscall": syscall_name(event.id),
        "arguments": event.fname,
    }
    print("[{}] [{}] {}: {}".format(
        record["timestamp"],
        record["pid"],
        record["syscall"],
        record["arguments"],
    ))
    out_data.append(record)
def print_syscall_clone_event(cpu, data, size):
    """Print one ``syscall_clone_events`` event and record it in ``out_data``.

    The event's ``return_value`` is reported as ``child=<value>``.  ``cpu``
    and ``size`` are accepted but not used by this function.
    """
    event = b["syscall_clone_events"].event(data)
    name = syscall_name(event.id)
    child = event.return_value
    print("[{}] [{}] {}: child={}".format(
        event.timestamp, event.pid, name, child))
    out_data.append({
        "timestamp": event.timestamp,
        "pid": event.pid,
        "syscall": name,
        "arguments": "child={}".format(child),
    })
def print_syscall_ptrace_event(cpu, data, size):
    """Print one ``syscall_ptrace_events`` event and record it in ``out_data``.

    The event's ``request`` and ``target_pid`` fields are reported as the
    call arguments.  ``cpu`` and ``size`` are accepted but not used by this
    function.
    """
    event = b["syscall_ptrace_events"].event(data)
    name = syscall_name(event.id)
    request, target = event.request, event.target_pid
    print("[{}] [{}] {}: request={} target_pid={}".format(
        event.timestamp, event.pid, name, request, target))
    out_data.append({
        "timestamp": event.timestamp,
        "pid": event.pid,
        "syscall": name,
        "arguments": "request={} target_pid={}".format(request, target),
    })
def print_event_perf(cpu, data, size):
    """Write a one-line summary of an ``events`` event to outputbpfdata.txt.

    The line contains the event's ``pid_tgid``, ``pid32``, the command name
    returned by ``comm_for_pid`` and the syscall name returned by
    ``syscall_name``.  ``cpu`` and ``size`` are accepted but not used by this
    function.
    """
    event = b["events"].event(data)
    string = "pid_tgid:" + str(event.pid_tgid) + ", pid_tgid32:" + str(
        event.pid32) + ",pid name: " + comm_for_pid(
            event.pid32) + ", syscall: " + syscall_name(event.sys) + '\n'
    # The context manager guarantees the handle is flushed and closed; the
    # previous code left it open until garbage collection.
    # NOTE(review): mode "w+" truncates the file on every call, so only the
    # most recent event survives -- confirm whether append mode was intended.
    with open("outputbpfdata.txt", "w+") as f:
        f.write(string)
def producer(self):
    """Drain timing tables of every attached BPF program into ``self.queue``.

    For each ``(lang, bpf)`` pair found in ``self.attached_bpf`` the
    ``times`` and ``systimes`` tables are read and converted into one
    message of the form ``{'lang': lang, 'event': [...]}``.  The message is
    queued only when it contains at least one event.  Both tables are
    cleared afterwards, whether or not a message was queued.
    """
    ns_per_ms = 1000000.0
    for bpf_collections in self.attached_bpf:
        for lang, bpf in bpf_collections.items():
            samples = [
                (key.clazz.decode('utf-8', 'replace') + "." +
                 key.method.decode('utf-8', 'replace'),
                 (stat.num_calls, stat.total_ns))
                for key, stat in bpf["times"].items()
            ]
            samples += [
                (syscall_name(key.value).decode('utf-8', 'replace'),
                 (stat.num_calls, stat.total_ns))
                for key, stat in bpf["systimes"].items()
            ]

            events = []
            for function, (calls, total_ns) in samples:
                total_ms = total_ns / ns_per_ms
                events.append({
                    lang: {
                        'function': function,
                        'call_count': calls,
                        'call_time_avg': total_ms / calls,
                        'call_time_total': total_ms,
                    }
                })

            if events:
                self.queue.put({'lang': lang, "event": events})

            bpf['systimes'].clear()
            bpf['times'].clear()
def print_event_hash():
    """Print the ``data`` table, then the ``data_exit_hash`` table.

    Both tables are listed in ascending order of their key value.  Entries
    whose key equals 0xFFFFFFFF are skipped.
    """

    def key_value(entry):
        # Ascending by key; same ordering as negating the key and reversing.
        return entry[0].value

    entries = bpf["data"]
    for k, v in sorted(entries.items(), key=key_value):
        if k.value != 0xFFFFFFFF:  # happens occasionally, we don't need it
            printb((b"%-20d %22s %8d %20s") %
                   (k.value, comm_for_pid(v.pid32), v.pid_tgid,
                    syscall_name(v.sys)))

    print("STARTING EXIT PRINT")

    exit_entries = bpf["data_exit_hash"]
    for k, v in sorted(exit_entries.items(), key=key_value):
        if k.value != 0xFFFFFFFF:  # happens occasionally, we don't need it
            printb((b"%-20d %22s %8d %20s %15d") %
                   (k.value, comm_for_pid(v.pid32), v.pid_tgid,
                    syscall_name(v.sys), v.ret))
def print_event_perf(cpu, data, size):
    """Dispatch one ``events`` event according to its ``identifier``.

    identifier 1: syscall exit  -> tuple appended to ``syscall``; ``testret``
                                   is printed.
    identifier 0: syscall enter -> tuple appended to ``syscall``.
    identifier 2: method enter  -> tuple appended to ``method`` and printed.
    identifier 3: method exit   -> tuple appended to ``method`` and printed.
    identifier 4: process has spawned -> pid appended to ``x_children``.

    Any other identifier is ignored.  ``cpu`` and ``size`` are accepted but
    not used by this function.
    """
    global start  # declared here although the function never touches it
    evt = bpf["events"].event(data)
    kind = evt.identifier

    if kind == 1:
        # Exit system call.
        record = (1, evt.time, comm_for_pid(evt.ex_pid32), evt.ex_pid32,
                  syscall_name(evt.ex_sys).decode('utf-8'), evt.ex_sys,
                  evt.testret)
        syscall.append(record)
        print("testret: %-20d" % (evt.testret))
        return

    if kind == 0:
        # Enter system call.
        record = (0, evt.time, comm_for_pid(evt.ent_pid32), evt.ent_pid32,
                  syscall_name(evt.ent_sys).decode('utf-8'), evt.ent_sys)
        syscall.append(record)
        return

    if kind == 2:
        # Method enter call; everything after the tag is what gets printed.
        record = (2, evt.time, evt.ent_pid32, evt.ip,
                  bpf.sym(evt.ip, evt.ent_pid32).decode('utf-8'))
        method.append(record)
        print("method ent: %-20d %12d %8d %15s" % record[1:])
        return

    if kind == 3:
        # Method exit call; everything after the tag is what gets printed.
        record = (3, evt.time, evt.ex_pid32, evt.ip,
                  bpf.sym(evt.ip, evt.ex_pid32).decode('utf-8'), evt.ex_ret)
        method.append(record)
        print("method ex: %-20d %12d %8d %15s %12d" % record[1:])
        return

    if kind == 4:
        # Process has spawned.
        x_children.append(evt.ent_pid32)
def json_dump(b: BPF):
    """Return ``{"syscalls": {name: count}}`` built from ``b["data"]``.

    Each key of the table is translated with ``syscall_name`` and decoded to
    ``str``; each value contributes its ``.value``.
    """
    syscalls_map = {}
    for key, counter in b["data"].items():
        label = str(syscall_name(key.value).decode())
        syscalls_map[label] = counter.value
    return {"syscalls": syscalls_map}
def print_count_stats():
    """Print a timestamped table of syscall counts, then clear the table.

    Entries of ``bpf["data"]`` are ordered by descending count and truncated
    to the first ``args.top`` before entries whose key is 0xFFFFFFFF are
    dropped.  The syscall name is looked up from the key modulo 10000.
    """

    def descending_count(entry):
        return -entry[1].value

    counts = bpf["data"]
    print("[%s]" % strftime("%H:%M:%S"))
    print("%-22s %8s" % ("SYSCALL", "COUNT"))

    ranked = sorted(counts.items(), key=descending_count)
    for key, count in ranked[:args.top]:
        if key.value != 0xFFFFFFFF:  # happens occasionally, we don't need it
            printb(b"%-22s %8d" %
                   (syscall_name(key.value % 10000), count.value))

    print("")
    counts.clear()
def on_syscall(cpu, data, size):
    """Print the syscall name of one ``on_syscall`` event and its user stack.

    The stack is obtained by walking ``self.bpf['user_stack']`` with the
    event's ``trace_id``; each address is printed next to the symbol that
    ``self.bpf.sym`` resolves for ``self.pid``.  ``cpu`` and ``size`` are
    accepted but not used by this function.
    """
    event = self.bpf['on_syscall'].event(data)
    frames = list(self.bpf['user_stack'].walk(event.trace_id))
    name = syscall_name(event.syscall).decode('utf-8')

    print(f'Syscall {name:<16}')
    for frame in frames:
        symbol = self.bpf.sym(frame, self.pid, show_offset=True,
                              demangle=False)
        print(f' 0x{frame:016x} -> {symbol}')
    print()
def agg_colval(key):
    """Return the text for the aggregation column of one table key.

    With ``args.process`` set, the key value is rendered as a number followed
    by the result of ``comm_for_pid``; otherwise the key value is passed to
    ``syscall_name``.
    """
    if not args.process:
        return syscall_name(key.value)
    pid = key.value
    return b"%-6d %-15s" % (pid, comm_for_pid(pid))
def syscall_name(num):
    """Return the system call name for ``num`` as a ``str``.

    The name comes from ``syscall.syscall_name`` and is decoded as UTF-8;
    no case conversion is applied here.
    """
    raw_name = syscall.syscall_name(num)
    return raw_name.decode('utf-8')
u32 key = args->id; val = syscall_count.lookup_or_init(&key, &zero); ++(*val); return 0; } """ if args.pid: bpf_text = insert_pid_filter(bpf_text, args.pid) else: bpf_text = insert_name_filter(bpf_text, args.name) if args.dump_prog: print(bpf_text) exit() print("Aggregating syscalls. CTRL-C to quit.") b = BPF(text=bpf_text) counter = b["syscall_count"] while True: try: sleep(1) except KeyboardInterrupt: print("") for k, v in sorted(counter.items(), key=lambda kv: -kv[1].value): print(b"%-22s %8d" % (syscall_name(k.value), v.value)) exit()
def print_result(b: BPF):
    """Print ``<syscall name>: <count>`` for every entry of ``b["data"]``.

    Entries whose key equals 0xFFFFFFFF are skipped.
    """
    for key, counter in b["data"].items():
        if key.value != 0xFFFFFFFF:
            name = syscall_name(key.value).decode()
            print("{}: {}".format(name, counter.value))
def print_event_hash():
    """Print the ``data``, ``method_ent`` and ``children`` tables.

    Every table is listed in ascending order of its key value.

    * ``data``: entries whose key equals 0xFFFFFFFF are skipped.  An entry
      with ``ent_pid32 == 0`` is printed as an "exit" row built from the
      ``ex_*`` fields; any other entry is printed as an "enter" row built
      from the ``ent_*`` fields.
    * ``method_ent``: one "method" row per entry, with the symbol that
      ``bpf.sym`` resolves for the entry's ``ip`` and ``pid32``.
    * ``children``: one "child" row per key.
    """

    def key_value(entry):
        # Ascending by key.  The previous code negated the key and passed
        # reverse=True, which yields exactly this ordering.
        return entry[0].value

    data = bpf["data"]
    method_data = bpf["method_ent"]
    children = bpf["children"]

    for k, v in sorted(data.items(), key=key_value):
        if k.value == 0xFFFFFFFF:
            continue  # happens occasionally, we don't need it
        if v.ent_pid32 == 0:
            printb((b"exit: %-20d %22s %12d %8d %20s %15d %15d") %
                   (k.value, comm_for_pid(v.ex_pid32), v.ex_pid32,
                    v.ex_pid_tgid, syscall_name(v.ex_sys), v.ex_sys,
                    v.ex_ret))
        else:
            # Fixed: the numeric syscall id of an enter row now comes from
            # ent_sys, matching the name printed next to it (it used to be
            # read from ex_sys, the exit-side field).
            printb((b"enter: %-20d %22s %12d %8d %20s %15d") %
                   (k.value, comm_for_pid(v.ent_pid32), v.ent_pid32,
                    v.ent_pid_tgid, syscall_name(v.ent_sys), v.ent_sys))

    for k, v in sorted(method_data.items(), key=key_value):
        printb((b"method: %-20d %12d %12d %12d %20s") %
               (k.value, v.pid_tgid, v.pid32, v.ip, bpf.sym(v.ip, v.pid32)))

    for k, v in sorted(children.items(), key=key_value):
        printb((b"child: %-20d") % (k.value))
def run(display, b, bpf_dict, pid_list, comm_list): """ Main loop. Sleep interval, then read the data from bpf map (b['map']) and add it to the collection. Args: b(BPF object). This is the main object for defining a BPF program, and interacting with its output. It is associated to syscalls. bpf_dict(BPF object dictionary). A bpf dictionnay for usdt. pid_list (:obj:`list` of :obj:`str`) : list of pids you want to trace. comm_list (:obj:`list` of :obj:`str`) : list of process name you want to trace. """ # clear to start collecting everything at the same time b['map'].clear() global u_bpf_dict while display.die is False: try: sleep(display.refresh_intvl) # reset the rate for each doc in the collection display.collection.reset_info() now = monotonic_time() for k, v in b['map'].items(): # map.clear() or item.__delitem__() are not thread safe !! # Unfortunatly we need to delete items in the map, it saves # entries in map. # delete items that are not active for more than 1 sec # by assuming old entries won't create consistency issues zeroed = False if v.startTime < int(now - INACT_THRSLD): b['map'].__delitem__(k) zeroed = True if ((k.pid != 0) and (str(k.pid) in pid_list or '-1' in pid_list) and (k.comm.decode() in comm_list or 'all' in comm_list)): if not k.fname: # in case of a syscall fname is empty k.fname = syscall_name(k.sysid) # get fname sc = ctStats(b'[%s]' % k.fname, v.counter, v.cumLat) # lookup the doc in the collection. If it does not exists # then create it. 
doc = display.collection.lookup_or_create(k.pid, k.comm) # update the stats for this doc doc.update_doc_stats(sc) if zeroed is True: doc.keep_previous_count(sc) for usdt_bpf in u_bpf_dict.values(): ubpf = usdt_bpf[1] for k, v in ubpf['map'].items(): zeroed = False if v.startTime < int(now - INACT_THRSLD): ubpf['map'].__delitem__(k) zeroed = True if ((k.pid != 0) and (str(k.pid) in pid_list or '-1' in pid_list) and (k.comm.decode() in comm_list or 'all' in comm_list)): sc = ctStats(k.fname, v.counter, v.cumLat) # lookup the doc in the collection. If it does not exists # then create it. doc = display.collection.lookup_or_create( k.pid, k.comm) # update the stats for this doc doc.update_doc_stats(sc) if zeroed is True: doc.keep_previous_count(sc) display.print_body() except KeyboardInterrupt: break display.print_header(b'Exiting ...') display.reset() display.die = True # will terminate the thread for keyboard
def print_latency_stats():
    """Print per-syscall latency rows, export them as metrics, clear the table.

    Entries of ``bpf["data"]`` are ordered by descending ``total_ns`` and
    truncated to the first ``args.top``.  Entries whose key is 0xFFFFFFFF or
    9999, or whose syscall name contains ``b"unknown"``, are dropped.  The
    remaining entries are grouped by syscall name (looked up from the key
    modulo 10000); each group keeps one detail per entry with its return
    code, count and latency (scaled by 1e6 when ``args.milliseconds`` is set,
    1e3 otherwise).

    For every detail a row is printed and the ``c_number_total``,
    ``c_latency_total`` and ``g_avg_latency`` metrics are updated;
    ``g_failure_rate`` is updated only for details whose return code is not
    "SUCCESS".
    """
    global c_number_total
    global c_latency_total
    global g_failure_rate
    global g_avg_latency

    host_name = socket.gethostname()
    application_name = comm_for_pid(args.pid)
    data = bpf["data"]

    print("[%s]" % strftime("%H:%M:%S"))
    print("%-22s %8s %16s %12s %12s" %
          ("SYSCALL", "COUNT", time_colname, "ERRORNO", "PERCENTAGE"))

    data_summary = dict()
    slowest_first = sorted(data.items(), key=lambda kv: -kv[1].total_ns)
    for k, v in slowest_first[:args.top]:
        if k.value in (0xFFFFFFFF, 9999):
            continue  # happens occasionally, we don't need it

        if v.error_no >= 0:
            # all the system calls whose return value is >= 0
            # are considered to be successful
            return_info = "SUCCESS"
        else:
            try:
                return_info = errno.errorcode[abs(v.error_no)]
            except KeyError:
                # No symbolic name for this value: keep the raw number.
                return_info = v.error_no

        key = syscall_name(k.value % 10000)
        if b"unknown" in key:
            continue

        detail = {
            "return_code": return_info,
            "count": v.count,
            "latency": v.total_ns / (1e6 if args.milliseconds else 1e3)
        }
        group = data_summary.setdefault(key, {
            "total_count": 0,
            "details": []
        })
        group["total_count"] = group["total_count"] + v.count
        group["details"].append(detail)

    # otherwise, if the failure has gone, the metric (failure rate) will stay
    # the same
    g_failure_rate._metrics.clear()

    for sc_name, info in data_summary.items():
        for detail in info["details"]:
            detail["percentage"] = float(detail["count"]) / info["total_count"]
            mean_latency = detail["latency"] / detail["count"]
            row_format = ("%-22s %8d " +
                          ("%16.6f" if args.milliseconds else "%16.3f") +
                          " %12s %12.5f")
            row = row_format % (
                sc_name.decode(encoding='utf-8', errors='strict'),
                detail["count"], mean_latency, detail["return_code"],
                detail["percentage"])
            printb(row.encode(encoding='utf-8', errors='strict'))

            # export metrics
            labels = dict(hostname=host_name,
                          application_name=application_name,
                          pid=args.pid,
                          layer='os',
                          syscall_name=sc_name,
                          error_code=detail["return_code"],
                          injected_on_purpose=False)
            c_number_total.labels(**labels).inc(detail["count"])
            c_latency_total.labels(**labels).inc(detail["latency"])
            g_avg_latency.labels(**labels).set(mean_latency)
            if detail["return_code"] != "SUCCESS":
                g_failure_rate.labels(**labels).set(detail["percentage"])

    print("")
    data.clear()
def agg_colval(key):
    """Return ``syscall_name`` applied to the value of the table key."""
    syscall_id = key.value
    return syscall_name(syscall_id)