class Probe(object):
    """One histogram/frequency probe described by a ':'-separated specifier.

    The specifier has the form
    ``type:library:function(signature)[:expr_types:exprs[:filter]][#label]``
    where ``type`` is one of ``p``, ``r``, ``t`` or ``u``.  The instance
    generates the BPF C text for the probe (``generate_text``), attaches it
    through a BPF object (``attach``) and prints the collected table
    (``display``).
    """

    # Counter used to build unique probe function / hash names.
    next_probe_index = 0
    # Counter used to build unique streq_N helper names.
    streq_index = 0
    # Textual aliases that users may write inside expressions and filters.
    aliases = {"$PID": "(bpf_get_current_pid_tgid() >> 32)"}

    def _substitute_aliases(self, expr):
        """Return ``expr`` with every alias replaced (``None`` passes through)."""
        if expr is None:
            return expr
        for alias, subst in Probe.aliases.items():
            expr = expr.replace(alias, subst)
        return expr

    def _parse_signature(self):
        """Fill ``self.param_types`` (name -> C type) from ``self.signature``."""
        self.param_types = {}
        for param in (p.strip() for p in self.signature.split(',')):
            if not param:
                # An empty signature "()" yields a single empty chunk;
                # it does not describe a parameter.
                continue
            # If the type is a pointer, the * can be next to the
            # param name.  Other complex types like arrays are not
            # supported right now.
            index = param.rfind('*')
            index = index if index != -1 else param.rfind(' ')
            param_type = param[0:index + 1].strip()
            param_name = param[index + 1:].strip()
            self.param_types[param_name] = param_type

    def _generate_entry(self):
        """Return the C text of the entry probe that stores $entry() values."""
        self.entry_probe_func = self.probe_func_name + "_entry"
        text = """
int PROBENAME(struct pt_regs *ctx SIGNATURE)
{
        u64 __pid_tgid = bpf_get_current_pid_tgid();
        u32 __pid      = __pid_tgid;        // lower 32 bits
        u32 __tgid     = __pid_tgid >> 32;  // upper 32 bits
        PID_FILTER
        COLLECT
        return 0;
}
"""
        text = text.replace("PROBENAME", self.entry_probe_func)
        text = text.replace("SIGNATURE",
                            "" if len(self.signature) == 0
                            else ", " + self.signature)
        text = text.replace("PID_FILTER", self._generate_pid_filter())
        collect = ""
        for pname in self.args_to_probe:
            param_hash = self.hashname_prefix + pname
            if pname == "__latency":
                collect += """
u64 __time = bpf_ktime_get_ns();
%s.update(&__pid, &__time);
                """ % param_hash
            else:
                collect += "%s.update(&__pid, &%s);\n" % \
                    (param_hash, pname)
        text = text.replace("COLLECT", collect)
        return text

    def _generate_entry_probe(self):
        """Return hash declarations plus the entry probe for a return probe.

        Raises:
            ValueError: if an ``$entry(name)`` expression names a parameter
                that is not part of the function signature.
        """
        # Any $entry(name) expressions result in saving that argument
        # when entering the function.
        self.args_to_probe = set()
        regex = r"\$entry\((\w+)\)"
        for expr in self.exprs:
            for arg in re.finditer(regex, expr):
                self.args_to_probe.add(arg.group(1))
        for arg in re.finditer(regex, self.filter):
            self.args_to_probe.add(arg.group(1))
        if any(map(lambda expr: "$latency" in expr, self.exprs)) or \
           "$latency" in self.filter:
            self.args_to_probe.add("__latency")
            self.param_types["__latency"] = "u64"    # nanoseconds
        for pname in self.args_to_probe:
            if pname not in self.param_types:
                # Report the offending parameter name (not the stale
                # regex match object left over from the loops above).
                raise ValueError("$entry(%s): no such param" % pname)

        self.hashname_prefix = "%s_param_" % self.probe_hash_name
        text = ""
        for pname in self.args_to_probe:
            # Each argument is stored in a separate hash that is
            # keyed by pid.
            text += "BPF_HASH(%s, u32, %s);\n" % \
                (self.hashname_prefix + pname,
                 self.param_types[pname])
        text += self._generate_entry()
        return text

    def _generate_retprobe_prefix(self):
        """Return C text that looks up every value saved by the entry probe."""
        # After we're done here, there are __%s_val variables for each
        # argument we needed to probe using $entry(name), and they all
        # have values (which isn't necessarily the case if we missed
        # the method entry probe).
        text = ""
        self.param_val_names = {}
        for pname in self.args_to_probe:
            val_name = "__%s_val" % pname
            text += "%s *%s = %s.lookup(&__pid);\n" % \
                (self.param_types[pname], val_name,
                 self.hashname_prefix + pname)
            text += "if (%s == 0) { return 0 ; }\n" % val_name
            self.param_val_names[pname] = val_name
        return text

    def _replace_entry_exprs(self):
        """Rewrite $entry(x)/$latency in exprs and filter to saved values."""
        for pname, vname in self.param_val_names.items():
            if pname == "__latency":
                entry_expr = "$latency"
                val_expr = "(bpf_ktime_get_ns() - *%s)" % vname
            else:
                entry_expr = "$entry(%s)" % pname
                val_expr = "(*%s)" % vname
            for i in range(0, len(self.exprs)):
                self.exprs[i] = self.exprs[i].replace(
                    entry_expr, val_expr)
            self.filter = self.filter.replace(entry_expr, val_expr)

    def _attach_entry_probe(self):
        """Attach the generated entry probe (uprobe or kprobe)."""
        if self.is_user:
            self.bpf.attach_uprobe(name=self.library,
                                   sym=self.function,
                                   fn_name=self.entry_probe_func,
                                   pid=self.pid or -1)
        else:
            self.bpf.attach_kprobe(event=self.function,
                                   fn_name=self.entry_probe_func)

    def _bail(self, error):
        """Raise ``ValueError`` describing a problem with the raw specifier."""
        raise ValueError("error parsing probe '%s': %s" %
                         (self.raw_spec, error))

    def _validate_specifier(self):
        """Check the overall shape of ``self.raw_spec``; bail on errors."""
        # Everything after '#' is the probe label, ignore it
        spec = self.raw_spec.split('#')[0]
        parts = spec.strip().split(':')
        if len(parts) < 3:
            self._bail("at least the probe type, library, and " +
                       "function signature must be specified")
        if len(parts) > 6:
            self._bail("extraneous ':'-separated parts detected")
        if parts[0] not in ["r", "p", "t", "u"]:
            self._bail("probe type must be 'p', 'r', 't', or 'u'" +
                       " but got '%s'" % parts[0])
        if re.match(r"\S+\(.*\)", parts[2]) is None:
            self._bail(("function signature '%s' has an invalid " +
                        "format") % parts[2])

    def _parse_expr_types(self, expr_types):
        """Split the comma-separated expression types into a list."""
        if len(expr_types) == 0:
            self._bail("no expr types specified")
        self.expr_types = expr_types.split(',')

    def _parse_exprs(self, exprs):
        """Split the comma-separated expressions into a list."""
        if len(exprs) == 0:
            self._bail("no exprs specified")
        self.exprs = exprs.split(',')

    def _make_valid_identifier(self, ident):
        """Replace every character outside [A-Za-z0-9_] with '_'."""
        return re.sub(r'[^A-Za-z0-9_]', '_', ident)

    def __init__(self, tool, type, specifier):
        """Parse ``specifier`` into a probe.

        Args:
            tool: object whose ``args.pid`` and ``args.cumulative`` are read.
            type: "hist" or "freq".
            specifier: the raw probe specifier string.

        Raises:
            ValueError: if the specifier is malformed.
        """
        self.usdt_ctx = None
        self.streq_functions = ""
        self.pid = tool.args.pid
        self.cumulative = tool.args.cumulative or False
        self.raw_spec = specifier
        self._validate_specifier()

        spec_and_label = specifier.split('#')
        self.label = spec_and_label[1] \
            if len(spec_and_label) == 2 else None

        parts = spec_and_label[0].strip().split(':')
        self.type = type    # hist or freq
        self.probe_type = parts[0]
        fparts = parts[2].split('(')
        self.function = fparts[0].strip()
        if self.probe_type == "t":
            self.library = ""       # kernel
            self.tp_category = parts[1]
            self.tp_event = self.function
        elif self.probe_type == "u":
            self.library = parts[1]
            self.probe_func_name = self._make_valid_identifier(
                "%s_probe%d" %
                (self.function, Probe.next_probe_index))
            self._enable_usdt_probe()
        else:
            self.library = parts[1]
        self.is_user = len(self.library) > 0
        self.signature = fparts[1].strip()[:-1]
        self._parse_signature()

        # If the user didn't specify an expression to probe, we probe
        # the retval in a ret probe, or simply the value "1" otherwise.
        self.is_default_expr = len(parts) < 5
        if not self.is_default_expr:
            self._parse_expr_types(parts[3])
            self._parse_exprs(parts[4])
            if len(self.exprs) != len(self.expr_types):
                self._bail("mismatched # of exprs and types")
            if self.type == "hist" and len(self.expr_types) > 1:
                self._bail("histograms can only have 1 expr")
        else:
            if not self.probe_type == "r" and self.type == "hist":
                self._bail("histograms must have expr")
            self.expr_types = \
                ["u64" if not self.probe_type == "r" else "int"]
            self.exprs = \
                ["1" if not self.probe_type == "r" else "$retval"]
        self.filter = "" if len(parts) != 6 else parts[5]
        self._substitute_exprs()

        # Do we need to attach an entry probe so that we can collect an
        # argument that is required for an exit (return) probe?
        def check(expr):
            keywords = ["$entry", "$latency"]
            return any(map(lambda kw: kw in expr, keywords))
        self.entry_probe_required = self.probe_type == "r" and \
            (any(map(check, self.exprs)) or check(self.filter))

        self.probe_func_name = self._make_valid_identifier(
            "%s_probe%d" %
            (self.function, Probe.next_probe_index))
        self.probe_hash_name = self._make_valid_identifier(
            "%s_hash%d" %
            (self.function, Probe.next_probe_index))
        Probe.next_probe_index += 1

    def _enable_usdt_probe(self):
        """Create the USDT context and enable this probe in it."""
        self.usdt_ctx = USDT(path=self.library, pid=self.pid)
        self.usdt_ctx.enable_probe(
            self.function, self.probe_func_name)

    def _generate_streq_function(self, string):
        """Append a C helper comparing against ``string``; return its name."""
        fname = "streq_%d" % Probe.streq_index
        Probe.streq_index += 1
        self.streq_functions += """
static inline bool %s(char const *ignored, char const *str) {
        char needle[] = %s;
        char haystack[sizeof(needle)];
        bpf_probe_read(&haystack, sizeof(haystack), (void *)str);
        for (int i = 0; i < sizeof(needle) - 1; ++i) {
                if (needle[i] != haystack[i]) {
                        return false;
                }
        }
        return true;
}
        """ % (fname, string)
        return fname

    def _substitute_exprs(self):
        """Expand aliases, STRCMP(...) and $retval in exprs and filter."""
        def repl(expr):
            expr = self._substitute_aliases(expr)
            matches = re.finditer('STRCMP\\(("[^"]+\\")', expr)
            for match in matches:
                string = match.group(1)
                fname = self._generate_streq_function(string)
                expr = expr.replace("STRCMP", fname, 1)
            return expr.replace("$retval", "PT_REGS_RC(ctx)")
        for i in range(0, len(self.exprs)):
            self.exprs[i] = repl(self.exprs[i])
        self.filter = repl(self.filter)

    def _is_string(self, expr_type):
        """Return True when ``expr_type`` is spelled as a C string type."""
        return expr_type == "char*" or expr_type == "char *"

    def _generate_hash_field(self, i):
        """Return the C declaration of key field ``v<i>``."""
        if self._is_string(self.expr_types[i]):
            return "struct __string_t v%d;\n" % i
        else:
            return "%s v%d;\n" % (self.expr_types[i], i)

    def _generate_usdt_arg_assignment(self, i):
        """Return C text reading USDT argument ``argN`` for expression ``i``.

        Returns an empty string unless this is a USDT probe and the
        expression starts with "arg".
        """
        expr = self.exprs[i]
        if self.probe_type == "u" and expr[0:3] == "arg":
            arg_index = int(expr[3])
            arg_ctype = self.usdt_ctx.get_probe_arg_ctype(
                self.function, arg_index - 1)
            return ("        %s %s = 0;\n" +
                    "        bpf_usdt_readarg(%s, ctx, &%s);\n") \
                % (arg_ctype, expr, expr[3], expr)
        else:
            return ""

    def _generate_field_assignment(self, i):
        """Return C text assigning expression ``i`` into ``__key.v<i>``."""
        text = self._generate_usdt_arg_assignment(i)
        if self._is_string(self.expr_types[i]):
            return (text + "        bpf_probe_read(&__key.v%d.s," +
                    " sizeof(__key.v%d.s), (void *)%s);\n") % \
                (i, i, self.exprs[i])
        else:
            return text + "        __key.v%d = %s;\n" % \
                (i, self.exprs[i])

    def _generate_hash_decl(self):
        """Return the C declaration of the table that stores the results."""
        if self.type == "hist":
            return "BPF_HISTOGRAM(%s, %s);" % \
                (self.probe_hash_name, self.expr_types[0])
        else:
            text = "struct %s_key_t {\n" % self.probe_hash_name
            for i in range(0, len(self.expr_types)):
                text += self._generate_hash_field(i)
            text += "};\n"
            text += "BPF_HASH(%s, struct %s_key_t, u64);\n" % \
                (self.probe_hash_name, self.probe_hash_name)
            return text

    def _generate_key_assignment(self):
        """Return C text that builds the ``__key`` variable."""
        if self.type == "hist":
            return self._generate_usdt_arg_assignment(0) + \
                ("%s __key = %s;\n" %
                 (self.expr_types[0], self.exprs[0]))
        else:
            text = "struct %s_key_t __key = {};\n" % \
                self.probe_hash_name
            for i in range(0, len(self.exprs)):
                text += self._generate_field_assignment(i)
            return text

    def _generate_hash_update(self):
        """Return the C statement that records ``__key`` in the table."""
        if self.type == "hist":
            return "%s.increment(bpf_log2l(__key));" % \
                self.probe_hash_name
        else:
            return "%s.increment(__key);" % self.probe_hash_name

    def _generate_pid_filter(self):
        """Return the in-program pid check, or "" when it is not needed."""
        # Kernel probes need to explicitly filter pid, because the
        # attach interface doesn't support pid filtering
        if self.pid is not None and not self.is_user:
            return "if (__tgid != %d) { return 0; }" % self.pid
        else:
            return ""

    def generate_text(self):
        """Return the complete C text for this probe.

        The placeholders are substituted in a fixed order; note that
        PID_FILTER must be replaced before FILTER.
        """
        program = ""
        probe_text = """
DATA_DECL
                """ + (
            "TRACEPOINT_PROBE(%s, %s)" %
            (self.tp_category, self.tp_event)
            if self.probe_type == "t"
            else "int PROBENAME(struct pt_regs *ctx SIGNATURE)") + """
{
        u64 __pid_tgid = bpf_get_current_pid_tgid();
        u32 __pid      = __pid_tgid;        // lower 32 bits
        u32 __tgid     = __pid_tgid >> 32;  // upper 32 bits
        PID_FILTER
        PREFIX
        if (!(FILTER)) return 0;
        KEY_EXPR
        COLLECT
        return 0;
}
"""
        prefix = ""
        signature = ""

        # If any entry arguments are probed in a ret probe, we need
        # to generate an entry probe to collect them
        if self.entry_probe_required:
            program += self._generate_entry_probe()
            prefix += self._generate_retprobe_prefix()
            # Replace $entry(paramname) with a reference to the
            # value we collected when entering the function:
            self._replace_entry_exprs()

        if self.probe_type == "p" and len(self.signature) > 0:
            # Only entry uprobes/kprobes can have user-specified
            # signatures. Other probes force it to ().
            signature = ", " + self.signature

        program += probe_text.replace("PROBENAME",
                                      self.probe_func_name)
        program = program.replace("SIGNATURE", signature)
        program = program.replace("PID_FILTER",
                                  self._generate_pid_filter())

        decl = self._generate_hash_decl()
        key_expr = self._generate_key_assignment()
        collect = self._generate_hash_update()
        program = program.replace("DATA_DECL", decl)
        program = program.replace("KEY_EXPR", key_expr)
        program = program.replace("FILTER",
                                  "1" if len(self.filter) == 0
                                  else self.filter)
        program = program.replace("COLLECT", collect)
        program = program.replace("PREFIX", prefix)

        return self.streq_functions + program

    def _attach_u(self):
        """Attach the probe function as a uprobe/uretprobe."""
        libpath = BPF.find_library(self.library)
        if libpath is None:
            libpath = BPF.find_exe(self.library)
        if libpath is None or len(libpath) == 0:
            self._bail("unable to find library %s" % self.library)

        if self.probe_type == "r":
            self.bpf.attach_uretprobe(name=libpath,
                                      sym=self.function,
                                      fn_name=self.probe_func_name,
                                      pid=self.pid or -1)
        else:
            self.bpf.attach_uprobe(name=libpath,
                                   sym=self.function,
                                   fn_name=self.probe_func_name,
                                   pid=self.pid or -1)

    def _attach_k(self):
        """Attach the probe function as a kprobe/kretprobe."""
        if self.probe_type == "t":
            pass    # Nothing to do for tracepoints
        elif self.probe_type == "r":
            self.bpf.attach_kretprobe(event=self.function,
                                      fn_name=self.probe_func_name)
        else:
            self.bpf.attach_kprobe(event=self.function,
                                   fn_name=self.probe_func_name)

    def attach(self, bpf):
        """Remember ``bpf`` and attach this probe (and its entry probe)."""
        self.bpf = bpf
        if self.probe_type == "u":
            return
        if self.is_user:
            self._attach_u()
        else:
            self._attach_k()
        if self.entry_probe_required:
            self._attach_entry_probe()

    def _v2s(self, v):
        """Convert a key field value to a display string."""
        # Most fields can be converted with plain str(), but strings
        # are wrapped in a __string_t which has an .s field
        if "__string_t" in type(v).__name__:
            return str(v.s)
        return str(v)

    def _display_expr(self, i):
        """Return expression ``i`` rewritten back to its user-facing form."""
        # Replace ugly latency calculation with $latency
        expr = self.exprs[i].replace(
            "(bpf_ktime_get_ns() - *____latency_val)", "$latency")
        # Replace alias values back with the alias name
        for alias, subst in Probe.aliases.items():
            expr = expr.replace(subst, alias)
        # Replace retval expression with $retval
        expr = expr.replace("PT_REGS_RC(ctx)", "$retval")
        # Replace ugly (*__param_val) expressions with param name
        return re.sub(r"\(\*__(\w+)_val\)", r"\1", expr)

    def _display_key(self, key):
        """Return a printable description of a table key."""
        if self.is_default_expr:
            if not self.probe_type == "r":
                return "total calls"
            else:
                return "retval = %s" % str(key.v0)
        else:
            # The key object has v0, ..., vk fields containing
            # the values of the expressions from self.exprs
            def str_i(i):
                key_i = self._v2s(getattr(key, "v%d" % i))
                return "%s = %s" % \
                    (self._display_expr(i), key_i)
            return ", ".join(map(str_i, range(0, len(self.exprs))))

    def display(self, top):
        """Print the collected table.

        Args:
            top: when not None, only the ``top`` highest counts are printed
                for a "freq" probe.
        """
        data = self.bpf.get_table(self.probe_hash_name)
        if self.type == "freq":
            print(self.label or self.raw_spec)
            print("\t%-10s %s" % ("COUNT", "EVENT"))
            sdata = sorted(data.items(), key=lambda p: p[1].value)
            if top is not None:
                sdata = sdata[-top:]
            for key, value in sdata:
                # Print some nice values if the user didn't
                # specify an expression to probe
                if self.is_default_expr:
                    if not self.probe_type == "r":
                        key_str = "total calls"
                    else:
                        key_str = "retval = %s" % \
                            self._v2s(key.v0)
                else:
                    key_str = self._display_key(key)
                print("\t%-10s %s" %
                      (str(value.value), key_str))
        elif self.type == "hist":
            label = self.label or (self._display_expr(0)
                                   if not self.is_default_expr
                                   else "retval")
            data.print_log2_hist(val_type=label)
        if not self.cumulative:
            data.clear()

    def __str__(self):
        return self.label or self.raw_spec
class Probe(object):
    """One trace probe: ``probespec[(signature)] [(filter)] ["fmt", args...]``.

    The instance parses the textual probe, generates the BPF C program for
    it (``generate_program``), attaches it through a BPF object (``attach``)
    and prints every event delivered through the perf buffer
    (``print_event``).  Tool-wide options live in class attributes that are
    set by ``configure``.
    """

    probe_count = 0
    streq_index = 0
    max_events = None
    event_count = 0
    first_ts = 0
    print_time = False
    use_localtime = True
    time_field = False
    print_cpu = False
    print_address = False
    tgid = -1
    pid = -1
    page_cnt = None
    # Default so that expression rewriting works before configure() runs.
    bin_cmp = False
    build_id_enabled = False

    @classmethod
    def configure(cls, args):
        """Copy the tool-wide options from the parsed arguments ``args``."""
        cls.max_events = args.max_events
        cls.print_time = args.timestamp or args.time
        cls.use_localtime = not args.timestamp
        cls.time_field = cls.print_time and (not cls.use_localtime)
        cls.print_cpu = args.print_cpu
        cls.print_address = args.address
        cls.first_ts = BPF.monotonic_time()
        cls.tgid = args.tgid or -1
        cls.pid = args.pid or -1
        cls.page_cnt = args.buffer_pages
        cls.bin_cmp = args.bin_cmp
        cls.build_id_enabled = args.sym_file_list is not None

    def __init__(self, probe, string_size, kernel_stack, user_stack):
        """Parse the probe text.

        Args:
            probe: raw probe string.
            string_size: size of the buffer used for each %s value.
            kernel_stack: whether to record a kernel stack id per event.
            user_stack: whether to record a user stack id per event.

        Raises:
            ValueError: if the probe text is malformed.
        """
        self.usdt = None
        self.streq_functions = ""
        self.raw_probe = probe
        self.string_size = string_size
        self.kernel_stack = kernel_stack
        self.user_stack = user_stack
        Probe.probe_count += 1
        self._parse_probe()
        self.probe_num = Probe.probe_count
        self.probe_name = "probe_%s_%d" % \
            (self._display_function(), self.probe_num)
        self.probe_name = re.sub(r'[^A-Za-z0-9_]', '_',
                                 self.probe_name)

        # compiler can generate proper codes for function
        # signatures with "syscall__" prefix
        if self.is_syscall_kprobe:
            self.probe_name = "syscall__" + self.probe_name[6:]

    def __str__(self):
        return "%s:%s:%s FLT=%s ACT=%s/%s" % (self.probe_type,
                                              self.library,
                                              self._display_function(),
                                              self.filter,
                                              self.types, self.values)

    def is_default_action(self):
        """Return True when the probe has no format string."""
        return self.python_format == ""

    def _bail(self, error):
        """Raise ``ValueError`` describing a problem with the raw probe."""
        raise ValueError("error in probe '%s': %s" %
                         (self.raw_probe, error))

    def _parse_probe(self):
        """Split the raw probe into spec, signature, filter and action."""
        text = self.raw_probe

        # There might be a function signature preceding the actual
        # filter/print part, or not. Find the probe specifier first --
        # it ends with either a space or an open paren ( for the
        # function signature part.
        #                                          opt. signature
        #                               probespec       |      rest
        #                               ---------  ----------   --
        match = re.match(r'([^ \t\(]+)(\([^\(]*\))?(.*)', text)
        if match is None:
            # e.g. an empty probe, or one starting with whitespace or '('
            self._bail("unable to find a probe specifier")
        (spec, sig, rest) = match.groups()

        self._parse_spec(spec)
        # Remove the parens
        self.signature = sig[1:-1] if sig else None
        if self.signature and self.probe_type in ['u', 't']:
            self._bail("USDT and tracepoint probes can't have " +
                       "a function signature; use arg1, arg2, " +
                       "... instead")

        text = rest.lstrip()
        # If we now have a (, wait for the balanced closing ) and that
        # will be the predicate
        self.filter = None
        if len(text) > 0 and text[0] == "(":
            balance = 1
            for i in range(1, len(text)):
                if text[i] == "(":
                    balance += 1
                if text[i] == ")":
                    balance -= 1
                if balance == 0:
                    self._parse_filter(text[:i + 1])
                    text = text[i + 1:]
                    break
            if self.filter is None:
                self._bail("unmatched end of predicate")

        if self.filter is None:
            self.filter = "1"

        # The remainder of the text is the printf action
        self._parse_action(text.lstrip())

    def _parse_spec(self, spec):
        """Parse the ':'-separated probe specifier."""
        parts = spec.split(":")
        # Two special cases: 'func' means 'p::func', 'lib:func' means
        # 'p:lib:func'. Other combinations need to provide an empty
        # value between delimiters, e.g. 'r::func' for a kretprobe on
        # the function func.
        if len(parts) == 1:
            parts = ["p", "", parts[0]]
        elif len(parts) == 2:
            parts = ["p", parts[0], parts[1]]
        if len(parts[0]) == 0:
            self.probe_type = "p"
        elif parts[0] in ["p", "r", "t", "u"]:
            self.probe_type = parts[0]
        else:
            self._bail("probe type must be '', 'p', 't', 'r', " +
                       "or 'u', but got '%s'" % parts[0])
        if self.probe_type == "t":
            self.tp_category = parts[1]
            self.tp_event = parts[2]
            self.library = ""       # kernel
            self.function = ""      # from TRACEPOINT_PROBE
        elif self.probe_type == "u":
            self.library = ':'.join(parts[1:-1])
            self.usdt_name = parts[-1]
            self.function = ""      # no function, just address
            # We will discover the USDT provider by matching on
            # the USDT name in the specified library
            self._find_usdt_probe()
        else:
            self.library = ':'.join(parts[1:-1])
            self.function = parts[-1]

        # only x64 syscalls needs checking, no other syscall wrapper yet.
        self.is_syscall_kprobe = False
        if self.probe_type == "p" and len(self.library) == 0 and \
           self.function[:10] == "__x64_sys_":
            self.is_syscall_kprobe = True

    def _find_usdt_probe(self):
        """Create the USDT context and check that the named probe exists."""
        target = Probe.pid if Probe.pid and Probe.pid != -1 \
            else Probe.tgid
        self.usdt = USDT(path=self.library, pid=target)
        for probe in self.usdt.enumerate_probes():
            if probe.name == self.usdt_name.encode('ascii'):
                return  # Found it, will enable later
        self._bail("unrecognized USDT probe %s" % self.usdt_name)

    def _parse_filter(self, filt):
        """Store the rewritten predicate in ``self.filter``."""
        self.filter = self._rewrite_expr(filt)

    def _parse_types(self, fmt):
        """Collect format specifiers and derive the Python format string."""
        for match in re.finditer(
                r'[^%]%(s|u|d|lu|llu|ld|lld|hu|hd|x|lx|llx|c|K|U)', fmt):
            self.types.append(match.group(1))
        fmt = re.sub(r'([^%]%)(u|d|lu|llu|ld|lld|hu|hd)', r'\1d', fmt)
        fmt = re.sub(r'([^%]%)(x|lx|llx)', r'\1x', fmt)
        fmt = re.sub('%K|%U', '%s', fmt)
        self.python_format = fmt.strip('"')

    def _parse_action(self, action):
        """Parse the ``"format", value, ...`` part of the probe."""
        self.values = []
        self.types = []
        self.python_format = ""
        if len(action) == 0:
            return

        action = action.strip()
        match = re.search(r'(\".*?\"),?(.*)', action)
        if match is None:
            self._bail("expected format string in \"s")

        self.raw_format = match.group(1)
        self._parse_types(self.raw_format)
        for part in re.split('(?<!"),', match.group(2)):
            part = self._rewrite_expr(part)
            if len(part) > 0:
                self.values.append(part)

    aliases_arg = {
        "arg1": "PT_REGS_PARM1(ctx)",
        "arg2": "PT_REGS_PARM2(ctx)",
        "arg3": "PT_REGS_PARM3(ctx)",
        "arg4": "PT_REGS_PARM4(ctx)",
        "arg5": "PT_REGS_PARM5(ctx)",
        "arg6": "PT_REGS_PARM6(ctx)",
    }

    # Used for syscall kprobes: the arguments are read through the
    # pt_regs pointer passed as the first parameter.
    aliases_indarg = {
        "arg1": "({u64 _val; struct pt_regs *_ctx = (struct pt_regs *)PT_REGS_PARM1(ctx);"
                " bpf_probe_read(&_val, sizeof(_val), &(PT_REGS_PARM1(_ctx))); _val;})",
        "arg2": "({u64 _val; struct pt_regs *_ctx = (struct pt_regs *)PT_REGS_PARM1(ctx);"
                " bpf_probe_read(&_val, sizeof(_val), &(PT_REGS_PARM2(_ctx))); _val;})",
        "arg3": "({u64 _val; struct pt_regs *_ctx = (struct pt_regs *)PT_REGS_PARM1(ctx);"
                " bpf_probe_read(&_val, sizeof(_val), &(PT_REGS_PARM3(_ctx))); _val;})",
        "arg4": "({u64 _val; struct pt_regs *_ctx = (struct pt_regs *)PT_REGS_PARM1(ctx);"
                " bpf_probe_read(&_val, sizeof(_val), &(PT_REGS_PARM4(_ctx))); _val;})",
        "arg5": "({u64 _val; struct pt_regs *_ctx = (struct pt_regs *)PT_REGS_PARM1(ctx);"
                " bpf_probe_read(&_val, sizeof(_val), &(PT_REGS_PARM5(_ctx))); _val;})",
        "arg6": "({u64 _val; struct pt_regs *_ctx = (struct pt_regs *)PT_REGS_PARM1(ctx);"
                " bpf_probe_read(&_val, sizeof(_val), &(PT_REGS_PARM6(_ctx))); _val;})",
    }

    aliases_common = {
        "retval": "PT_REGS_RC(ctx)",
        "$uid": "(unsigned)(bpf_get_current_uid_gid() & 0xffffffff)",
        "$gid": "(unsigned)(bpf_get_current_uid_gid() >> 32)",
        "$pid": "(unsigned)(bpf_get_current_pid_tgid() & 0xffffffff)",
        "$tgid": "(unsigned)(bpf_get_current_pid_tgid() >> 32)",
        "$cpu": "bpf_get_smp_processor_id()",
        "$task" : "((struct task_struct *)bpf_get_current_task())"
    }

    def _generate_streq_function(self, string):
        """Append a C helper comparing against ``string``; return its name."""
        fname = "streq_%d" % Probe.streq_index
        Probe.streq_index += 1
        self.streq_functions += """
static inline bool %s(char const *ignored, uintptr_t str) {
        char needle[] = %s;
        char haystack[sizeof(needle)];
        bpf_probe_read(&haystack, sizeof(haystack), (void *)str);
        for (int i = 0; i < sizeof(needle) - 1; ++i) {
                if (needle[i] != haystack[i]) {
                        return false;
                }
        }
        return true;
}
                """ % (fname, string)
        return fname

    def _rewrite_expr(self, expr):
        """Return ``expr`` with argN/$alias/STRCMP rewritten to C code."""
        if self.is_syscall_kprobe:
            for alias, replacement in Probe.aliases_indarg.items():
                expr = expr.replace(alias, replacement)
        else:
            for alias, replacement in Probe.aliases_arg.items():
                # For USDT probes, we replace argN values with the
                # actual arguments for that probe obtained using
                # bpf_readarg_N macros emitted at BPF construction.
                if self.probe_type == "u":
                    continue
                expr = expr.replace(alias, replacement)
        for alias, replacement in Probe.aliases_common.items():
            expr = expr.replace(alias, replacement)
        if self.bin_cmp:
            STRCMP_RE = 'STRCMP\\(\"([^"]+)\\"'
        else:
            STRCMP_RE = 'STRCMP\\(("[^"]+\\")'
        matches = re.finditer(STRCMP_RE, expr)
        for match in matches:
            string = match.group(1)
            fname = self._generate_streq_function(string)
            expr = expr.replace("STRCMP", fname, 1)
        return expr

    p_type = {"u": ct.c_uint, "d": ct.c_int, "lu": ct.c_ulong,
              "ld": ct.c_long,
              "llu": ct.c_ulonglong, "lld": ct.c_longlong,
              "hu": ct.c_ushort, "hd": ct.c_short,
              "x": ct.c_uint, "lx": ct.c_ulong, "llx": ct.c_ulonglong,
              "c": ct.c_ubyte,
              "K": ct.c_ulonglong, "U": ct.c_ulonglong}

    def _generate_python_field_decl(self, idx, fields):
        """Append the ctypes field for value ``idx`` to ``fields``."""
        field_type = self.types[idx]
        if field_type == "s":
            ptype = ct.c_char * self.string_size
        else:
            ptype = Probe.p_type[field_type]
        fields.append(("v%d" % idx, ptype))

    def _generate_python_data_decl(self):
        """Return the ctypes Structure class mirroring the C event struct."""
        self.python_struct_name = "%s_%d_Data" % \
            (self._display_function(), self.probe_num)
        fields = []
        if self.time_field:
            fields.append(("timestamp_ns", ct.c_ulonglong))
        if self.print_cpu:
            fields.append(("cpu", ct.c_int))
        fields.extend([
            ("tgid", ct.c_uint),
            ("pid", ct.c_uint),
            ("comm", ct.c_char * 16)       # TASK_COMM_LEN
        ])
        for i in range(0, len(self.types)):
            self._generate_python_field_decl(i, fields)
        if self.kernel_stack:
            fields.append(("kernel_stack_id", ct.c_int))
        if self.user_stack:
            fields.append(("user_stack_id", ct.c_int))
        return type(self.python_struct_name, (ct.Structure,),
                    dict(_fields_=fields))

    c_type = {"u": "unsigned int", "d": "int",
              "lu": "unsigned long", "ld": "long",
              "llu": "unsigned long long", "lld": "long long",
              "hu": "unsigned short", "hd": "short",
              "x": "unsigned int", "lx": "unsigned long",
              "llx": "unsigned long long",
              "c": "char", "K": "unsigned long long",
              "U": "unsigned long long"}
    fmt_types = c_type.keys()

    def _generate_field_decl(self, idx):
        """Return the C declaration of event field ``v<idx>``."""
        field_type = self.types[idx]
        if field_type == "s":
            return "char v%d[%d];\n" % (idx, self.string_size)
        if field_type in Probe.fmt_types:
            return "%s v%d;\n" % (Probe.c_type[field_type], idx)
        self._bail("unrecognized format specifier %s" % field_type)

    def _generate_data_decl(self):
        """Return the C event struct, perf output and stack table text."""
        # The BPF program will populate values into the struct
        # according to the format string, and the Python program will
        # construct the final display string.
        self.events_name = "%s_events" % self.probe_name
        self.struct_name = "%s_data_t" % self.probe_name
        self.stacks_name = "%s_stacks" % self.probe_name
        stack_type = "BPF_STACK_TRACE" if self.build_id_enabled is False \
            else "BPF_STACK_TRACE_BUILDID"
        stack_table = "%s(%s, 1024);" % (stack_type, self.stacks_name) \
            if (self.kernel_stack or self.user_stack) else ""
        data_fields = ""
        for i, field_type in enumerate(self.types):
            data_fields += "        " + \
                self._generate_field_decl(i)
        time_str = "u64 timestamp_ns;" if self.time_field else ""
        cpu_str = "int cpu;" if self.print_cpu else ""
        kernel_stack_str = "       int kernel_stack_id;" \
            if self.kernel_stack else ""
        user_stack_str = "       int user_stack_id;" \
            if self.user_stack else ""

        text = """
struct %s
{
%s
%s
        u32 tgid;
        u32 pid;
        char comm[TASK_COMM_LEN];
%s
%s
%s
};

BPF_PERF_OUTPUT(%s);
%s
"""
        return text % (self.struct_name, time_str, cpu_str, data_fields,
                       kernel_stack_str, user_stack_str,
                       self.events_name, stack_table)

    def _generate_field_assign(self, idx):
        """Return C text storing value ``idx`` into ``__data.v<idx>``."""
        field_type = self.types[idx]
        expr = self.values[idx].strip()
        text = ""
        if self.probe_type == "u" and expr[0:3] == "arg":
            arg_index = int(expr[3])
            arg_ctype = self.usdt.get_probe_arg_ctype(
                self.usdt_name, arg_index - 1)
            text = ("        %s %s = 0;\n" +
                    "        bpf_usdt_readarg(%s, ctx, &%s);\n") \
                % (arg_ctype, expr, expr[3], expr)

        if field_type == "s":
            return text + """
        if (%s != 0) {
                void *__tmp = (void *)%s;
                bpf_probe_read(&__data.v%d, sizeof(__data.v%d), __tmp);
        }
                """ % (expr, expr, idx, idx)
        if field_type in Probe.fmt_types:
            return text + "        __data.v%d = (%s)%s;\n" % \
                (idx, Probe.c_type[field_type], expr)
        self._bail("unrecognized field type %s" % field_type)

    def _generate_usdt_filter_read(self):
        """Return C text reading USDT args used in the filter.

        Also rewrites ``self.filter`` so that each ``argN`` it mentions
        refers to the local ``argN_filter`` variable.
        """
        text = ""
        if self.probe_type != "u":
            return text
        for arg, _ in Probe.aliases_arg.items():
            if not (arg in self.filter):
                continue
            arg_index = int(arg.replace("arg", ""))
            arg_ctype = self.usdt.get_probe_arg_ctype(
                self.usdt_name, arg_index - 1)
            if not arg_ctype:
                self._bail("Unable to determine type of {} "
                           "in the filter".format(arg))
            text += """
        {} {}_filter;
        bpf_usdt_readarg({}, ctx, &{}_filter);
                        """.format(arg_ctype, arg, arg_index, arg)
            self.filter = self.filter.replace(
                arg, "{}_filter".format(arg))
        return text

    def generate_program(self, include_self):
        """Return the complete C text for this probe.

        Args:
            include_self: when false (and no pid/tgid filter applies),
                events from this process (``os.getpid()``) are dropped.
        """
        data_decl = self._generate_data_decl()
        if Probe.pid != -1:
            pid_filter = """
        if (__pid != %d) { return 0; }
                """ % Probe.pid
        # uprobes can have a built-in tgid filter passed to
        # attach_uprobe, hence the check here -- for kprobes, we
        # need to do the tgid test by hand:
        elif len(self.library) == 0 and Probe.tgid != -1:
            pid_filter = """
        if (__tgid != %d) { return 0; }
                """ % Probe.tgid
        elif not include_self:
            pid_filter = """
        if (__tgid == %d) { return 0; }
                """ % os.getpid()
        else:
            pid_filter = ""

        prefix = ""
        signature = "struct pt_regs *ctx"
        if self.signature:
            signature += ", " + self.signature

        data_fields = ""
        for i, expr in enumerate(self.values):
            data_fields += self._generate_field_assign(i)

        if self.probe_type == "t":
            heading = "TRACEPOINT_PROBE(%s, %s)" % \
                (self.tp_category, self.tp_event)
            ctx_name = "args"
        else:
            heading = "int %s(%s)" % (self.probe_name, signature)
            ctx_name = "ctx"

        time_str = """
        __data.timestamp_ns = bpf_ktime_get_ns();""" if self.time_field else ""
        cpu_str = """
        __data.cpu = bpf_get_smp_processor_id();""" if self.print_cpu else ""
        stack_trace = ""
        if self.user_stack:
            stack_trace += """
        __data.user_stack_id = %s.get_stackid(
          %s, BPF_F_REUSE_STACKID | BPF_F_USER_STACK
        );""" % (self.stacks_name, ctx_name)
        if self.kernel_stack:
            stack_trace += """
        __data.kernel_stack_id = %s.get_stackid(
          %s, BPF_F_REUSE_STACKID
        );""" % (self.stacks_name, ctx_name)

        text = heading + """
{
        u64 __pid_tgid = bpf_get_current_pid_tgid();
        u32 __tgid = __pid_tgid >> 32;
        u32 __pid = __pid_tgid; // implicit cast to u32 for bottom half
        %s
        %s
        %s
        if (!(%s)) return 0;

        struct %s __data = {0};
        %s
        %s
        __data.tgid = __tgid;
        __data.pid = __pid;
        bpf_get_current_comm(&__data.comm, sizeof(__data.comm));
%s
%s
        %s.perf_submit(%s, &__data, sizeof(__data));
        return 0;
}
"""
        text = text % (pid_filter, prefix,
                       self._generate_usdt_filter_read(), self.filter,
                       self.struct_name, time_str, cpu_str, data_fields,
                       stack_trace, self.events_name, ctx_name)

        return self.streq_functions + data_decl + "\n" + text

    @classmethod
    def _time_off_str(cls, timestamp_ns):
        """Format ``timestamp_ns`` as seconds elapsed since ``first_ts``."""
        return "%.6f" % (1e-9 * (timestamp_ns - cls.first_ts))

    def _display_function(self):
        """Return the name shown for this probe in the output."""
        if self.probe_type == 'p' or self.probe_type == 'r':
            return self.function
        elif self.probe_type == 'u':
            return self.usdt_name
        else:   # self.probe_type == 't'
            return self.tp_event

    def print_stack(self, bpf, stack_id, tgid):
        """Print the stack stored under ``stack_id`` (or the error code)."""
        if stack_id < 0:
            print("        %d" % stack_id)
            return

        stack = list(bpf.get_table(self.stacks_name).walk(stack_id))
        for addr in stack:
            print("        ", end="")
            if Probe.print_address:
                print("%16x " % addr, end="")
            print("%s" % (bpf.sym(addr, tgid,
                                  show_module=True, show_offset=True)))

    def _format_message(self, bpf, tgid, values):
        """Format ``values`` (a mutable sequence) with the probe's format."""
        # Replace each %K with kernel sym and %U with user sym in tgid
        kernel_placeholders = [i for i, t in enumerate(self.types)
                               if t == 'K']
        user_placeholders = [i for i, t in enumerate(self.types)
                             if t == 'U']
        for kp in kernel_placeholders:
            values[kp] = bpf.ksym(values[kp], show_offset=True)
        for up in user_placeholders:
            values[up] = bpf.sym(values[up], tgid,
                                 show_module=True, show_offset=True)
        return self.python_format % tuple(values)

    def print_event(self, bpf, cpu, data, size):
        """Perf-buffer callback: decode one event and print it."""
        # Cast as the generated structure type and display
        # according to the format string in the probe.
        event = ct.cast(data, ct.POINTER(self.python_struct)).contents
        # A real list is required: _format_message assigns by index,
        # which a lazy map object does not support on Python 3.
        values = [getattr(event, "v%d" % i)
                  for i in range(0, len(self.values))]
        msg = self._format_message(bpf, event.tgid, values)
        if Probe.print_time:
            time = strftime("%H:%M:%S") if Probe.use_localtime else \
                Probe._time_off_str(event.timestamp_ns)
            print("%-8s " % time[:8], end="")
        if Probe.print_cpu:
            print("%-3s " % event.cpu, end="")
        print("%-7d %-7d %-15s %-16s %s" %
              (event.tgid, event.pid,
               event.comm.decode('utf-8', 'replace'),
               self._display_function(), msg))

        if self.kernel_stack:
            self.print_stack(bpf, event.kernel_stack_id, -1)
        if self.user_stack:
            self.print_stack(bpf, event.user_stack_id, event.tgid)
        if self.user_stack or self.kernel_stack:
            print("")

        Probe.event_count += 1
        if Probe.max_events is not None and \
           Probe.event_count >= Probe.max_events:
            exit()

    def attach(self, bpf, verbose):
        """Attach the probe and open its perf buffer on ``bpf``."""
        if len(self.library) == 0:
            self._attach_k(bpf)
        else:
            self._attach_u(bpf)
        self.python_struct = self._generate_python_data_decl()
        callback = partial(self.print_event, bpf)
        bpf[self.events_name].open_perf_buffer(callback,
                                               page_cnt=self.page_cnt)

    def _attach_k(self, bpf):
        """Attach as kprobe/kretprobe."""
        if self.probe_type == "r":
            bpf.attach_kretprobe(event=self.function,
                                 fn_name=self.probe_name)
        elif self.probe_type == "p":
            bpf.attach_kprobe(event=self.function,
                              fn_name=self.probe_name)
        # Note that tracepoints don't need an explicit attach

    def _attach_u(self, bpf):
        """Attach as uprobe/uretprobe on the resolved library or executable."""
        libpath = BPF.find_library(self.library)
        if libpath is None:
            # This might be an executable (e.g. 'bash')
            libpath = BPF.find_exe(self.library)
        if libpath is None or len(libpath) == 0:
            self._bail("unable to find library %s" % self.library)

        if self.probe_type == "u":
            pass    # Was already enabled by the BPF constructor
        elif self.probe_type == "r":
            bpf.attach_uretprobe(name=libpath,
                                 sym=self.function,
                                 fn_name=self.probe_name,
                                 pid=Probe.tgid)
        else:
            bpf.attach_uprobe(name=libpath,
                              sym=self.function,
                              fn_name=self.probe_name,
                              pid=Probe.tgid)
class Probe(object):
    """A single trace probe: parses a probe string, generates the BPF C
    program for it, attaches it, and prints the events it produces.

    A probe string has the shape ``spec[(signature)] [(filter)] ["fmt", args]``
    where ``spec`` is ``[type:][library:]function`` and ``type`` is one of
    ``p`` (entry probe), ``r`` (return probe), ``t`` (tracepoint) or
    ``u`` (USDT).
    """

    # Class-wide state shared by all probes; populated by configure().
    probe_count = 0         # number of Probe objects created so far
    streq_index = 0         # suffix for the next generated streq_N helper
    max_events = None       # stop after this many events (None = no limit)
    event_count = 0         # events printed so far, across all probes
    first_ts = 0            # monotonic time captured in configure()
    print_time = False      # default so print_event works before configure()
    use_localtime = True
    tgid = -1               # -1 means "no tgid filter"
    pid = -1                # -1 means "no pid filter"
    page_cnt = None         # passed through to open_perf_buffer

    @classmethod
    def configure(cls, args):
        """Copy the relevant command-line options onto the class."""
        cls.max_events = args.max_events
        cls.print_time = args.timestamp or args.time
        cls.use_localtime = not args.timestamp
        cls.first_ts = BPF.monotonic_time()
        cls.tgid = args.tgid or -1
        cls.pid = args.pid or -1
        cls.page_cnt = args.buffer_pages

    def __init__(self, probe, string_size, kernel_stack, user_stack):
        """Parse ``probe`` and derive a unique, C-safe probe function name.

        ``string_size`` is the size of each ``%s`` buffer; ``kernel_stack``
        and ``user_stack`` select whether stack ids are captured.
        Raises ValueError (via _bail) if the probe string is malformed.
        """
        self.usdt = None
        self.streq_functions = ""
        self.raw_probe = probe
        self.string_size = string_size
        self.kernel_stack = kernel_stack
        self.user_stack = user_stack
        Probe.probe_count += 1
        self._parse_probe()
        self.probe_num = Probe.probe_count
        self.probe_name = "probe_%s_%d" % \
            (self._display_function(), self.probe_num)
        # The name becomes a C identifier, so replace anything illegal.
        self.probe_name = re.sub(r'[^A-Za-z0-9_]', '_', self.probe_name)

    def __str__(self):
        return "%s:%s:%s FLT=%s ACT=%s/%s" % (
            self.probe_type, self.library, self._display_function(),
            self.filter, self.types, self.values)

    def is_default_action(self):
        """Return True when the probe has no format string of its own."""
        return self.python_format == ""

    def _bail(self, error):
        """Raise ValueError describing a problem with this probe."""
        raise ValueError("error in probe '%s': %s" %
                         (self.raw_probe, error))

    def _parse_probe(self):
        """Split the raw probe into spec, signature, filter and action."""
        text = self.raw_probe

        # There might be a function signature preceding the actual
        # filter/print part, or not. Find the probe specifier first --
        # it ends with either a space or an open paren ( for the
        # function signature part.
        #                                          opt. signature
        #                               probespec       |      rest
        #                               ---------  ----------   --
        match = re.match(r'([^ \t\(]+)(\([^\(]*\))?(.*)', text)
        if match is None:
            # Empty probe, or one starting with whitespace / a paren.
            self._bail("missing probe specifier")
        (spec, sig, rest) = match.groups()

        self._parse_spec(spec)
        # Remove the parens
        self.signature = sig[1:-1] if sig else None
        if self.signature and self.probe_type in ['u', 't']:
            self._bail("USDT and tracepoint probes can't have " +
                       "a function signature; use arg1, arg2, " +
                       "... instead")

        text = rest.lstrip()
        # If we now have a (, wait for the balanced closing ) and that
        # will be the predicate
        self.filter = None
        if len(text) > 0 and text[0] == "(":
            balance = 1
            for i in range(1, len(text)):
                if text[i] == "(":
                    balance += 1
                if text[i] == ")":
                    balance -= 1
                if balance == 0:
                    self._parse_filter(text[:i + 1])
                    text = text[i + 1:]
                    break
            if self.filter is None:
                self._bail("unmatched end of predicate")

        if self.filter is None:
            self.filter = "1"

        # The remainder of the text is the printf action
        self._parse_action(text.lstrip())

    def _parse_spec(self, spec):
        """Set probe_type, library and function (or tracepoint/USDT names)
        from the colon-separated specifier."""
        parts = spec.split(":")
        # Two special cases: 'func' means 'p::func', 'lib:func' means
        # 'p:lib:func'. Other combinations need to provide an empty
        # value between delimiters, e.g. 'r::func' for a kretprobe on
        # the function func.
        if len(parts) == 1:
            parts = ["p", "", parts[0]]
        elif len(parts) == 2:
            parts = ["p", parts[0], parts[1]]
        if len(parts[0]) == 0:
            self.probe_type = "p"
        elif parts[0] in ["p", "r", "t", "u"]:
            self.probe_type = parts[0]
        else:
            self._bail("probe type must be '', 'p', 't', 'r', " +
                       "or 'u', but got '%s'" % parts[0])
        if self.probe_type == "t":
            self.tp_category = parts[1]
            self.tp_event = parts[2]
            self.library = ""       # kernel
            self.function = ""      # from TRACEPOINT_PROBE
        elif self.probe_type == "u":
            self.library = ':'.join(parts[1:-1])
            self.usdt_name = parts[-1]
            self.function = ""      # no function, just address
            # We will discover the USDT provider by matching on
            # the USDT name in the specified library
            self._find_usdt_probe()
        else:
            self.library = ':'.join(parts[1:-1])
            self.function = parts[-1]

    def _find_usdt_probe(self):
        """Create the USDT context and check that usdt_name exists in it."""
        target = Probe.pid if Probe.pid and Probe.pid != -1 \
            else Probe.tgid
        self.usdt = USDT(path=self.library, pid=target)
        for probe in self.usdt.enumerate_probes():
            if probe.name == self.usdt_name:
                return  # Found it, will enable later
        self._bail("unrecognized USDT probe %s" % self.usdt_name)

    def _parse_filter(self, filt):
        self.filter = self._rewrite_expr(filt)

    def _parse_types(self, fmt):
        """Record the format specifiers in ``fmt`` and derive the
        Python-side format string from it."""
        for match in re.finditer(
                r'[^%]%(s|u|d|llu|lld|hu|hd|x|llx|c|K|U)', fmt):
            self.types.append(match.group(1))
        # Python's % operator has no length modifiers; normalise them.
        fmt = re.sub(r'([^%]%)(u|d|llu|lld|hu|hd)', r'\1d', fmt)
        fmt = re.sub(r'([^%]%)(x|llx)', r'\1x', fmt)
        # %K / %U are printed as resolved symbol strings.
        fmt = re.sub('%K|%U', '%s', fmt)
        self.python_format = fmt.strip('"')

    def _parse_action(self, action):
        """Parse the ``"format", expr, ...`` part of the probe."""
        self.values = []
        self.types = []
        self.python_format = ""
        if len(action) == 0:
            return

        action = action.strip()
        match = re.search(r'(\".*?\"),?(.*)', action)
        if match is None:
            self._bail("expected format string in \"s")

        self.raw_format = match.group(1)
        self._parse_types(self.raw_format)
        # Split on commas that do not directly follow a double quote.
        for part in re.split('(?<!"),', match.group(2)):
            part = self._rewrite_expr(part)
            if len(part) > 0:
                self.values.append(part)

    # Shorthand names usable in filters/actions and their C expansions.
    aliases = {
        "retval": "PT_REGS_RC(ctx)",
        "arg1": "PT_REGS_PARM1(ctx)",
        "arg2": "PT_REGS_PARM2(ctx)",
        "arg3": "PT_REGS_PARM3(ctx)",
        "arg4": "PT_REGS_PARM4(ctx)",
        "arg5": "PT_REGS_PARM5(ctx)",
        "arg6": "PT_REGS_PARM6(ctx)",
        "$uid": "(unsigned)(bpf_get_current_uid_gid() & 0xffffffff)",
        "$gid": "(unsigned)(bpf_get_current_uid_gid() >> 32)",
        "$pid": "(unsigned)(bpf_get_current_pid_tgid() & 0xffffffff)",
        "$tgid": "(unsigned)(bpf_get_current_pid_tgid() >> 32)",
        "$cpu": "bpf_get_smp_processor_id()"
    }

    def _generate_streq_function(self, string):
        """Append a C helper comparing against ``string`` (a quoted C
        literal) to streq_functions and return the helper's name."""
        fname = "streq_%d" % Probe.streq_index
        Probe.streq_index += 1
        self.streq_functions += """
static inline bool %s(char const *ignored, uintptr_t str) {
        char needle[] = %s;
        char haystack[sizeof(needle)];
        bpf_probe_read(&haystack, sizeof(haystack), (void *)str);
        for (int i = 0; i < sizeof(needle) - 1; ++i) {
                if (needle[i] != haystack[i]) {
                        return false;
                }
        }
        return true;
}
        """ % (fname, string)
        return fname

    def _rewrite_expr(self, expr):
        """Expand aliases and STRCMP("...") calls in a C expression."""
        for alias, replacement in Probe.aliases.items():
            # For USDT probes, we replace argN values with the
            # actual arguments for that probe obtained using
            # bpf_readarg_N macros emitted at BPF construction.
            if alias.startswith("arg") and self.probe_type == "u":
                continue
            expr = expr.replace(alias, replacement)
        # Each STRCMP gets its own generated helper, replaced in order.
        for match in re.finditer(r'STRCMP\(("[^"]+")', expr):
            fname = self._generate_streq_function(match.group(1))
            expr = expr.replace("STRCMP", fname, 1)
        return expr

    # ctypes type used on the Python side for each format specifier.
    p_type = {"u": ct.c_uint, "d": ct.c_int,
              "llu": ct.c_ulonglong, "lld": ct.c_longlong,
              "hu": ct.c_ushort, "hd": ct.c_short,
              "x": ct.c_uint, "llx": ct.c_ulonglong, "c": ct.c_ubyte,
              "K": ct.c_ulonglong, "U": ct.c_ulonglong}

    def _generate_python_field_decl(self, idx, fields):
        """Append the ctypes field for format argument ``idx``."""
        field_type = self.types[idx]
        if field_type == "s":
            ptype = ct.c_char * self.string_size
        else:
            ptype = Probe.p_type[field_type]
        fields.append(("v%d" % idx, ptype))

    def _generate_python_data_decl(self):
        """Build the ctypes.Structure subclass mirroring the C event
        struct emitted by _generate_data_decl."""
        self.python_struct_name = "%s_%d_Data" % \
            (self._display_function(), self.probe_num)
        fields = [
            ("timestamp_ns", ct.c_ulonglong),
            ("tgid", ct.c_uint),
            ("pid", ct.c_uint),
            ("comm", ct.c_char * 16)       # TASK_COMM_LEN
        ]
        for i in range(len(self.types)):
            self._generate_python_field_decl(i, fields)
        if self.kernel_stack:
            fields.append(("kernel_stack_id", ct.c_int))
        if self.user_stack:
            fields.append(("user_stack_id", ct.c_int))
        return type(self.python_struct_name, (ct.Structure,),
                    dict(_fields_=fields))

    # C type used in the generated struct for each format specifier.
    c_type = {"u": "unsigned int", "d": "int",
              "llu": "unsigned long long", "lld": "long long",
              "hu": "unsigned short", "hd": "short",
              "x": "unsigned int", "llx": "unsigned long long",
              "c": "char", "K": "unsigned long long",
              "U": "unsigned long long"}
    fmt_types = c_type.keys()

    def _generate_field_decl(self, idx):
        """Return the C struct member declaration for argument ``idx``."""
        field_type = self.types[idx]
        if field_type == "s":
            return "char v%d[%d];\n" % (idx, self.string_size)
        if field_type in Probe.fmt_types:
            return "%s v%d;\n" % (Probe.c_type[field_type], idx)
        self._bail("unrecognized format specifier %s" % field_type)

    def _generate_data_decl(self):
        """Return the C declarations (event struct, perf output table and
        optional stack table) and record their names on self."""
        # The BPF program will populate values into the struct
        # according to the format string, and the Python program will
        # construct the final display string.
        self.events_name = "%s_events" % self.probe_name
        self.struct_name = "%s_data_t" % self.probe_name
        self.stacks_name = "%s_stacks" % self.probe_name
        stack_table = "BPF_STACK_TRACE(%s, 1024);" % self.stacks_name \
            if (self.kernel_stack or self.user_stack) else ""
        data_fields = ""
        for i in range(len(self.types)):
            data_fields += "        " + self._generate_field_decl(i)

        kernel_stack_str = "        int kernel_stack_id;" \
            if self.kernel_stack else ""
        user_stack_str = "        int user_stack_id;" \
            if self.user_stack else ""

        text = """
struct %s
{
        u64 timestamp_ns;
        u32 tgid;
        u32 pid;
        char comm[TASK_COMM_LEN];
%s
%s
%s
};

BPF_PERF_OUTPUT(%s);
%s
"""
        return text % (self.struct_name, data_fields,
                       kernel_stack_str, user_stack_str,
                       self.events_name, stack_table)

    def _generate_field_assign(self, idx):
        """Return the C statements that fill ``__data.v<idx>``."""
        field_type = self.types[idx]
        expr = self.values[idx].strip()
        text = ""
        if self.probe_type == "u" and expr[0:3] == "arg":
            # USDT arguments are read through bpf_usdt_readarg into a
            # local variable named after the argument.
            arg_index = int(expr[3])
            arg_ctype = self.usdt.get_probe_arg_ctype(
                self.usdt_name, arg_index - 1)
            text = ("        %s %s = 0;\n" +
                    "        bpf_usdt_readarg(%s, ctx, &%s);\n") \
                % (arg_ctype, expr, expr[3], expr)

        if field_type == "s":
            return text + """
        if (%s != 0) {
                bpf_probe_read(&__data.v%d, sizeof(__data.v%d), (void *)%s);
        }
                """ % (expr, idx, idx, expr)
        if field_type in Probe.fmt_types:
            return text + "        __data.v%d = (%s)%s;\n" % \
                (idx, Probe.c_type[field_type], expr)
        self._bail("unrecognized field type %s" % field_type)

    def _generate_usdt_filter_read(self):
        """For USDT probes, return C code that reads each argN used by the
        filter into ``argN_filter`` and rewrite self.filter to match."""
        text = ""
        if self.probe_type != "u":
            return text
        for arg in Probe.aliases:
            if not (arg.startswith("arg") and (arg in self.filter)):
                continue
            arg_index = int(arg.replace("arg", ""))
            arg_ctype = self.usdt.get_probe_arg_ctype(
                self.usdt_name, arg_index - 1)
            if not arg_ctype:
                self._bail("Unable to determine type of {} "
                           "in the filter".format(arg))
            text += """
        {} {}_filter;
        bpf_usdt_readarg({}, ctx, &{}_filter);
                """.format(arg_ctype, arg, arg_index, arg)
            self.filter = self.filter.replace(
                arg, "{}_filter".format(arg))
        return text

    def generate_program(self, include_self):
        """Return the complete BPF C source for this probe.

        When no pid/tgid filter applies and ``include_self`` is false,
        events from this process (os.getpid()) are filtered out.
        """
        data_decl = self._generate_data_decl()
        if Probe.pid != -1:
            pid_filter = """
        if (__pid != %d) { return 0; }
                """ % Probe.pid
        # uprobes can have a built-in tgid filter passed to
        # attach_uprobe, hence the check here -- for kprobes, we
        # need to do the tgid test by hand:
        elif len(self.library) == 0 and Probe.tgid != -1:
            pid_filter = """
        if (__tgid != %d) { return 0; }
                """ % Probe.tgid
        elif not include_self:
            pid_filter = """
        if (__tgid == %d) { return 0; }
                """ % os.getpid()
        else:
            pid_filter = ""

        prefix = ""
        signature = "struct pt_regs *ctx"
        if self.signature:
            signature += ", " + self.signature

        data_fields = ""
        for i in range(len(self.values)):
            data_fields += self._generate_field_assign(i)

        if self.probe_type == "t":
            heading = "TRACEPOINT_PROBE(%s, %s)" % \
                (self.tp_category, self.tp_event)
            ctx_name = "args"
        else:
            heading = "int %s(%s)" % (self.probe_name, signature)
            ctx_name = "ctx"

        stack_trace = ""
        if self.user_stack:
            stack_trace += """
        __data.user_stack_id = %s.get_stackid(
          %s, BPF_F_REUSE_STACKID | BPF_F_USER_STACK
        );""" % (self.stacks_name, ctx_name)
        if self.kernel_stack:
            stack_trace += """
        __data.kernel_stack_id = %s.get_stackid(
          %s, BPF_F_REUSE_STACKID
        );""" % (self.stacks_name, ctx_name)

        # The template must keep its line breaks: the C '//' comment
        # below would otherwise swallow the placeholders that follow it.
        text = heading + """
{
        u64 __pid_tgid = bpf_get_current_pid_tgid();
        u32 __tgid = __pid_tgid >> 32;
        u32 __pid = __pid_tgid; // implicit cast to u32 for bottom half
        %s
        %s
        %s
        if (!(%s)) return 0;

        struct %s __data = {0};
        __data.timestamp_ns = bpf_ktime_get_ns();
        __data.tgid = __tgid;
        __data.pid = __pid;
        bpf_get_current_comm(&__data.comm, sizeof(__data.comm));
%s
%s
        %s.perf_submit(%s, &__data, sizeof(__data));
        return 0;
}
"""
        # _generate_usdt_filter_read() rewrites self.filter, so it must
        # be evaluated before self.filter is read (tuple order matters).
        text = text % (pid_filter, prefix,
                       self._generate_usdt_filter_read(), self.filter,
                       self.struct_name, data_fields,
                       stack_trace, self.events_name, ctx_name)

        return self.streq_functions + data_decl + "\n" + text

    @classmethod
    def _time_off_str(cls, timestamp_ns):
        """Format ``timestamp_ns`` as seconds elapsed since first_ts."""
        return "%.6f" % (1e-9 * (timestamp_ns - cls.first_ts))

    def _display_function(self):
        """Return the name shown for this probe in output and names."""
        if self.probe_type == 'p' or self.probe_type == 'r':
            return self.function
        elif self.probe_type == 'u':
            return self.usdt_name
        else:   # self.probe_type == 't'
            return self.tp_event

    def print_stack(self, bpf, stack_id, tgid):
        """Print the symbolised stack ``stack_id``; negative ids (lookup
        failures) are printed as-is."""
        if stack_id < 0:
            print("        %d" % stack_id)
            return

        stack = list(bpf.get_table(self.stacks_name).walk(stack_id))
        for addr in stack:
            print("        %s" % (bpf.sym(addr, tgid,
                                          show_module=True,
                                          show_offset=True)))

    def _format_message(self, bpf, tgid, values):
        """Return the formatted message for one event.

        ``values`` may be any iterable of field values; it is copied, so
        the caller's object is not modified.
        """
        # Copy into a list: values are replaced by index below, which a
        # lazy map object (Python 3) or a tuple does not support.
        values = list(values)
        for i, t in enumerate(self.types):
            if i >= len(values):
                break
            if t == 'K':
                # Replace each %K with the kernel symbol
                values[i] = bpf.ksym(values[i], show_offset=True)
            elif t == 'U':
                # Replace each %U with the user symbol in tgid
                values[i] = bpf.sym(values[i], tgid,
                                    show_module=True, show_offset=True)
            elif t == 's' and isinstance(values[i], bytes):
                # ctypes char arrays come back as bytes; decode them so
                # they are not rendered as b'...' on Python 3.
                values[i] = values[i].decode('utf-8', 'replace')
        return self.python_format % tuple(values)

    def print_event(self, bpf, cpu, data, size):
        """Perf-buffer callback: decode one event and print it."""
        # Cast as the generated structure type and display
        # according to the format string in the probe.
        event = ct.cast(data, ct.POINTER(self.python_struct)).contents
        values = [getattr(event, "v%d" % i)
                  for i in range(len(self.values))]
        msg = self._format_message(bpf, event.tgid, values)
        # Task names are arbitrary bytes; never fail on invalid UTF-8.
        comm = event.comm.decode('utf-8', 'replace')
        if not Probe.print_time:
            print("%-6d %-6d %-12s %-16s %s" %
                  (event.tgid, event.pid, comm,
                   self._display_function(), msg))
        else:
            stamp = strftime("%H:%M:%S") if Probe.use_localtime else \
                Probe._time_off_str(event.timestamp_ns)
            print("%-8s %-6d %-6d %-12s %-16s %s" %
                  (stamp[:8], event.tgid, event.pid, comm,
                   self._display_function(), msg))

        if self.kernel_stack:
            self.print_stack(bpf, event.kernel_stack_id, -1)
        if self.user_stack:
            self.print_stack(bpf, event.user_stack_id, event.tgid)
        if self.user_stack or self.kernel_stack:
            print("")

        Probe.event_count += 1
        if Probe.max_events is not None and \
           Probe.event_count >= Probe.max_events:
            exit()

    def attach(self, bpf, verbose):
        """Attach the probe and open its perf buffer on ``bpf``."""
        if len(self.library) == 0:
            self._attach_k(bpf)
        else:
            self._attach_u(bpf)
        self.python_struct = self._generate_python_data_decl()
        callback = partial(self.print_event, bpf)
        bpf[self.events_name].open_perf_buffer(callback,
                                               page_cnt=self.page_cnt)

    def _attach_k(self, bpf):
        """Attach a kernel entry/return probe."""
        if self.probe_type == "r":
            bpf.attach_kretprobe(event=self.function,
                                 fn_name=self.probe_name)
        elif self.probe_type == "p":
            bpf.attach_kprobe(event=self.function,
                              fn_name=self.probe_name)
        # Note that tracepoints don't need an explicit attach

    def _attach_u(self, bpf):
        """Attach a user-space entry/return probe."""
        libpath = BPF.find_library(self.library)
        if libpath is None:
            # This might be an executable (e.g. 'bash')
            libpath = BPF.find_exe(self.library)
        if libpath is None or len(libpath) == 0:
            self._bail("unable to find library %s" % self.library)

        if self.probe_type == "u":
            pass    # Was already enabled by the BPF constructor
        elif self.probe_type == "r":
            bpf.attach_uretprobe(name=libpath,
                                 sym=self.function,
                                 fn_name=self.probe_name,
                                 pid=Probe.tgid)
        else:
            bpf.attach_uprobe(name=libpath,
                              sym=self.function,
                              fn_name=self.probe_name,
                              pid=Probe.tgid)
class Probe(object):
    """A histogram/frequency probe: parses a specifier, generates the
    BPF C program that collects the requested expressions, attaches it,
    and displays the collected table.

    Specifier shape (':'-separated, optional '#label' suffix):
    ``type:library:function(signature)[:expr_types:exprs[:filter]]``
    where ``type`` is one of ``p``, ``r``, ``t`` or ``u``.
    """

    next_probe_index = 0    # suffix making generated names unique
    streq_index = 0         # suffix for the next generated streq_N helper
    aliases = {"$PID": "(bpf_get_current_pid_tgid() >> 32)"}

    def _substitute_aliases(self, expr):
        """Expand every alias in ``expr``; None is passed through."""
        if expr is None:
            return expr
        for alias, subst in Probe.aliases.items():
            expr = expr.replace(alias, subst)
        return expr

    def _parse_signature(self):
        """Fill self.param_types (name -> C type) from self.signature."""
        self.param_types = {}
        for param in self.signature.split(','):
            param = param.strip()
            if not param:
                # An empty signature "()" has no parameters to record.
                continue
            # If the type is a pointer, the * can be next to the
            # param name. Other complex types like arrays are not
            # supported right now.
            index = param.rfind('*')
            index = index if index != -1 else param.rfind(' ')
            param_type = param[0:index + 1].strip()
            param_name = param[index + 1:].strip()
            self.param_types[param_name] = param_type

    def _generate_entry(self):
        """Return the C entry-probe function that stores each probed
        argument (and/or the entry timestamp) in its per-pid hash."""
        self.entry_probe_func = self.probe_func_name + "_entry"
        # Line breaks are significant here: the C '//' comments would
        # otherwise swallow the placeholders that follow them.
        text = """
int PROBENAME(struct pt_regs *ctx SIGNATURE)
{
        u64 __pid_tgid = bpf_get_current_pid_tgid();
        u32 __pid      = __pid_tgid;        // lower 32 bits
        u32 __tgid     = __pid_tgid >> 32;  // upper 32 bits
        PID_FILTER
        COLLECT
        return 0;
}
"""
        text = text.replace("PROBENAME", self.entry_probe_func)
        text = text.replace(
            "SIGNATURE",
            "" if len(self.signature) == 0 else ", " + self.signature)
        text = text.replace("PID_FILTER", self._generate_pid_filter())
        collect = ""
        # Sorted so the generated program text is deterministic.
        for pname in sorted(self.args_to_probe):
            param_hash = self.hashname_prefix + pname
            if pname == "__latency":
                collect += """
u64 __time = bpf_ktime_get_ns();
%s.update(&__pid, &__time);
                """ % param_hash
            else:
                collect += "%s.update(&__pid, &%s);\n" % \
                    (param_hash, pname)
        text = text.replace("COLLECT", collect)
        return text

    def _generate_entry_probe(self):
        """Return the hash declarations plus the entry probe needed to
        serve $entry(name) / $latency in a return probe.

        Raises ValueError if $entry() names an unknown parameter.
        """
        # Any $entry(name) expressions result in saving that argument
        # when entering the function.
        self.args_to_probe = set()
        regex = r"\$entry\((\w+)\)"
        for expr in self.exprs:
            for arg in re.finditer(regex, expr):
                self.args_to_probe.add(arg.group(1))
        for arg in re.finditer(regex, self.filter):
            self.args_to_probe.add(arg.group(1))
        if any("$latency" in expr for expr in self.exprs) or \
           "$latency" in self.filter:
            self.args_to_probe.add("__latency")
            self.param_types["__latency"] = "u64"    # nanoseconds
        for pname in sorted(self.args_to_probe):
            if pname not in self.param_types:
                # Report the offending parameter name itself.
                raise ValueError("$entry(%s): no such param" % pname)

        self.hashname_prefix = "%s_param_" % self.probe_hash_name
        text = ""
        for pname in sorted(self.args_to_probe):
            # Each argument is stored in a separate hash that is
            # keyed by pid.
            text += "BPF_HASH(%s, u32, %s);\n" % \
                (self.hashname_prefix + pname,
                 self.param_types[pname])
        text += self._generate_entry()
        return text

    def _generate_retprobe_prefix(self):
        """Return C code that looks up each value saved at entry."""
        # After we're done here, there are __%s_val variables for each
        # argument we needed to probe using $entry(name), and they all
        # have values (which isn't necessarily the case if we missed
        # the method entry probe).
        text = ""
        self.param_val_names = {}
        for pname in sorted(self.args_to_probe):
            val_name = "__%s_val" % pname
            text += "%s *%s = %s.lookup(&__pid);\n" % \
                (self.param_types[pname], val_name,
                 self.hashname_prefix + pname)
            text += "if (%s == 0) { return 0 ; }\n" % val_name
            self.param_val_names[pname] = val_name
        return text

    def _replace_entry_exprs(self):
        """Rewrite $entry(name) / $latency in exprs and filter to use the
        variables declared by _generate_retprobe_prefix."""
        for pname, vname in self.param_val_names.items():
            if pname == "__latency":
                entry_expr = "$latency"
                val_expr = "(bpf_ktime_get_ns() - *%s)" % vname
            else:
                entry_expr = "$entry(%s)" % pname
                val_expr = "(*%s)" % vname
            for i in range(len(self.exprs)):
                self.exprs[i] = self.exprs[i].replace(
                    entry_expr, val_expr)
            self.filter = self.filter.replace(entry_expr, val_expr)

    def _attach_entry_probe(self):
        """Attach the helper entry probe generated for a return probe."""
        if self.is_user:
            self.bpf.attach_uprobe(name=self.library,
                                   sym=self.function,
                                   fn_name=self.entry_probe_func,
                                   pid=self.pid or -1)
        else:
            self.bpf.attach_kprobe(event=self.function,
                                   fn_name=self.entry_probe_func)

    def _bail(self, error):
        """Raise ValueError describing a problem with this specifier."""
        raise ValueError("error parsing probe '%s': %s" %
                         (self.raw_spec, error))

    def _validate_specifier(self):
        """Check the overall shape of raw_spec; bail on any problem."""
        # Everything after '#' is the probe label, ignore it
        spec = self.raw_spec.split('#')[0]
        parts = spec.strip().split(':')
        if len(parts) < 3:
            self._bail("at least the probe type, library, and " +
                       "function signature must be specified")
        if len(parts) > 6:
            self._bail("extraneous ':'-separated parts detected")
        if parts[0] not in ["r", "p", "t", "u"]:
            self._bail("probe type must be 'p', 'r', 't', or 'u'" +
                       " but got '%s'" % parts[0])
        if re.match(r"\S+\(.*\)", parts[2]) is None:
            self._bail(("function signature '%s' has an invalid " +
                        "format") % parts[2])

    def _parse_expr_types(self, expr_types):
        if len(expr_types) == 0:
            self._bail("no expr types specified")
        self.expr_types = expr_types.split(',')

    def _parse_exprs(self, exprs):
        if len(exprs) == 0:
            self._bail("no exprs specified")
        self.exprs = exprs.split(',')

    def _make_valid_identifier(self, ident):
        """Replace every character not legal in a C identifier by '_'."""
        return re.sub(r'[^A-Za-z0-9_]', '_', ident)

    def __init__(self, tool, type, specifier):
        """Parse ``specifier`` into a probe of kind ``type``.

        ``tool.args`` supplies ``pid`` and ``cumulative``; ``type`` is
        "hist" or "freq". Raises ValueError (via _bail) on a malformed
        specifier.
        """
        self.usdt_ctx = None
        self.streq_functions = ""
        self.pid = tool.args.pid
        self.cumulative = tool.args.cumulative or False
        self.raw_spec = specifier
        self._validate_specifier()

        spec_and_label = specifier.split('#')
        self.label = spec_and_label[1] \
            if len(spec_and_label) == 2 else None

        parts = spec_and_label[0].strip().split(':')
        self.type = type    # hist or freq
        self.probe_type = parts[0]
        fparts = parts[2].split('(')
        self.function = fparts[0].strip()
        if self.probe_type == "t":
            self.library = ""       # kernel
            self.tp_category = parts[1]
            self.tp_event = self.function
        elif self.probe_type == "u":
            self.library = parts[1]
            # The USDT probe is enabled under the same function name
            # that is assigned again at the end of this method.
            self.probe_func_name = self._make_valid_identifier(
                "%s_probe%d" %
                (self.function, Probe.next_probe_index))
            self._enable_usdt_probe()
        else:
            self.library = parts[1]
        self.is_user = len(self.library) > 0
        self.signature = fparts[1].strip()[:-1]    # drop the ')'
        self._parse_signature()

        # If the user didn't specify an expression to probe, we probe
        # the retval in a ret probe, or simply the value "1" otherwise.
        self.is_default_expr = len(parts) < 5
        if not self.is_default_expr:
            self._parse_expr_types(parts[3])
            self._parse_exprs(parts[4])
            if len(self.exprs) != len(self.expr_types):
                self._bail("mismatched # of exprs and types")
            if self.type == "hist" and len(self.expr_types) > 1:
                self._bail("histograms can only have 1 expr")
        else:
            if not self.probe_type == "r" and self.type == "hist":
                self._bail("histograms must have expr")
            self.expr_types = \
                ["u64" if not self.probe_type == "r" else "int"]
            self.exprs = \
                ["1" if not self.probe_type == "r" else "$retval"]
        self.filter = "" if len(parts) != 6 else parts[5]
        self._substitute_exprs()

        # Do we need to attach an entry probe so that we can collect an
        # argument that is required for an exit (return) probe?
        def check(expr):
            keywords = ["$entry", "$latency"]
            return any(kw in expr for kw in keywords)
        self.entry_probe_required = self.probe_type == "r" and \
            (any(map(check, self.exprs)) or check(self.filter))

        self.probe_func_name = self._make_valid_identifier(
            "%s_probe%d" %
            (self.function, Probe.next_probe_index))
        self.probe_hash_name = self._make_valid_identifier(
            "%s_hash%d" %
            (self.function, Probe.next_probe_index))
        Probe.next_probe_index += 1

    def _enable_usdt_probe(self):
        """Create the USDT context and enable this probe in it."""
        self.usdt_ctx = USDT(path=self.library, pid=self.pid)
        self.usdt_ctx.enable_probe(
            self.function, self.probe_func_name)

    def _generate_streq_function(self, string):
        """Append a C helper comparing against ``string`` (a quoted C
        literal) to streq_functions and return the helper's name."""
        fname = "streq_%d" % Probe.streq_index
        Probe.streq_index += 1
        self.streq_functions += """
static inline bool %s(char const *ignored, char const *str) {
        char needle[] = %s;
        char haystack[sizeof(needle)];
        bpf_probe_read(&haystack, sizeof(haystack), (void *)str);
        for (int i = 0; i < sizeof(needle) - 1; ++i) {
                if (needle[i] != haystack[i]) {
                        return false;
                }
        }
        return true;
}
        """ % (fname, string)
        return fname

    def _substitute_exprs(self):
        """Expand aliases, STRCMP("...") and $retval in exprs/filter."""
        def repl(expr):
            expr = self._substitute_aliases(expr)
            # Each STRCMP gets its own helper, replaced in order.
            for match in re.finditer(r'STRCMP\(("[^"]+")', expr):
                fname = self._generate_streq_function(match.group(1))
                expr = expr.replace("STRCMP", fname, 1)
            return expr.replace("$retval", "PT_REGS_RC(ctx)")
        for i in range(len(self.exprs)):
            self.exprs[i] = repl(self.exprs[i])
        self.filter = repl(self.filter)

    def _is_string(self, expr_type):
        return expr_type == "char*" or expr_type == "char *"

    def _generate_hash_field(self, i):
        """Return the C key-struct member for expression ``i``."""
        if self._is_string(self.expr_types[i]):
            return "struct __string_t v%d;\n" % i
        else:
            return "%s v%d;\n" % (self.expr_types[i], i)

    def _generate_usdt_arg_assignment(self, i):
        """For a USDT probe whose expression ``i`` starts with argN,
        return C code reading that argument; otherwise return ""."""
        expr = self.exprs[i]
        if self.probe_type == "u" and expr[0:3] == "arg":
            arg_index = int(expr[3])
            arg_ctype = self.usdt_ctx.get_probe_arg_ctype(
                self.function, arg_index - 1)
            return ("        %s %s = 0;\n" +
                    "        bpf_usdt_readarg(%s, ctx, &%s);\n") \
                % (arg_ctype, expr, expr[3], expr)
        else:
            return ""

    def _generate_field_assignment(self, i):
        """Return C code storing expression ``i`` into ``__key.v<i>``."""
        text = self._generate_usdt_arg_assignment(i)
        if self._is_string(self.expr_types[i]):
            return (text + "        bpf_probe_read(&__key.v%d.s," +
                    " sizeof(__key.v%d.s), (void *)%s);\n") % \
                (i, i, self.exprs[i])
        else:
            return text + "        __key.v%d = %s;\n" % \
                (i, self.exprs[i])

    def _generate_hash_decl(self):
        """Return the C declaration of the table that collects results."""
        if self.type == "hist":
            return "BPF_HISTOGRAM(%s, %s);" % \
                (self.probe_hash_name, self.expr_types[0])
        else:
            text = "struct %s_key_t {\n" % self.probe_hash_name
            for i in range(len(self.expr_types)):
                text += self._generate_hash_field(i)
            text += "};\n"
            text += "BPF_HASH(%s, struct %s_key_t, u64);\n" % \
                (self.probe_hash_name, self.probe_hash_name)
            return text

    def _generate_key_assignment(self):
        """Return C code that computes ``__key`` for this event."""
        if self.type == "hist":
            return self._generate_usdt_arg_assignment(0) + \
                ("%s __key = %s;\n" %
                 (self.expr_types[0], self.exprs[0]))
        else:
            text = "struct %s_key_t __key = {};\n" % \
                self.probe_hash_name
            for i in range(len(self.exprs)):
                text += self._generate_field_assignment(i)
            return text

    def _generate_hash_update(self):
        """Return the C statement that records ``__key`` in the table."""
        if self.type == "hist":
            return "%s.increment(bpf_log2l(__key));" % \
                self.probe_hash_name
        else:
            return "%s.increment(__key);" % self.probe_hash_name

    def _generate_pid_filter(self):
        """Return the C pid check for kernel probes, or ""."""
        # Kernel probes need to explicitly filter pid, because the
        # attach interface doesn't support pid filtering
        if self.pid is not None and not self.is_user:
            return "if (__tgid != %d) { return 0; }" % self.pid
        else:
            return ""

    def generate_text(self):
        """Return the complete BPF C source for this probe, including
        the helper entry probe when one is required."""
        program = ""
        heading = (
            "TRACEPOINT_PROBE(%s, %s)" %
            (self.tp_category, self.tp_event)
            if self.probe_type == "t"
            else "int PROBENAME(struct pt_regs *ctx SIGNATURE)")
        # Line breaks are significant here: the C '//' comments would
        # otherwise swallow the placeholders that follow them.
        probe_text = """
DATA_DECL
                """ + heading + """
{
        u64 __pid_tgid = bpf_get_current_pid_tgid();
        u32 __pid      = __pid_tgid;        // lower 32 bits
        u32 __tgid     = __pid_tgid >> 32;  // upper 32 bits
        PID_FILTER
        PREFIX
        if (!(FILTER)) return 0;
        KEY_EXPR
        COLLECT
        return 0;
}
"""
        prefix = ""
        signature = ""

        # If any entry arguments are probed in a ret probe, we need
        # to generate an entry probe to collect them
        if self.entry_probe_required:
            program += self._generate_entry_probe()
            prefix += self._generate_retprobe_prefix()
            # Replace $entry(paramname) with a reference to the
            # value we collected when entering the function:
            self._replace_entry_exprs()

        if self.probe_type == "p" and len(self.signature) > 0:
            # Only entry uprobes/kprobes can have user-specified
            # signatures. Other probes force it to ().
            signature = ", " + self.signature

        program += probe_text.replace("PROBENAME",
                                      self.probe_func_name)
        program = program.replace("SIGNATURE", signature)
        # PID_FILTER must be substituted before FILTER, which is a
        # substring of it.
        program = program.replace("PID_FILTER",
                                  self._generate_pid_filter())

        decl = self._generate_hash_decl()
        key_expr = self._generate_key_assignment()
        collect = self._generate_hash_update()
        program = program.replace("DATA_DECL", decl)
        program = program.replace("KEY_EXPR", key_expr)
        program = program.replace(
            "FILTER",
            "1" if len(self.filter) == 0 else self.filter)
        program = program.replace("COLLECT", collect)
        program = program.replace("PREFIX", prefix)

        return self.streq_functions + program

    def _attach_u(self):
        """Attach the user-space probe, resolving the library path."""
        libpath = BPF.find_library(self.library)
        if libpath is None:
            libpath = BPF.find_exe(self.library)
        if libpath is None or len(libpath) == 0:
            self._bail("unable to find library %s" % self.library)

        if self.probe_type == "r":
            self.bpf.attach_uretprobe(name=libpath,
                                      sym=self.function,
                                      fn_name=self.probe_func_name,
                                      pid=self.pid or -1)
        else:
            self.bpf.attach_uprobe(name=libpath,
                                   sym=self.function,
                                   fn_name=self.probe_func_name,
                                   pid=self.pid or -1)

    def _attach_k(self):
        """Attach the kernel probe (tracepoints need no attach)."""
        if self.probe_type == "t":
            pass    # Nothing to do for tracepoints
        elif self.probe_type == "r":
            self.bpf.attach_kretprobe(event=self.function,
                                      fn_name=self.probe_func_name)
        else:
            self.bpf.attach_kprobe(event=self.function,
                                   fn_name=self.probe_func_name)

    def attach(self, bpf):
        """Remember ``bpf`` and attach this probe (and its entry probe,
        if any). USDT probes need no explicit attach here."""
        self.bpf = bpf
        if self.probe_type == "u":
            return
        if self.is_user:
            self._attach_u()
        else:
            self._attach_k()
        if self.entry_probe_required:
            self._attach_entry_probe()

    def _v2s(self, v):
        """Convert a key field to its display string."""
        # Most fields can be converted with plain str(), but strings
        # are wrapped in a __string_t which has an .s field
        if "__string_t" in type(v).__name__:
            return str(v.s)
        return str(v)

    def _display_expr(self, i):
        """Return expression ``i`` translated back to the user's
        notation ($latency, $retval, aliases, parameter names)."""
        # Replace ugly latency calculation with $latency
        expr = self.exprs[i].replace(
            "(bpf_ktime_get_ns() - *____latency_val)", "$latency")
        # Replace alias values back with the alias name
        for alias, subst in Probe.aliases.items():
            expr = expr.replace(subst, alias)
        # Replace retval expression with $retval
        expr = expr.replace("PT_REGS_RC(ctx)", "$retval")
        # Replace ugly (*__param_val) expressions with param name
        return re.sub(r"\(\*__(\w+)_val\)", r"\1", expr)

    def _display_key(self, key):
        """Return the display string for one table key."""
        if self.is_default_expr:
            if not self.probe_type == "r":
                return "total calls"
            else:
                return "retval = %s" % str(key.v0)
        else:
            # The key object has v0, ..., vk fields containing
            # the values of the expressions from self.exprs
            def str_i(i):
                key_i = self._v2s(getattr(key, "v%d" % i))
                return "%s = %s" % \
                    (self._display_expr(i), key_i)
            return ", ".join(map(str_i, range(len(self.exprs))))

    def display(self, top):
        """Print the collected table; for "freq" probes only the ``top``
        most frequent entries when ``top`` is not None. The table is
        cleared afterwards unless the probe is cumulative."""
        data = self.bpf.get_table(self.probe_hash_name)
        if self.type == "freq":
            print(self.label or self.raw_spec)
            print("\t%-10s %s" % ("COUNT", "EVENT"))
            sdata = sorted(data.items(), key=lambda p: p[1].value)
            if top is not None:
                sdata = sdata[-top:]
            for key, value in sdata:
                # Print some nice values if the user didn't
                # specify an expression to probe
                if self.is_default_expr:
                    if not self.probe_type == "r":
                        key_str = "total calls"
                    else:
                        key_str = "retval = %s" % \
                            self._v2s(key.v0)
                else:
                    key_str = self._display_key(key)
                print("\t%-10s %s" %
                      (str(value.value), key_str))
        elif self.type == "hist":
            label = self.label or (
                self._display_expr(0)
                if not self.is_default_expr else "retval")
            data.print_log2_hist(val_type=label)
        if not self.cumulative:
            data.clear()

    def __str__(self):
        return self.label or self.raw_spec
class Probe(object):
    """One trace probe: parses a probe spec and emits/attaches its BPF code.

    A probe string has the shape ``spec[(signature)] [(filter)] ["fmt", args]``.
    The instance parses that string, generates the C text of the BPF program
    that collects the requested values, attaches the program, and prints the
    events delivered through the perf buffer.

    Class-level attributes hold settings shared by every probe; they are
    populated from parsed command-line arguments by :meth:`configure`.
    """

    probe_count = 0
    streq_index = 0
    max_events = None
    event_count = 0
    first_ts = 0
    first_ts_real = None
    print_time = False
    print_unix_timestamp = False
    use_localtime = True
    time_field = False
    print_cpu = False
    print_address = False
    tgid = -1
    pid = -1
    page_cnt = None
    # Default so that expression rewriting works even when configure() has
    # not been called; configure() overwrites it.
    bin_cmp = False
    build_id_enabled = False

    @classmethod
    def configure(cls, args):
        """Copy the shared settings from the parsed arguments onto the class."""
        cls.max_events = args.max_events
        cls.print_time = args.timestamp or args.time
        cls.print_unix_timestamp = args.unix_timestamp
        cls.use_localtime = not args.timestamp
        # The BPF-side timestamp is only needed for offset-style output.
        cls.time_field = cls.print_time and (not cls.use_localtime)
        cls.print_cpu = args.print_cpu
        cls.print_address = args.address
        cls.first_ts = BPF.monotonic_time()
        cls.first_ts_real = time.time()
        cls.tgid = args.tgid or -1
        cls.pid = args.pid or -1
        cls.page_cnt = args.buffer_pages
        cls.bin_cmp = args.bin_cmp
        cls.build_id_enabled = args.sym_file_list is not None

    def __init__(self, probe, string_size, kernel_stack, user_stack,
                 cgroup_map_name, name, msg_filter):
        """Parse ``probe`` and derive the BPF function name for it.

        Raises ValueError (through ``_bail``) when the probe text is invalid.
        """
        self.usdt = None
        self.streq_functions = ""
        self.raw_probe = probe
        self.string_size = string_size
        self.kernel_stack = kernel_stack
        self.user_stack = user_stack
        self.probe_user_list = set()
        Probe.probe_count += 1
        self._parse_probe()
        self.probe_num = Probe.probe_count
        self.probe_name = "probe_%s_%d" % (self._display_function(),
                                           self.probe_num)
        # The name becomes a C identifier, so squash anything else to '_'.
        self.probe_name = re.sub(r'[^A-Za-z0-9_]', '_', self.probe_name)
        self.cgroup_map_name = cgroup_map_name
        self.name = name
        self.msg_filter = msg_filter
        # compiler can generate proper codes for function
        # signatures with "syscall__" prefix
        if self.is_syscall_kprobe:
            # Drop the leading "probe_" (6 characters).
            self.probe_name = "syscall__" + self.probe_name[6:]

    def __str__(self):
        return "%s:%s:%s FLT=%s ACT=%s/%s" % (
            self.probe_type, self.library, self._display_function(),
            self.filter, self.types, self.values)

    def is_default_action(self):
        """Return True when the probe has no format string."""
        return self.python_format == ""

    def _bail(self, error):
        """Raise ValueError describing ``error`` for this probe."""
        raise ValueError("error in probe '%s': %s" %
                         (self.raw_probe, error))

    def _parse_probe(self):
        """Split the raw probe text into spec, signature, filter and action."""
        text = self.raw_probe

        # There might be a function signature preceding the actual
        # filter/print part, or not. Find the probe specifier first --
        # it ends with either a space or an open paren ( for the
        # function signature part.
        #                           probespec   opt. signature   rest
        #                           ---------   --------------   ----
        match = re.match(r'([^ \t\(]+)(\([^\(]*\))?(.*)', text)
        if match is None:
            # Empty text, or text starting with whitespace or '('.
            self._bail("missing probe specifier")
        (spec, sig, rest) = match.groups()

        self._parse_spec(spec)
        # Remove the parens
        self.signature = sig[1:-1] if sig else None
        if self.signature and self.probe_type in ['u', 't']:
            self._bail("USDT and tracepoint probes can't have " +
                       "a function signature; use arg1, arg2, " +
                       "... instead")

        text = rest.lstrip()
        # If we now have a (, wait for the balanced closing ) and that
        # will be the predicate
        self.filter = None
        if len(text) > 0 and text[0] == "(":
            balance = 1
            for i in range(1, len(text)):
                if text[i] == "(":
                    balance += 1
                if text[i] == ")":
                    balance -= 1
                if balance == 0:
                    self._parse_filter(text[:i + 1])
                    text = text[i + 1:]
                    break
            if self.filter is None:
                self._bail("unmatched end of predicate")

        if self.filter is None:
            self.filter = "1"

        # The remainder of the text is the printf action
        self._parse_action(text.lstrip())

    def _parse_spec(self, spec):
        """Decode ``[type:][library:]function`` into instance attributes."""
        parts = spec.split(":")
        # Two special cases: 'func' means 'p::func', 'lib:func' means
        # 'p:lib:func'. Other combinations need to provide an empty
        # value between delimiters, e.g. 'r::func' for a kretprobe on
        # the function func.
        if len(parts) == 1:
            parts = ["p", "", parts[0]]
        elif len(parts) == 2:
            parts = ["p", parts[0], parts[1]]
        if len(parts[0]) == 0:
            self.probe_type = "p"
        elif parts[0] in ["p", "r", "t", "u"]:
            self.probe_type = parts[0]
        else:
            self._bail("probe type must be '', 'p', 't', 'r', " +
                       "or 'u', but got '%s'" % parts[0])
        if self.probe_type == "t":
            self.tp_category = parts[1]
            self.tp_event = parts[2]
            self.library = ""       # kernel
            self.function = ""      # from TRACEPOINT_PROBE
        elif self.probe_type == "u":
            # u:<library>[:<provider>]:<probe> where :<provider> is optional
            self.library = parts[1]
            self.usdt_name = ":".join(parts[2:])
            self.function = ""      # no function, just address
            # We will discover the USDT provider by matching on
            # the USDT name in the specified library
            self._find_usdt_probe()
        else:
            self.library = ':'.join(parts[1:-1])
            self.function = parts[-1]

        # only x64 syscalls needs checking, no other syscall wrapper yet.
        self.is_syscall_kprobe = False
        if self.probe_type == "p" and len(self.library) == 0 and \
           self.function[:10] == "__x64_sys_":
            self.is_syscall_kprobe = True

    def _find_usdt_probe(self):
        """Open the USDT context and verify that the named probe exists."""
        target = Probe.pid if Probe.pid and Probe.pid != -1 \
            else Probe.tgid
        self.usdt = USDT(path=self.library, pid=target)

        parts = self.usdt_name.split(":")
        if len(parts) == 1:
            provider_name = None
            usdt_name = parts[0].encode("ascii")
        else:
            provider_name = parts[0].encode("ascii")
            usdt_name = parts[1].encode("ascii")
        for probe in self.usdt.enumerate_probes():
            if ((not provider_name or probe.provider == provider_name)
                    and probe.name == usdt_name):
                return  # Found it, will enable later
        self._bail("unrecognized USDT probe %s" % self.usdt_name)

    def _parse_filter(self, filt):
        """Store the predicate with aliases expanded."""
        self.filter = self._rewrite_expr(filt)

    def _parse_types(self, fmt):
        """Record the format specifiers of ``fmt`` and build the Python format.

        All integer widths collapse to ``%d``/``%x`` on the Python side and
        ``%K``/``%U`` become ``%s`` because they are printed as symbol names.
        """
        for match in re.finditer(
                r'[^%]%(s|u|d|lu|llu|ld|lld|hu|hd|x|lx|llx|c|K|U)', fmt):
            self.types.append(match.group(1))
        fmt = re.sub(r'([^%]%)(u|d|lu|llu|ld|lld|hu|hd)', r'\1d', fmt)
        fmt = re.sub(r'([^%]%)(x|lx|llx)', r'\1x', fmt)
        fmt = re.sub('%K|%U', '%s', fmt)
        self.python_format = fmt.strip('"')

    def _parse_action(self, action):
        """Parse the ``"format", expr, ...`` tail of the probe."""
        self.values = []
        self.types = []
        self.python_format = ""
        if len(action) == 0:
            return

        action = action.strip()
        match = re.search(r'(\".*?\"),?(.*)', action)
        if match is None:
            self._bail("expected format string in \"s")

        self.raw_format = match.group(1)
        self._parse_types(self.raw_format)
        # Split on commas that do not directly follow a double quote.
        for part in re.split('(?<!"),', match.group(2)):
            part = self._rewrite_expr(part)
            if len(part) > 0:
                self.values.append(part)

    aliases_arg = {
        "arg1": "PT_REGS_PARM1(ctx)",
        "arg2": "PT_REGS_PARM2(ctx)",
        "arg3": "PT_REGS_PARM3(ctx)",
        "arg4": "PT_REGS_PARM4(ctx)",
        "arg5": "PT_REGS_PARM5(ctx)",
        "arg6": "PT_REGS_PARM6(ctx)",
    }

    # Used instead of aliases_arg when is_syscall_kprobe is set: the real
    # arguments are read out of the pt_regs pointed to by the first parameter.
    aliases_indarg = {
        "arg1": "({u64 _val; struct pt_regs *_ctx = (struct pt_regs *)PT_REGS_PARM1(ctx);"
                " bpf_probe_read(&_val, sizeof(_val), &(PT_REGS_PARM1(_ctx))); _val;})",
        "arg2": "({u64 _val; struct pt_regs *_ctx = (struct pt_regs *)PT_REGS_PARM1(ctx);"
                " bpf_probe_read(&_val, sizeof(_val), &(PT_REGS_PARM2(_ctx))); _val;})",
        "arg3": "({u64 _val; struct pt_regs *_ctx = (struct pt_regs *)PT_REGS_PARM1(ctx);"
                " bpf_probe_read(&_val, sizeof(_val), &(PT_REGS_PARM3(_ctx))); _val;})",
        "arg4": "({u64 _val; struct pt_regs *_ctx = (struct pt_regs *)PT_REGS_PARM1(ctx);"
                " bpf_probe_read(&_val, sizeof(_val), &(PT_REGS_PARM4(_ctx))); _val;})",
        "arg5": "({u64 _val; struct pt_regs *_ctx = (struct pt_regs *)PT_REGS_PARM1(ctx);"
                " bpf_probe_read(&_val, sizeof(_val), &(PT_REGS_PARM5(_ctx))); _val;})",
        "arg6": "({u64 _val; struct pt_regs *_ctx = (struct pt_regs *)PT_REGS_PARM1(ctx);"
                " bpf_probe_read(&_val, sizeof(_val), &(PT_REGS_PARM6(_ctx))); _val;})",
    }

    aliases_common = {
        "retval": "PT_REGS_RC(ctx)",
        "$uid": "(unsigned)(bpf_get_current_uid_gid() & 0xffffffff)",
        "$gid": "(unsigned)(bpf_get_current_uid_gid() >> 32)",
        "$pid": "(unsigned)(bpf_get_current_pid_tgid() & 0xffffffff)",
        "$tgid": "(unsigned)(bpf_get_current_pid_tgid() >> 32)",
        "$cpu": "bpf_get_smp_processor_id()",
        "$task": "((struct task_struct *)bpf_get_current_task())"
    }

    def _rewrite_expr(self, expr):
        """Expand aliases in ``expr`` and return the resulting C expression."""
        # Find the occurrences of any arg[1-6]@user. Use it later to
        # identify bpf_probe_read_user
        for matches in re.finditer(r'(arg[1-6])(@user)', expr):
            self.probe_user_list.add(matches.group(1))
        # Remove @user occurrences from arg before resolving to its
        # corresponding aliases.
        expr = re.sub(r'(arg[1-6])@user', r'\1', expr)
        rdict = StrcmpRewrite.rewrite_expr(
            expr, self.bin_cmp, self.library, self.probe_user_list,
            self.streq_functions, Probe.streq_index)
        expr = rdict["expr"]
        self.streq_functions = rdict["streq_functions"]
        Probe.streq_index = rdict["probeid"]
        alias_to_check = Probe.aliases_indarg \
            if self.is_syscall_kprobe \
            else Probe.aliases_arg
        # For USDT probes, we replace argN values with the
        # actual arguments for that probe obtained using
        # bpf_readarg_N macros emitted at BPF construction.
        if not self.probe_type == "u":
            for alias, replacement in alias_to_check.items():
                expr = expr.replace(alias, replacement)
        for alias, replacement in Probe.aliases_common.items():
            expr = expr.replace(alias, replacement)
        return expr

    # ctypes type used on the Python side for each format specifier.
    p_type = {"u": ct.c_uint, "d": ct.c_int, "lu": ct.c_ulong,
              "ld": ct.c_long,
              "llu": ct.c_ulonglong, "lld": ct.c_longlong,
              "hu": ct.c_ushort, "hd": ct.c_short,
              "x": ct.c_uint, "lx": ct.c_ulong, "llx": ct.c_ulonglong,
              "c": ct.c_ubyte,
              "K": ct.c_ulonglong, "U": ct.c_ulonglong}

    def _generate_python_field_decl(self, idx, fields):
        """Append the ctypes field for value ``idx`` to ``fields``."""
        field_type = self.types[idx]
        if field_type == "s":
            ptype = ct.c_char * self.string_size
        else:
            ptype = Probe.p_type[field_type]
        fields.append(("v%d" % idx, ptype))

    def _generate_python_data_decl(self):
        """Build the ctypes Structure that mirrors the BPF event struct.

        The field order matches the struct emitted by ``_generate_data_decl``.
        """
        self.python_struct_name = "%s_%d_Data" % \
            (self._display_function(), self.probe_num)
        fields = []
        if self.time_field:
            fields.append(("timestamp_ns", ct.c_ulonglong))
        if self.print_cpu:
            fields.append(("cpu", ct.c_int))
        fields.extend([
            ("tgid", ct.c_uint),
            ("pid", ct.c_uint),
            ("comm", ct.c_char * 16)       # TASK_COMM_LEN
        ])
        for i in range(0, len(self.types)):
            self._generate_python_field_decl(i, fields)
        if self.kernel_stack:
            fields.append(("kernel_stack_id", ct.c_int))
        if self.user_stack:
            fields.append(("user_stack_id", ct.c_int))
        return type(self.python_struct_name, (ct.Structure,),
                    dict(_fields_=fields))

    # C type used in the BPF struct for each format specifier.
    c_type = {"u": "unsigned int", "d": "int",
              "lu": "unsigned long", "ld": "long",
              "llu": "unsigned long long", "lld": "long long",
              "hu": "unsigned short", "hd": "short",
              "x": "unsigned int", "lx": "unsigned long",
              "llx": "unsigned long long",
              "c": "char", "K": "unsigned long long",
              "U": "unsigned long long"}
    fmt_types = c_type.keys()

    def _generate_field_decl(self, idx):
        """Return the C declaration of struct member ``v<idx>``."""
        field_type = self.types[idx]
        if field_type == "s":
            return "char v%d[%d];\n" % (idx, self.string_size)
        if field_type in Probe.fmt_types:
            return "%s v%d;\n" % (Probe.c_type[field_type], idx)
        self._bail("unrecognized format specifier %s" % field_type)

    def _generate_data_decl(self):
        """Return the C text declaring the event struct and its BPF tables."""
        # The BPF program will populate values into the struct
        # according to the format string, and the Python program will
        # construct the final display string.
        self.events_name = "%s_events" % self.probe_name
        self.struct_name = "%s_data_t" % self.probe_name
        self.stacks_name = "%s_stacks" % self.probe_name
        stack_type = "BPF_STACK_TRACE" if self.build_id_enabled is False \
            else "BPF_STACK_TRACE_BUILDID"
        stack_table = "%s(%s, 1024);" % (stack_type, self.stacks_name) \
            if (self.kernel_stack or self.user_stack) else ""
        data_fields = "".join(
            "        " + self._generate_field_decl(i)
            for i in range(len(self.types)))
        time_str = "u64 timestamp_ns;" if self.time_field else ""
        cpu_str = "int cpu;" if self.print_cpu else ""
        kernel_stack_str = "       int kernel_stack_id;" \
            if self.kernel_stack else ""
        user_stack_str = "       int user_stack_id;" \
            if self.user_stack else ""

        text = """
struct %s
{
%s
%s
        u32 tgid;
        u32 pid;
        char comm[TASK_COMM_LEN];
%s
%s
%s
};

BPF_PERF_OUTPUT(%s);
%s
"""
        return text % (self.struct_name, time_str, cpu_str, data_fields,
                       kernel_stack_str, user_stack_str,
                       self.events_name, stack_table)

    def _generate_field_assign(self, idx):
        """Return the C text that stores value ``idx`` into ``__data``."""
        field_type = self.types[idx]
        expr = self.values[idx].strip()
        text = ""
        if self.probe_type == "u" and expr[0:3] == "arg":
            # argN is 1-based in the probe text, the ctype lookup is 0-based.
            arg_index = int(expr[3])
            arg_ctype = self.usdt.get_probe_arg_ctype(
                self.usdt_name, arg_index - 1)
            text = ("        %s %s = 0;\n" +
                    "        bpf_usdt_readarg(%s, ctx, &%s);\n") \
                % (arg_ctype, expr, expr[3], expr)
        probe_read_func = "bpf_probe_read"
        if field_type == "s":
            if self.library:
                probe_read_func = "bpf_probe_read_user"
            else:
                alias_to_check = Probe.aliases_indarg \
                    if self.is_syscall_kprobe \
                    else Probe.aliases_arg
                # A kernel probe reads user memory only for arguments the
                # user marked with @user.
                for arg, alias in alias_to_check.items():
                    if alias == expr and arg in self.probe_user_list:
                        probe_read_func = "bpf_probe_read_user"
                        break
            return text + """
        if (%s != 0) {
                void *__tmp = (void *)%s;
                %s(&__data.v%d, sizeof(__data.v%d), __tmp);
        }
""" % (expr, expr, probe_read_func, idx, idx)
        if field_type in Probe.fmt_types:
            return text + "        __data.v%d = (%s)%s;\n" % \
                (idx, Probe.c_type[field_type], expr)
        self._bail("unrecognized field type %s" % field_type)

    def _generate_usdt_filter_read(self):
        """Return C text reading the USDT args used by the filter.

        Side effect: every ``argN`` in ``self.filter`` is renamed to
        ``argN_filter``. Returns "" for non-USDT probes.
        """
        text = ""
        if self.probe_type != "u":
            return text
        for arg in Probe.aliases_arg:
            if not (arg in self.filter):
                continue
            arg_index = int(arg.replace("arg", ""))
            arg_ctype = self.usdt.get_probe_arg_ctype(
                self.usdt_name, arg_index - 1)
            if not arg_ctype:
                self._bail("Unable to determine type of {} "
                           "in the filter".format(arg))
            text += """
        {} {}_filter;
        bpf_usdt_readarg({}, ctx, &{}_filter);
""".format(arg_ctype, arg, arg_index, arg)
            self.filter = self.filter.replace(
                arg, "{}_filter".format(arg))
        return text

    def generate_program(self, include_self):
        """Return the complete C text of this probe's BPF program.

        ``include_self`` false adds a filter that drops events coming from
        this process, unless a pid/tgid filter is already in effect.
        """
        data_decl = self._generate_data_decl()
        if Probe.pid != -1:
            pid_filter = """
        if (__pid != %d) { return 0; }
""" % Probe.pid
        # uprobes can have a built-in tgid filter passed to
        # attach_uprobe, hence the check here -- for kprobes, we
        # need to do the tgid test by hand:
        elif len(self.library) == 0 and Probe.tgid != -1:
            pid_filter = """
        if (__tgid != %d) { return 0; }
""" % Probe.tgid
        elif not include_self:
            pid_filter = """
        if (__tgid == %d) { return 0; }
""" % os.getpid()
        else:
            pid_filter = ""

        if self.cgroup_map_name is not None:
            cgroup_filter = """
        if (%s.check_current_task(0) <= 0) { return 0; }
""" % self.cgroup_map_name
        else:
            cgroup_filter = ""

        prefix = ""
        signature = "struct pt_regs *ctx"
        if self.signature:
            signature += ", " + self.signature

        data_fields = "".join(
            self._generate_field_assign(i)
            for i in range(len(self.values)))

        if self.probe_type == "t":
            heading = "TRACEPOINT_PROBE(%s, %s)" % \
                      (self.tp_category, self.tp_event)
            ctx_name = "args"
        else:
            heading = "int %s(%s)" % (self.probe_name, signature)
            ctx_name = "ctx"

        time_str = """
        __data.timestamp_ns = bpf_ktime_get_ns();""" if self.time_field else ""
        cpu_str = """
        __data.cpu = bpf_get_smp_processor_id();""" if self.print_cpu else ""
        stack_trace = ""
        if self.user_stack:
            stack_trace += """
        __data.user_stack_id = %s.get_stackid(
          %s, BPF_F_USER_STACK
        );""" % (self.stacks_name, ctx_name)
        if self.kernel_stack:
            stack_trace += """
        __data.kernel_stack_id = %s.get_stackid(
          %s, 0
        );""" % (self.stacks_name, ctx_name)

        text = heading + """
{
        u64 __pid_tgid = bpf_get_current_pid_tgid();
        u32 __tgid = __pid_tgid >> 32;
        u32 __pid = __pid_tgid; // implicit cast to u32 for bottom half
        %s
        %s
        %s
        %s
        if (!(%s)) return 0;

        struct %s __data = {0};
        %s
        %s
        __data.tgid = __tgid;
        __data.pid = __pid;
        bpf_get_current_comm(&__data.comm, sizeof(__data.comm));
%s
%s
        %s.perf_submit(%s, &__data, sizeof(__data));
        return 0;
}
"""
        # _generate_usdt_filter_read() rewrites self.filter, so it must be
        # evaluated before self.filter is read; tuple elements are evaluated
        # left to right.
        text = text % (pid_filter, cgroup_filter, prefix,
                       self._generate_usdt_filter_read(), self.filter,
                       self.struct_name, time_str, cpu_str, data_fields,
                       stack_trace, self.events_name, ctx_name)

        return self.streq_functions + data_decl + "\n" + text

    @classmethod
    def _time_off_str(cls, timestamp_ns):
        """Format ``timestamp_ns`` as seconds relative to ``first_ts``.

        With ``print_unix_timestamp`` set, ``first_ts_real`` is added to the
        offset.
        """
        offset = 1e-9 * (timestamp_ns - cls.first_ts)
        if cls.print_unix_timestamp:
            return "%.6f" % (offset + cls.first_ts_real)
        return "%.6f" % offset

    def _display_function(self):
        """Return the name shown for this probe in output and identifiers."""
        if self.probe_type == 'p' or self.probe_type == 'r':
            return self.function
        elif self.probe_type == 'u':
            return self.usdt_name
        else:   # self.probe_type == 't'
            return self.tp_event

    def print_stack(self, bpf, stack_id, tgid):
        """Print the frames of ``stack_id``, or the id itself when negative."""
        if stack_id < 0:
            print("        %d" % stack_id)
            return

        stack = list(bpf.get_table(self.stacks_name).walk(stack_id))
        for addr in stack:
            print("        ", end="")
            if Probe.print_address:
                print("%16x " % addr, end="")
            print("%s" % (bpf.sym(addr, tgid,
                                  show_module=True, show_offset=True)))

    def _format_message(self, bpf, tgid, values):
        """Return the formatted message for one event.

        ``values`` may be any iterable; it is copied into a list first so
        that symbol substitution works on iterators (e.g. a Python 3 ``map``
        object) and the caller's sequence is left untouched.
        """
        values = list(values)
        # Replace each %K with kernel sym and %U with user sym in tgid
        for i, field_type in enumerate(self.types):
            if field_type == 'K':
                values[i] = bpf.ksym(values[i], show_offset=True)
        for i, field_type in enumerate(self.types):
            if field_type == 'U':
                values[i] = bpf.sym(values[i], tgid,
                                    show_module=True, show_offset=True)
        return self.python_format % tuple(values)

    def print_event(self, bpf, cpu, data, size):
        """Perf-buffer callback: decode one event and print it."""
        # Cast as the generated structure type and display
        # according to the format string in the probe.
        event = ct.cast(data, ct.POINTER(self.python_struct)).contents
        if self.name and bytes(self.name) not in event.comm:
            return
        # A real list (not a lazy map object) so values can be indexed.
        values = [getattr(event, "v%d" % i)
                  for i in range(len(self.values))]
        msg = self._format_message(bpf, event.tgid, values)
        # NOTE(review): msg is text while the filter is wrapped in bytes();
        # this looks Python-2 specific -- confirm how msg_filter is passed.
        if self.msg_filter and bytes(self.msg_filter) not in msg:
            return
        if Probe.print_time:
            timestamp = strftime("%H:%M:%S") if Probe.use_localtime else \
                Probe._time_off_str(event.timestamp_ns)
            if Probe.print_unix_timestamp:
                print("%-17s " % timestamp[:17], end="")
            else:
                print("%-8s " % timestamp[:8], end="")
        if Probe.print_cpu:
            print("%-3s " % event.cpu, end="")
        print("%-7d %-7d %-15s %-16s %s" %
              (event.tgid, event.pid,
               event.comm.decode('utf-8', 'replace'),
               self._display_function(), msg))

        if self.kernel_stack:
            self.print_stack(bpf, event.kernel_stack_id, -1)
        if self.user_stack:
            self.print_stack(bpf, event.user_stack_id, event.tgid)
        if self.user_stack or self.kernel_stack:
            print("")

        Probe.event_count += 1
        if Probe.max_events is not None and \
           Probe.event_count >= Probe.max_events:
            exit()
        sys.stdout.flush()

    def attach(self, bpf, verbose):
        """Attach the probe and open its perf buffer on ``bpf``."""
        if len(self.library) == 0:
            self._attach_k(bpf)
        else:
            self._attach_u(bpf)
        self.python_struct = self._generate_python_data_decl()
        callback = partial(self.print_event, bpf)
        bpf[self.events_name].open_perf_buffer(callback,
                                               page_cnt=self.page_cnt)

    def _attach_k(self, bpf):
        """Attach a kprobe or kretprobe."""
        if self.probe_type == "r":
            bpf.attach_kretprobe(event=self.function,
                                 fn_name=self.probe_name)
        elif self.probe_type == "p":
            bpf.attach_kprobe(event=self.function,
                              fn_name=self.probe_name)
        # Note that tracepoints don't need an explicit attach

    def _attach_u(self, bpf):
        """Resolve the library path and attach a uprobe or uretprobe."""
        libpath = BPF.find_library(self.library)
        if libpath is None:
            # This might be an executable (e.g. 'bash')
            libpath = BPF.find_exe(self.library)
        if libpath is None or len(libpath) == 0:
            self._bail("unable to find library %s" % self.library)

        if self.probe_type == "u":
            pass    # Was already enabled by the BPF constructor
        elif self.probe_type == "r":
            bpf.attach_uretprobe(name=libpath,
                                 sym=self.function,
                                 fn_name=self.probe_name,
                                 pid=Probe.tgid)
        else:
            bpf.attach_uprobe(name=libpath,
                              sym=self.function,
                              fn_name=self.probe_name,
                              pid=Probe.tgid)
class Probe(object):
    """One trace probe: parses a probe spec and emits/attaches its BPF code.

    A probe string has the shape ``spec [(filter)] ["fmt", args]``. The
    instance parses that string, generates the C text of the BPF program
    that collects the requested values, attaches the program, and prints the
    events delivered through the perf buffer.

    Class-level attributes hold settings shared by every probe; they are
    populated from parsed command-line arguments by :meth:`configure`.
    """

    probe_count = 0
    max_events = None
    event_count = 0
    first_ts = 0
    use_localtime = True
    pid = -1

    @classmethod
    def configure(cls, args):
        """Copy the shared settings from the parsed arguments onto the class."""
        cls.max_events = args.max_events
        cls.use_localtime = not args.offset
        cls.first_ts = Time.monotonic_time()
        cls.pid = args.pid or -1

    def __init__(self, probe, string_size, kernel_stack, user_stack):
        """Parse ``probe`` and derive the BPF function name for it.

        Raises ValueError (through ``_bail``) when the probe text is invalid.
        """
        self.usdt = None
        self.raw_probe = probe
        self.string_size = string_size
        self.kernel_stack = kernel_stack
        self.user_stack = user_stack
        Probe.probe_count += 1
        self._parse_probe()
        self.probe_num = Probe.probe_count
        self.probe_name = "probe_%s_%d" % (self._display_function(),
                                           self.probe_num)

    def __str__(self):
        return "%s:%s:%s FLT=%s ACT=%s/%s" % (
            self.probe_type, self.library, self._display_function(),
            self.filter, self.types, self.values)

    def is_default_action(self):
        """Return True when the probe has no format string."""
        return self.python_format == ""

    def _bail(self, error):
        """Raise ValueError describing ``error`` for this probe."""
        raise ValueError("error in probe '%s': %s" %
                         (self.raw_probe, error))

    def _parse_probe(self):
        """Split the raw probe text into spec, filter and action."""
        text = self.raw_probe

        # Everything until the first space is the probe specifier
        first_space = text.find(' ')
        spec = text[:first_space] if first_space >= 0 else text
        self._parse_spec(spec)
        if first_space >= 0:
            text = text[first_space:].lstrip()
        else:
            text = ""

        # If we now have a (, wait for the balanced closing ) and that
        # will be the predicate
        self.filter = None
        if len(text) > 0 and text[0] == "(":
            balance = 1
            for i in range(1, len(text)):
                if text[i] == "(":
                    balance += 1
                if text[i] == ")":
                    balance -= 1
                if balance == 0:
                    self._parse_filter(text[:i + 1])
                    text = text[i + 1:]
                    break
            if self.filter is None:
                self._bail("unmatched end of predicate")

        if self.filter is None:
            self.filter = "1"

        # The remainder of the text is the printf action
        self._parse_action(text.lstrip())

    def _parse_spec(self, spec):
        """Decode ``[type:][library:]function`` into instance attributes."""
        parts = spec.split(":")
        # Two special cases: 'func' means 'p::func', 'lib:func' means
        # 'p:lib:func'. Other combinations need to provide an empty
        # value between delimiters, e.g. 'r::func' for a kretprobe on
        # the function func.
        if len(parts) == 1:
            parts = ["p", "", parts[0]]
        elif len(parts) == 2:
            parts = ["p", parts[0], parts[1]]
        if len(parts[0]) == 0:
            self.probe_type = "p"
        elif parts[0] in ["p", "r", "t", "u"]:
            self.probe_type = parts[0]
        else:
            self._bail("probe type must be '', 'p', 't', 'r', " +
                       "or 'u', but got '%s'" % parts[0])
        if self.probe_type == "t":
            self.tp_category = parts[1]
            self.tp_event = parts[2]
            self.library = ""       # kernel
            self.function = ""      # generated from TRACEPOINT_PROBE
        elif self.probe_type == "u":
            self.library = parts[1]
            self.usdt_name = parts[2]
            self.function = ""      # no function, just address
            # We will discover the USDT provider by matching on
            # the USDT name in the specified library
            self._find_usdt_probe()
        else:
            self.library = parts[1]
            self.function = parts[2]

    def _find_usdt_probe(self):
        """Open the USDT context and verify that the named probe exists."""
        self.usdt = USDT(path=self.library, pid=Probe.pid)
        for probe in self.usdt.enumerate_probes():
            if probe.name == self.usdt_name:
                return  # Found it, will enable later
        self._bail("unrecognized USDT probe %s" % self.usdt_name)

    def _parse_filter(self, filt):
        """Store the predicate with aliases expanded."""
        self.filter = self._replace_args(filt)

    def _parse_types(self, fmt):
        """Record the format specifiers of ``fmt`` and build the Python format.

        All integer widths collapse to ``%d``/``%x`` on the Python side.
        """
        for match in re.finditer(
                r'[^%]%(s|u|d|llu|lld|hu|hd|x|llx|c)', fmt):
            self.types.append(match.group(1))
        fmt = re.sub(r'([^%]%)(u|d|llu|lld|hu|hd)', r'\1d', fmt)
        fmt = re.sub(r'([^%]%)(x|llx)', r'\1x', fmt)
        self.python_format = fmt.strip('"')

    def _parse_action(self, action):
        """Parse the ``"format", expr, ...`` tail of the probe."""
        self.values = []
        self.types = []
        self.python_format = ""
        if len(action) == 0:
            return

        action = action.strip()
        match = re.search(r'(\".*\"),?(.*)', action)
        if match is None:
            self._bail("expected format string in \"s")

        self.raw_format = match.group(1)
        self._parse_types(self.raw_format)
        for part in match.group(2).split(','):
            part = self._replace_args(part)
            if len(part) > 0:
                self.values.append(part)

    aliases = {
        "retval": "PT_REGS_RC(ctx)",
        "arg1": "PT_REGS_PARM1(ctx)",
        "arg2": "PT_REGS_PARM2(ctx)",
        "arg3": "PT_REGS_PARM3(ctx)",
        "arg4": "PT_REGS_PARM4(ctx)",
        "arg5": "PT_REGS_PARM5(ctx)",
        "arg6": "PT_REGS_PARM6(ctx)",
        "$uid": "(unsigned)(bpf_get_current_uid_gid() & 0xffffffff)",
        "$gid": "(unsigned)(bpf_get_current_uid_gid() >> 32)",
        "$pid": "(unsigned)(bpf_get_current_pid_tgid() & 0xffffffff)",
        "$tgid": "(unsigned)(bpf_get_current_pid_tgid() >> 32)",
        "$cpu": "bpf_get_smp_processor_id()"
    }

    def _replace_args(self, expr):
        """Expand aliases in ``expr`` and return the resulting C expression."""
        for alias, replacement in Probe.aliases.items():
            # For USDT probes, we replace argN values with the
            # actual arguments for that probe obtained using special
            # bpf_readarg_N macros emitted at BPF construction.
            if alias.startswith("arg") and self.probe_type == "u":
                continue
            expr = expr.replace(alias, replacement)
        return expr

    # ctypes type used on the Python side for each format specifier.
    p_type = {"u": ct.c_uint, "d": ct.c_int,
              "llu": ct.c_ulonglong, "lld": ct.c_longlong,
              "hu": ct.c_ushort, "hd": ct.c_short,
              "x": ct.c_uint, "llx": ct.c_ulonglong,
              "c": ct.c_ubyte}

    def _generate_python_field_decl(self, idx, fields):
        """Append the ctypes field for value ``idx`` to ``fields``."""
        field_type = self.types[idx]
        if field_type == "s":
            ptype = ct.c_char * self.string_size
        else:
            ptype = Probe.p_type[field_type]
        fields.append(("v%d" % idx, ptype))

    def _generate_python_data_decl(self):
        """Build the ctypes Structure that mirrors the BPF event struct.

        The field order matches the struct emitted by ``_generate_data_decl``.
        """
        self.python_struct_name = "%s_%d_Data" % \
            (self._display_function(), self.probe_num)
        fields = [
            ("timestamp_ns", ct.c_ulonglong),
            ("pid", ct.c_uint),
            ("comm", ct.c_char * 16)       # TASK_COMM_LEN
        ]
        for i in range(0, len(self.types)):
            self._generate_python_field_decl(i, fields)
        if self.kernel_stack:
            fields.append(("kernel_stack_id", ct.c_int))
        if self.user_stack:
            fields.append(("user_stack_id", ct.c_int))
        return type(self.python_struct_name, (ct.Structure,),
                    dict(_fields_=fields))

    # C type used in the BPF struct for each format specifier.
    c_type = {"u": "unsigned int", "d": "int",
              "llu": "unsigned long long", "lld": "long long",
              "hu": "unsigned short", "hd": "short",
              "x": "unsigned int", "llx": "unsigned long long",
              "c": "char"}
    fmt_types = c_type.keys()

    def _generate_field_decl(self, idx):
        """Return the C declaration of struct member ``v<idx>``."""
        field_type = self.types[idx]
        if field_type == "s":
            return "char v%d[%d];\n" % (idx, self.string_size)
        if field_type in Probe.fmt_types:
            return "%s v%d;\n" % (Probe.c_type[field_type], idx)
        self._bail("unrecognized format specifier %s" % field_type)

    def _generate_data_decl(self):
        """Return the C text declaring the event struct and its BPF tables."""
        # The BPF program will populate values into the struct
        # according to the format string, and the Python program will
        # construct the final display string.
        self.events_name = "%s_events" % self.probe_name
        self.struct_name = "%s_data_t" % self.probe_name
        self.stacks_name = "%s_stacks" % self.probe_name
        stack_table = "BPF_STACK_TRACE(%s, 1024);" % self.stacks_name \
            if (self.kernel_stack or self.user_stack) else ""
        data_fields = "".join(
            "        " + self._generate_field_decl(i)
            for i in range(len(self.types)))
        kernel_stack_str = "       int kernel_stack_id;" \
            if self.kernel_stack else ""
        user_stack_str = "       int user_stack_id;" \
            if self.user_stack else ""

        text = """
struct %s
{
        u64 timestamp_ns;
        u32 pid;
        char comm[TASK_COMM_LEN];
%s
%s
%s
};

BPF_PERF_OUTPUT(%s);
%s
"""
        return text % (self.struct_name, data_fields,
                       kernel_stack_str, user_stack_str,
                       self.events_name, stack_table)

    def _generate_field_assign(self, idx):
        """Return the C text that stores value ``idx`` into ``__data``."""
        field_type = self.types[idx]
        expr = self.values[idx].strip()
        text = ""
        if self.probe_type == "u" and expr[0:3] == "arg":
            text = ("        u64 %s = 0;\n" +
                    "        bpf_usdt_readarg(%s, ctx, &%s);\n") % \
                (expr, expr[3], expr)
        if field_type == "s":
            return text + """
        if (%s != 0) {
                bpf_probe_read(&__data.v%d, sizeof(__data.v%d), (void *)%s);
        }
""" % (expr, idx, idx, expr)
        if field_type in Probe.fmt_types:
            return text + "        __data.v%d = (%s)%s;\n" % \
                (idx, Probe.c_type[field_type], expr)
        self._bail("unrecognized field type %s" % field_type)

    def _generate_usdt_filter_read(self):
        """Return C text reading the USDT args used by the filter.

        Side effect: every ``argN`` in ``self.filter`` is renamed to
        ``argN_filter``. Returns "" for non-USDT probes.
        """
        text = ""
        if self.probe_type != "u":
            return text
        for arg in Probe.aliases:
            if not (arg.startswith("arg") and (arg in self.filter)):
                continue
            arg_index = int(arg.replace("arg", ""))
            # argN is 1-based in the probe text while the ctype lookup is
            # 0-based; passing arg_index unchanged asked for the type of
            # the following argument.
            arg_ctype = self.usdt.get_probe_arg_ctype(
                self.usdt_name, arg_index - 1)
            if not arg_ctype:
                self._bail("Unable to determine type of {} "
                           "in the filter".format(arg))
            text += """
        {} {}_filter;
        bpf_usdt_readarg({}, ctx, &{}_filter);
""".format(arg_ctype, arg, arg_index, arg)
            self.filter = self.filter.replace(
                arg, "{}_filter".format(arg))
        return text

    def generate_program(self, include_self):
        """Return the complete C text of this probe's BPF program.

        ``include_self`` false adds a filter that drops events coming from
        this process, unless a pid filter is already in effect.
        """
        data_decl = self._generate_data_decl()
        # kprobes don't have built-in pid filters, so we have to add
        # it to the function body:
        if len(self.library) == 0 and Probe.pid != -1:
            pid_filter = """
        u32 __pid = bpf_get_current_pid_tgid();
        if (__pid != %d) { return 0; }
""" % Probe.pid
        elif not include_self:
            pid_filter = """
        u32 __pid = bpf_get_current_pid_tgid();
        if (__pid == %d) { return 0; }
""" % os.getpid()
        else:
            pid_filter = ""

        prefix = ""
        signature = "struct pt_regs *ctx"

        data_fields = "".join(
            self._generate_field_assign(i)
            for i in range(len(self.values)))

        if self.probe_type == "t":
            heading = "TRACEPOINT_PROBE(%s, %s)" % \
                      (self.tp_category, self.tp_event)
            ctx_name = "args"
        else:
            heading = "int %s(%s)" % (self.probe_name, signature)
            ctx_name = "ctx"

        # The context variable is named "args" inside TRACEPOINT_PROBE, so
        # the stack lookups must use ctx_name rather than a literal "ctx".
        stack_trace = ""
        if self.user_stack:
            stack_trace += """
        __data.user_stack_id = %s.get_stackid(
          %s, BPF_F_REUSE_STACKID | BPF_F_USER_STACK
        );""" % (self.stacks_name, ctx_name)
        if self.kernel_stack:
            stack_trace += """
        __data.kernel_stack_id = %s.get_stackid(
          %s, BPF_F_REUSE_STACKID
        );""" % (self.stacks_name, ctx_name)

        text = heading + """
{
        %s
        %s
        %s
        if (!(%s)) return 0;

        struct %s __data = {0};
        __data.timestamp_ns = bpf_ktime_get_ns();
        __data.pid = bpf_get_current_pid_tgid();
        bpf_get_current_comm(&__data.comm, sizeof(__data.comm));
%s
%s
        %s.perf_submit(%s, &__data, sizeof(__data));
        return 0;
}
"""
        # _generate_usdt_filter_read() rewrites self.filter, so it must be
        # evaluated before self.filter is read; tuple elements are evaluated
        # left to right.
        text = text % (pid_filter, prefix,
                       self._generate_usdt_filter_read(), self.filter,
                       self.struct_name, data_fields, stack_trace,
                       self.events_name, ctx_name)

        return data_decl + "\n" + text

    @classmethod
    def _time_off_str(cls, timestamp_ns):
        """Format ``timestamp_ns`` as seconds relative to ``first_ts``."""
        return "%.6f" % (1e-9 * (timestamp_ns - cls.first_ts))

    def _display_function(self):
        """Return the name shown for this probe in output and identifiers."""
        if self.probe_type == 'p' or self.probe_type == 'r':
            return self.function
        elif self.probe_type == 'u':
            return self.usdt_name
        else:   # self.probe_type == 't'
            return self.tp_event

    def print_stack(self, bpf, stack_id, pid):
        """Print the frames of ``stack_id``, or the id itself when negative."""
        if stack_id < 0:
            print("        %d" % stack_id)
            return

        stack = list(bpf.get_table(self.stacks_name).walk(stack_id))
        for addr in stack:
            print("        %016x %s" % (addr, bpf.sym(addr, pid)))

    def print_event(self, bpf, cpu, data, size):
        """Perf-buffer callback: decode one event and print it."""
        # Cast as the generated structure type and display
        # according to the format string in the probe.
        event = ct.cast(data, ct.POINTER(self.python_struct)).contents
        values = [getattr(event, "v%d" % i)
                  for i in range(len(self.values))]
        msg = self.python_format % tuple(values)
        timestamp = strftime("%H:%M:%S") if Probe.use_localtime else \
            Probe._time_off_str(event.timestamp_ns)
        print("%-8s %-6d %-12s %-16s %s" %
              (timestamp[:8], event.pid, event.comm[:12],
               self._display_function(), msg))

        if self.user_stack:
            print("    User Stack Trace:")
            self.print_stack(bpf, event.user_stack_id, event.pid)
        if self.kernel_stack:
            print("    Kernel Stack Trace:")
            self.print_stack(bpf, event.kernel_stack_id, -1)
        if self.user_stack or self.kernel_stack:
            print("")

        Probe.event_count += 1
        if Probe.max_events is not None and \
           Probe.event_count >= Probe.max_events:
            exit()

    def attach(self, bpf, verbose):
        """Attach the probe and open its perf buffer on ``bpf``."""
        if len(self.library) == 0:
            self._attach_k(bpf)
        else:
            self._attach_u(bpf)
        self.python_struct = self._generate_python_data_decl()
        callback = partial(self.print_event, bpf)
        bpf[self.events_name].open_perf_buffer(callback)

    def _attach_k(self, bpf):
        """Attach a kprobe or kretprobe."""
        if self.probe_type == "r":
            bpf.attach_kretprobe(event=self.function,
                                 fn_name=self.probe_name)
        elif self.probe_type == "p":
            bpf.attach_kprobe(event=self.function,
                              fn_name=self.probe_name)
        # Note that tracepoints don't need an explicit attach

    def _attach_u(self, bpf):
        """Resolve the library path and attach a uprobe or uretprobe."""
        libpath = BPF.find_library(self.library)
        if libpath is None:
            # This might be an executable (e.g. 'bash')
            libpath = BPF.find_exe(self.library)
        if libpath is None or len(libpath) == 0:
            self._bail("unable to find library %s" % self.library)

        if self.probe_type == "u":
            pass    # Was already enabled by the BPF constructor
        elif self.probe_type == "r":
            bpf.attach_uretprobe(name=libpath,
                                 sym=self.function,
                                 fn_name=self.probe_name,
                                 pid=Probe.pid)
        else:
            bpf.attach_uprobe(name=libpath,
                              sym=self.function,
                              fn_name=self.probe_name,
                              pid=Probe.pid)
class Probe(object):
    """One trace probe: parse its specification, generate BPF C, attach, print.

    A probe string has the shape ``SPEC [(FILTER)] ["FORMAT"[, EXPR...]]``:

    * ``SPEC`` is ``[type:][library:]function`` where type is one of
      ``p`` (entry probe), ``r`` (return probe), ``t`` (tracepoint,
      ``t:category:event``) or ``u`` (USDT, ``u:library:probe``).
    * ``FILTER`` is a parenthesized C predicate; when absent it is ``1``.
    * ``FORMAT`` is a printf-like string whose specifiers determine the type
      of each captured ``EXPR``.

    Class attributes hold configuration shared by all probes (see
    ``configure``) and the running counters used to build unique names.
    """

    probe_count = 0       # number of Probe instances created so far
    streq_index = 0       # next suffix for generated streq_N helpers
    max_events = None     # stop after this many events (None = unlimited)
    event_count = 0       # events printed so far, across all probes
    first_ts = 0          # reference timestamp used for offset display
    use_localtime = True  # print wall-clock time instead of an offset
    tgid = -1             # -1 means "no tgid filter"
    pid = -1              # -1 means "no pid filter"

    @classmethod
    def configure(cls, args):
        """Copy the shared settings from the parsed command-line ``args``.

        Reads ``args.max_events``, ``args.offset``, ``args.tgid`` and
        ``args.pid``; a falsy tgid/pid is stored as -1.
        """
        cls.max_events = args.max_events
        cls.use_localtime = not args.offset
        cls.first_ts = Time.monotonic_time()
        cls.tgid = args.tgid or -1
        cls.pid = args.pid or -1

    def __init__(self, probe, string_size, kernel_stack, user_stack):
        """Parse ``probe`` and assign this instance a unique probe name.

        ``string_size`` is the buffer size used for ``%s`` fields;
        ``kernel_stack`` / ``user_stack`` enable stack-id capture.
        Raises ValueError (via ``_bail``) if the probe text is malformed.
        """
        self.usdt = None
        self.streq_functions = ""
        self.raw_probe = probe
        self.string_size = string_size
        self.kernel_stack = kernel_stack
        self.user_stack = user_stack
        Probe.probe_count += 1
        self._parse_probe()
        self.probe_num = Probe.probe_count
        self.probe_name = "probe_%s_%d" % (self._display_function(),
                                           self.probe_num)

    def __str__(self):
        return "%s:%s:%s FLT=%s ACT=%s/%s" % (
            self.probe_type,
            self.library,
            self._display_function(),
            self.filter,
            self.types,
            self.values,
        )

    def is_default_action(self):
        """Return True when the probe has no format string of its own."""
        return self.python_format == ""

    def _bail(self, error):
        """Raise ValueError describing ``error`` in terms of the raw probe."""
        raise ValueError("error in probe '%s': %s" % (self.raw_probe, error))

    def _parse_probe(self):
        """Split the raw probe text into specifier, filter and action."""
        text = self.raw_probe

        # Everything until the first space is the probe specifier
        first_space = text.find(" ")
        spec = text[:first_space] if first_space >= 0 else text
        self._parse_spec(spec)
        if first_space >= 0:
            text = text[first_space:].lstrip()
        else:
            text = ""

        # If we now have a (, wait for the balanced closing ) and that
        # will be the predicate
        self.filter = None
        if len(text) > 0 and text[0] == "(":
            balance = 1
            for i in range(1, len(text)):
                if text[i] == "(":
                    balance += 1
                if text[i] == ")":
                    balance -= 1
                if balance == 0:
                    self._parse_filter(text[: i + 1])
                    text = text[i + 1:]
                    break
            if self.filter is None:
                self._bail("unmatched end of predicate")

        if self.filter is None:
            self.filter = "1"

        # The remainder of the text is the printf action
        self._parse_action(text.lstrip())

    def _parse_spec(self, spec):
        """Decode the colon-separated probe specifier into instance fields."""
        parts = spec.split(":")
        # Two special cases: 'func' means 'p::func', 'lib:func' means
        # 'p:lib:func'. Other combinations need to provide an empty
        # value between delimiters, e.g. 'r::func' for a kretprobe on
        # the function func.
        if len(parts) == 1:
            parts = ["p", "", parts[0]]
        elif len(parts) == 2:
            parts = ["p", parts[0], parts[1]]

        if len(parts[0]) == 0:
            self.probe_type = "p"
        elif parts[0] in ["p", "r", "t", "u"]:
            self.probe_type = parts[0]
        else:
            self._bail("probe type must be '', 'p', 't', 'r', " +
                       "or 'u', but got '%s'" % parts[0])

        if self.probe_type == "t":
            self.tp_category = parts[1]
            self.tp_event = parts[2]
            self.library = ""   # kernel
            self.function = ""  # from TRACEPOINT_PROBE
        elif self.probe_type == "u":
            self.library = parts[1]
            self.usdt_name = parts[2]
            self.function = ""  # no function, just address
            # We will discover the USDT provider by matching on
            # the USDT name in the specified library
            self._find_usdt_probe()
        else:
            self.library = parts[1]
            self.function = parts[2]

    def _find_usdt_probe(self):
        """Locate ``self.usdt_name`` in the target; bail if it is unknown."""
        target = Probe.pid if Probe.pid and Probe.pid != -1 else Probe.tgid
        self.usdt = USDT(path=self.library, pid=target)
        for probe in self.usdt.enumerate_probes():
            if probe.name == self.usdt_name:
                return  # Found it, will enable later
        self._bail("unrecognized USDT probe %s" % self.usdt_name)

    def _parse_filter(self, filt):
        """Store the predicate with aliases expanded."""
        self.filter = self._rewrite_expr(filt)

    def _parse_types(self, fmt):
        """Record each format specifier and derive the Python format string.

        Integer width modifiers are dropped (Python's ``%d``/``%x`` handle
        any width) and ``%K``/``%U`` become ``%s`` because they are printed
        as resolved symbol strings.
        """
        for match in re.finditer(r"[^%]%(s|u|d|llu|lld|hu|hd|x|llx|c|K|U)",
                                 fmt):
            self.types.append(match.group(1))
        fmt = re.sub(r"([^%]%)(u|d|llu|lld|hu|hd)", r"\1d", fmt)
        fmt = re.sub(r"([^%]%)(x|llx)", r"\1x", fmt)
        fmt = re.sub("%K|%U", "%s", fmt)
        self.python_format = fmt.strip('"')

    def _parse_action(self, action):
        """Parse the ``"format", expr, ...`` action into types and values."""
        self.values = []
        self.types = []
        self.python_format = ""
        if len(action) == 0:
            return

        action = action.strip()
        match = re.search(r"(\".*?\"),?(.*)", action)
        if match is None:
            self._bail('expected format string in "s')

        self.raw_format = match.group(1)
        self._parse_types(self.raw_format)
        for part in re.split('(?<!"),', match.group(2)):
            part = self._rewrite_expr(part)
            if len(part) > 0:
                self.values.append(part)

    # Textual substitutions applied to filter and action expressions.
    aliases = {
        "retval": "PT_REGS_RC(ctx)",
        "arg1": "PT_REGS_PARM1(ctx)",
        "arg2": "PT_REGS_PARM2(ctx)",
        "arg3": "PT_REGS_PARM3(ctx)",
        "arg4": "PT_REGS_PARM4(ctx)",
        "arg5": "PT_REGS_PARM5(ctx)",
        "arg6": "PT_REGS_PARM6(ctx)",
        "$uid": "(unsigned)(bpf_get_current_uid_gid() & 0xffffffff)",
        "$gid": "(unsigned)(bpf_get_current_uid_gid() >> 32)",
        "$pid": "(unsigned)(bpf_get_current_pid_tgid() & 0xffffffff)",
        "$tgid": "(unsigned)(bpf_get_current_pid_tgid() >> 32)",
        "$cpu": "bpf_get_smp_processor_id()",
    }

    def _generate_streq_function(self, string):
        """Emit a C helper comparing a pointer against the literal ``string``.

        The helper text is appended to ``self.streq_functions``; the name of
        the generated function is returned.
        """
        fname = "streq_%d" % Probe.streq_index
        Probe.streq_index += 1
        self.streq_functions += """
static inline bool %s(char const *ignored, unsigned long str) {
        char needle[] = %s;
        char haystack[sizeof(needle)];
        bpf_probe_read(&haystack, sizeof(haystack), (void *)str);
        for (int i = 0; i < sizeof(needle); ++i) {
                if (needle[i] != haystack[i]) {
                        return false;
                }
        }
        return true;
}
        """ % (fname, string)
        return fname

    def _rewrite_expr(self, expr):
        """Expand aliases and ``STRCMP("lit", ...)`` calls inside ``expr``."""
        for alias, replacement in Probe.aliases.items():
            # For USDT probes, we replace argN values with the
            # actual arguments for that probe obtained using
            # bpf_readarg_N macros emitted at BPF construction.
            if alias.startswith("arg") and self.probe_type == "u":
                continue
            expr = expr.replace(alias, replacement)
        matches = re.finditer('STRCMP\\(("[^"]+\\")', expr)
        for match in matches:
            string = match.group(1)
            fname = self._generate_streq_function(string)
            # Replace one occurrence per literal so each STRCMP gets its
            # own helper.
            expr = expr.replace("STRCMP", fname, 1)
        return expr

    # Format specifier -> ctypes type used to decode the event in Python.
    p_type = {
        "u": ct.c_uint,
        "d": ct.c_int,
        "llu": ct.c_ulonglong,
        "lld": ct.c_longlong,
        "hu": ct.c_ushort,
        "hd": ct.c_short,
        "x": ct.c_uint,
        "llx": ct.c_ulonglong,
        "c": ct.c_ubyte,
        "K": ct.c_ulonglong,
        "U": ct.c_ulonglong,
    }

    def _generate_python_field_decl(self, idx, fields):
        """Append the ctypes field for value ``idx`` to ``fields``."""
        field_type = self.types[idx]
        if field_type == "s":
            ptype = ct.c_char * self.string_size
        else:
            ptype = Probe.p_type[field_type]
        fields.append(("v%d" % idx, ptype))

    def _generate_python_data_decl(self):
        """Build the ctypes Structure mirroring the generated C event struct."""
        self.python_struct_name = "%s_%d_Data" % (self._display_function(),
                                                  self.probe_num)
        fields = [
            ("timestamp_ns", ct.c_ulonglong),
            ("tgid", ct.c_uint),
            ("pid", ct.c_uint),
            ("comm", ct.c_char * 16),  # TASK_COMM_LEN
        ]
        for i in range(0, len(self.types)):
            self._generate_python_field_decl(i, fields)
        if self.kernel_stack:
            fields.append(("kernel_stack_id", ct.c_int))
        if self.user_stack:
            fields.append(("user_stack_id", ct.c_int))
        return type(self.python_struct_name, (ct.Structure,),
                    dict(_fields_=fields))

    # Format specifier -> C type used in the generated event struct.
    c_type = {
        "u": "unsigned int",
        "d": "int",
        "llu": "unsigned long long",
        "lld": "long long",
        "hu": "unsigned short",
        "hd": "short",
        "x": "unsigned int",
        "llx": "unsigned long long",
        "c": "char",
        "K": "unsigned long long",
        "U": "unsigned long long",
    }
    fmt_types = c_type.keys()

    def _generate_field_decl(self, idx):
        """Return the C struct member declaration for value ``idx``."""
        field_type = self.types[idx]
        if field_type == "s":
            return "char v%d[%d];\n" % (idx, self.string_size)
        if field_type in Probe.fmt_types:
            return "%s v%d;\n" % (Probe.c_type[field_type], idx)
        self._bail("unrecognized format specifier %s" % field_type)

    def _generate_data_decl(self):
        """Return the C event struct, perf output and stack table text."""
        # The BPF program will populate values into the struct
        # according to the format string, and the Python program will
        # construct the final display string.
        self.events_name = "%s_events" % self.probe_name
        self.struct_name = "%s_data_t" % self.probe_name
        self.stacks_name = "%s_stacks" % self.probe_name
        if self.kernel_stack or self.user_stack:
            stack_table = "BPF_STACK_TRACE(%s, 1024);" % self.stacks_name
        else:
            stack_table = ""
        data_fields = "".join(
            "        " + self._generate_field_decl(i)
            for i in range(len(self.types))
        )
        kernel_stack_str = (
            "       int kernel_stack_id;" if self.kernel_stack else ""
        )
        user_stack_str = (
            "       int user_stack_id;" if self.user_stack else ""
        )

        text = """
struct %s
{
        u64 timestamp_ns;
        u32 tgid;
        u32 pid;
        char comm[TASK_COMM_LEN];
%s
%s
%s
};

BPF_PERF_OUTPUT(%s);
%s
"""
        return text % (
            self.struct_name,
            data_fields,
            kernel_stack_str,
            user_stack_str,
            self.events_name,
            stack_table,
        )

    def _generate_field_assign(self, idx):
        """Return the C statements that store value ``idx`` into ``__data``."""
        field_type = self.types[idx]
        expr = self.values[idx].strip()
        text = ""
        if self.probe_type == "u" and expr[0:3] == "arg":
            # USDT arguments are read into a local named after the alias;
            # expr[3] is the single-digit argument index.
            text = ("        u64 %s = 0;\n" +
                    "        bpf_usdt_readarg(%s, ctx, &%s);\n") % (
                        expr, expr[3], expr)

        if field_type == "s":
            return text + """
        if (%s != 0) {
                bpf_probe_read(&__data.v%d, sizeof(__data.v%d), (void *)%s);
        }
            """ % (expr, idx, idx, expr)
        if field_type in Probe.fmt_types:
            return text + "        __data.v%d = (%s)%s;\n" % (
                idx, Probe.c_type[field_type], expr)
        self._bail("unrecognized field type %s" % field_type)

    def _generate_usdt_filter_read(self):
        """Return C that reads the USDT args used by the filter.

        For USDT probes, every ``argN`` mentioned in ``self.filter`` is read
        into an ``argN_filter`` local and the filter is rewritten to use it.
        Non-USDT probes yield an empty string and leave the filter untouched.
        """
        text = ""
        if self.probe_type == "u":
            for arg in Probe.aliases:
                if not (arg.startswith("arg") and (arg in self.filter)):
                    continue
                arg_index = int(arg.replace("arg", ""))
                arg_ctype = self.usdt.get_probe_arg_ctype(self.usdt_name,
                                                          arg_index)
                if not arg_ctype:
                    self._bail("Unable to determine type of {} "
                               "in the filter".format(arg))
                text += """
        {} {}_filter;
        bpf_usdt_readarg({}, ctx, &{}_filter);
                """.format(arg_ctype, arg, arg_index, arg)
                self.filter = self.filter.replace(arg,
                                                  "{}_filter".format(arg))
        return text

    def generate_program(self, include_self):
        """Return the complete BPF C text for this probe.

        When no pid/tgid filter applies and ``include_self`` is false, events
        from the tracing process itself are dropped.
        """
        data_decl = self._generate_data_decl()
        # kprobes don't have built-in pid filters, so we have to add
        # it to the function body:
        if len(self.library) == 0 and Probe.pid != -1:
            pid_filter = """
        if (__pid != %d) { return 0; }
            """ % Probe.pid
        elif len(self.library) == 0 and Probe.tgid != -1:
            pid_filter = """
        if (__tgid != %d) { return 0; }
            """ % Probe.tgid
        elif not include_self:
            pid_filter = """
        if (__tgid == %d) { return 0; }
            """ % os.getpid()
        else:
            pid_filter = ""

        prefix = ""
        signature = "struct pt_regs *ctx"

        data_fields = "".join(
            self._generate_field_assign(i) for i in range(len(self.values))
        )

        if self.probe_type == "t":
            heading = "TRACEPOINT_PROBE(%s, %s)" % (self.tp_category,
                                                    self.tp_event)
            ctx_name = "args"
        else:
            heading = "int %s(%s)" % (self.probe_name, signature)
            ctx_name = "ctx"

        stack_trace = ""
        if self.user_stack:
            stack_trace += """
        __data.user_stack_id = %s.get_stackid(
          %s, BPF_F_REUSE_STACKID | BPF_F_USER_STACK
        );""" % (self.stacks_name, ctx_name)
        if self.kernel_stack:
            stack_trace += """
        __data.kernel_stack_id = %s.get_stackid(
          %s, BPF_F_REUSE_STACKID
        );""" % (self.stacks_name, ctx_name)

        text = heading + """
{
        u64 __pid_tgid = bpf_get_current_pid_tgid();
        u32 __tgid = __pid_tgid >> 32;
        u32 __pid = __pid_tgid; // implicit cast to u32 for bottom half
        %s
        %s
        %s
        if (!(%s)) return 0;

        struct %s __data = {0};
        __data.timestamp_ns = bpf_ktime_get_ns();
        __data.tgid = __tgid;
        __data.pid = __pid;
        bpf_get_current_comm(&__data.comm, sizeof(__data.comm));
%s
%s
        %s.perf_submit(%s, &__data, sizeof(__data));
        return 0;
}
"""
        # _generate_usdt_filter_read() must run before self.filter is read:
        # it rewrites the filter for USDT probes. Tuple elements are
        # evaluated left to right, which guarantees that order.
        text = text % (
            pid_filter,
            prefix,
            self._generate_usdt_filter_read(),
            self.filter,
            self.struct_name,
            data_fields,
            stack_trace,
            self.events_name,
            ctx_name,
        )

        return self.streq_functions + data_decl + "\n" + text

    @classmethod
    def _time_off_str(cls, timestamp_ns):
        """Format ``timestamp_ns`` as seconds elapsed since ``first_ts``."""
        return "%.6f" % (1e-9 * (timestamp_ns - cls.first_ts))

    def _display_function(self):
        """Return the name shown for this probe (function, USDT or event)."""
        if self.probe_type == "p" or self.probe_type == "r":
            return self.function
        elif self.probe_type == "u":
            return self.usdt_name
        else:  # self.probe_type == 't'
            return self.tp_event

    def print_stack(self, bpf, stack_id, tgid):
        """Print the symbolized stack ``stack_id``, or the raw id if negative."""
        if stack_id < 0:
            print("        %d" % stack_id)
            return

        stack = list(bpf.get_table(self.stacks_name).walk(stack_id))
        for addr in stack:
            print("        %016x %s" % (addr, bpf.sym(addr, tgid)))

    def _format_message(self, bpf, tgid, values):
        """Return the formatted event message.

        ``values`` may be any iterable of captured field values; it is copied
        so the caller's object is never modified. Each ``%K`` value is
        replaced by its kernel symbol and each ``%U`` value by its user
        symbol in process ``tgid``.
        """
        # Copy into a list: item assignment is needed below, and callers may
        # pass a tuple or a one-shot iterator (e.g. map() on Python 3).
        values = list(values)
        for idx, field_type in enumerate(self.types):
            if field_type == "K":
                values[idx] = bpf.ksymaddr(values[idx])
            elif field_type == "U":
                values[idx] = bpf.symaddr(values[idx], tgid)
        return self.python_format % tuple(values)

    def print_event(self, bpf, cpu, data, size):
        """Perf-buffer callback: decode one event and print it.

        Exits the process once ``Probe.max_events`` events were printed.
        """
        # Cast as the generated structure type and display
        # according to the format string in the probe.
        event = ct.cast(data, ct.POINTER(self.python_struct)).contents
        # A real list (not a lazy map object) so the values can be indexed.
        values = [getattr(event, "v%d" % i) for i in range(len(self.values))]
        msg = self._format_message(bpf, event.tgid, values)
        if Probe.use_localtime:
            time = strftime("%H:%M:%S")
        else:
            time = Probe._time_off_str(event.timestamp_ns)
        print("%-8s %-6d %-6d %-12s %-16s %s" %
              (time[:8], event.tgid, event.pid, event.comm,
               self._display_function(), msg))

        if self.kernel_stack:
            # -1 selects kernel symbol resolution
            self.print_stack(bpf, event.kernel_stack_id, -1)
        if self.user_stack:
            self.print_stack(bpf, event.user_stack_id, event.tgid)
        if self.user_stack or self.kernel_stack:
            print("")

        Probe.event_count += 1
        if Probe.max_events is not None and \
           Probe.event_count >= Probe.max_events:
            exit()

    def attach(self, bpf, verbose):
        """Attach the probe to ``bpf`` and open its perf buffer."""
        if len(self.library) == 0:
            self._attach_k(bpf)
        else:
            self._attach_u(bpf)
        self.python_struct = self._generate_python_data_decl()
        callback = partial(self.print_event, bpf)
        bpf[self.events_name].open_perf_buffer(callback)

    def _attach_k(self, bpf):
        """Attach a kernel entry or return probe."""
        if self.probe_type == "r":
            bpf.attach_kretprobe(event=self.function,
                                 fn_name=self.probe_name)
        elif self.probe_type == "p":
            bpf.attach_kprobe(event=self.function,
                              fn_name=self.probe_name)
        # Note that tracepoints don't need an explicit attach

    def _attach_u(self, bpf):
        """Attach a user-space entry or return probe; USDT needs no attach."""
        libpath = BPF.find_library(self.library)
        if libpath is None:
            # This might be an executable (e.g. 'bash')
            libpath = BPF.find_exe(self.library)
        if libpath is None or len(libpath) == 0:
            self._bail("unable to find library %s" % self.library)

        if self.probe_type == "u":
            pass  # Was already enabled by the BPF constructor
        elif self.probe_type == "r":
            bpf.attach_uretprobe(name=libpath,
                                 sym=self.function,
                                 fn_name=self.probe_name,
                                 pid=Probe.pid)
        else:
            bpf.attach_uprobe(name=libpath,
                              sym=self.function,
                              fn_name=self.probe_name,
                              pid=Probe.pid)