class BucketGrep:
    """Greps logs for a custom regex and bucketizes the matches by time.

    Matching lines are collected cluster wide (``self.matches``) and per node
    (``self.node_matches``), keyed by the timestamp they are associated with.
    Lines without their own timestamp (for example traceback lines) are
    associated with the timestamp of the most recent well-formed log line.
    """

    # prefix used to find the search term anywhere in an unformatted line
    strayre = r".*"
    # prefix matching "LEVEL [thread-id] <date>" at the start of a log line
    basere = r" *(?P<level>[A-Z]*) *\[(?P<thread_name>[^\]]*?)[:_-]?(?P<thread_id>[0-9]*)\] (?P<date>.{10} .{12}) *.*"

    def __init__(
        self,
        regex,
        diag_dir=None,
        files=None,
        start=None,
        end=None,
        ignorecase=True,
        report="summary",
    ):
        """Set up the search.

        regex      -- the user supplied regular expression to search for
        diag_dir   -- directory to search for logs when ``files`` is not given
        files      -- explicit list of log files; takes precedence over diag_dir
        start/end  -- optional date strings limiting which matches are kept
        ignorecase -- compile the search expressions with re.IGNORECASE
        report     -- "summary" for cluster wide output, anything else prints
                      per node numbers
        """
        self.diag_dir = diag_dir
        self.files = files
        # earliest / latest timestamp of any recorded match
        self.start = None
        self.end = None
        # optional user supplied filter range
        self.start_time = None
        self.end_time = None
        # timestamp of the most recent well-formed log line
        self.last_time = None
        self.report = report
        if start:
            self.start_time = date_parse(start)
        if end:
            self.end_time = date_parse(end)
        flags = re.IGNORECASE if ignorecase else 0
        self.strayregex = re.compile(self.strayre + regex + ".*", flags)
        self.timeregex = re.compile(self.basere + regex + ".*", flags)
        self.supplied_regex = regex.lower() if ignorecase else regex
        self.valid_log_regex = re.compile(self.basere)
        self.node_matches = OrderedDefaultDict()
        self.matches = OrderedDefaultDict(list)
        self.count = 0
        self.unknown = 0
        self.analyzed = False

    def analyze(self):
        """parses logs for results

        Raises Exception when neither files nor a diag directory were given.
        """
        print("bucketgrep version %s" % VERSION)
        print("search: '%s'" % self.supplied_regex)
        if self.files:
            target = self.files
        elif self.diag_dir:
            if self.diag_dir == ".":
                print("from directory '%s':" % os.getcwd())
            else:
                print("from directory '%s':" % self.diag_dir)
            target = diag.find_logs(self.diag_dir)
        else:
            raise Exception("no diag dir and no files specified")
        for log_file in target:
            with diag.FileWithProgress(log_file) as log:
                node_name = extract_node_name(log_file, ignore_missing_nodes=True)
                # several files can belong to the same node; only create the
                # per node container once so earlier matches are not discarded
                if node_name not in self.node_matches:
                    self.node_matches[node_name] = OrderedDefaultDict(list)
                for line in log:
                    self._process_line(node_name, line)
        self.analyzed = True

    def _process_line(self, node_name, line):
        """Classify a single log line and record it when it matches."""
        # as long as it is a valid log line we want its date, even if the
        # rest of the line is of no interest, so that following stray lines
        # can be associated with it
        valid = self.valid_log_regex.match(line)
        if valid:
            # NOTE(review): date() is assumed to return a parser callable
            self.last_time = date()(valid.group("date"))
        hit = self.timeregex.match(line)
        if hit:
            # normal case, well-formatted log line. timeregex may be case
            # insensitive while valid_log_regex is not, so take the date from
            # the match itself when the validity check did not provide one.
            if not valid:
                self.last_time = date()(hit.group("date"))
            stamp = self.last_time
            if self._in_time_range(stamp):
                self._record_match(node_name, stamp, line)
            return
        # check for a match in an unformatted line, like a traceback
        if not self.strayregex.match(line):
            return
        if self.last_time is None:
            # match, but no previous timestamp to associate with
            self.unknown += 1
            return
        # stray lines honor the same time filter as well-formatted ones
        if self._in_time_range(self.last_time):
            self._record_match(node_name, self.last_time, line)

    def _in_time_range(self, dt):
        """Return True when dt passes the optional start/end filter."""
        if self.start_time and dt < self.start_time:
            return False
        if self.end_time and dt > self.end_time:
            return False
        return True

    def _record_match(self, node_name, dt, line):
        """Store a matching line under dt, cluster wide and for its node."""
        # only recorded matches widen the reported range, so the buckets
        # never extend over lines that were filtered out
        self.__setdates(dt)
        self.matches[dt].append(line)
        self.node_matches[node_name][dt].append(line)
        self.count += 1

    def __setdates(self, dt):
        """Widen the [start, end] range so that it includes dt."""
        if not self.start:
            self.start = dt
            self.end = dt
            return
        if dt > self.end:
            self.end = dt
        if dt < self.start:
            self.start = dt

    def _print_buckets(self, matches, interval):
        """Print one line per time bucket: timestamp, count and a text bar."""
        buckets = sorted(
            bucketize(
                matches, start=self.start, end=self.end, seconds=interval
            ).items(),
            key=lambda t: t[0],
        )
        maxval = max(len(lines) for _, lines in buckets)
        width = len(str(maxval))
        for bucket_time, lines in buckets:
            # right-align the counts under each other
            pad = " " * (width - len(str(len(lines))))
            print(
                bucket_time.strftime("%Y-%m-%d %H:%M:%S") + pad,
                len(lines),
                textbar(maxval, len(lines)),
            )

    def print_report(self, interval=3600):
        """print bucketized result counts

        interval -- bucket size in seconds. Runs analyze() first when that
        has not happened yet.
        """
        print()
        if not self.analyzed:
            self.analyze()
        if not self.matches:
            print("No matches found")
            if self.unknown:
                print(self.unknown, "matches without timestamp")
            return
        if self.report == "summary":
            print()
            print("cluster wide")
            print("------------")
            self._print_buckets(self.matches, interval)
        else:
            print()
            print()
            print("per node numbers")
            print("----------------")
            for node in sorted(self.node_matches.keys()):
                print()
                print("node: %s" % node)
                print("--------")
                if not self.node_matches[node]:
                    print("No matches for %s found" % node)
                    continue
                self._print_buckets(self.node_matches[node], interval)
        if self.unknown:
            print(self.unknown, "matches without timestamp")
class SysbottleReport:
    """Produces a report from iostat output

    Every parsed iostat record is checked against the thresholds in
    ``self.conf``; the report prints how often the thresholds were exceeded,
    percentile tables and a list of recommendations.
    """

    def __init__(self, infile, conf=None):
        """Create a report for infile.

        infile -- the iostat output handed to the parser
        conf   -- optional mapping with the keys "iowait_threshold",
                  "cpu_threshold", "disks", "queue_threshold"; defaults are
                  used when it is omitted or empty
        """
        self.infile = infile
        self.parser = IOStatParser()
        self.count = 0
        self.cpu_exceeded = 0
        self.iowait_exceeded = 0
        # records where the cpu threshold, the iowait threshold or both were
        # exceeded; each record is counted once
        self.bottlenecked = 0
        self.devices = OrderedDefaultDict(lambda: OrderedDefaultDict(list))
        self.cpu_stats = OrderedDefaultDict(list)
        self.queuedepth = OrderedDefaultDict(int)
        self.start = None
        self.end = None
        # column name -> position in the per record value lists
        self.device_index = OrderedDict()
        self.cpu_index = OrderedDict()
        self.conf = conf or self.__mk_conf()
        self.recs = set()
        self.analyzed = False

    def __mk_conf(self):
        """Build the default thresholds."""
        conf = OrderedDict()
        conf["iowait_threshold"] = 5
        conf["cpu_threshold"] = 50
        conf["disks"] = []
        conf["queue_threshold"] = 1
        conf["busy_threshold"] = 5
        return conf

    def analyze(self):
        "analyzes the file this class was initialized with"
        for io in self.parser.parse(self.infile):
            self.count += 1
            if not self.device_index:
                # column layout is taken from the first record
                self.__mk_col_idx(io)
            self.__analyze_disk(io)
            self.__analyze_cpu(io)
            if not self.start:
                self.start = io["date"]
            if not self.end or io["date"] > self.end:
                self.end = io["date"]
        self.analyzed = True

    def __mk_col_idx(self, stat):
        """Remember the position of every device and cpu column."""
        for i, col in enumerate(stat["device"]["cols"]):
            self.device_index[col] = i
        for i, col in enumerate(stat["cpu"]["cols"]):
            self.cpu_index[col] = i

    def __want_disk(self, name):
        """Return True when name should be analyzed; no configured disks means all."""
        if not self.conf["disks"]:
            return True
        return name in self.conf["disks"]

    def __analyze_disk(self, stat):
        """Collect the device columns of one record and count deep queues."""
        queue_threshold = self.conf["queue_threshold"]
        for disk, values in stat["device"]["stat"].items():
            if not self.__want_disk(disk):
                continue
            for col, idx in self.device_index.items():
                val = values[idx]
                self.devices[disk][col].append(val)
                if "qu" in col and val >= queue_threshold:
                    self.queuedepth[disk] += 1
                    self.recs.add("* decrease activity on %s" % disk)

    def __analyze_cpu(self, stat):
        """Collect the cpu columns of one record and check the thresholds."""
        values = stat["cpu"]["stat"]
        total = 0
        for cpu in ["system", "user", "nice", "steal"]:
            total += values[self.cpu_index["%" + cpu]]
        self.cpu_stats["total"].append(total)
        cpu_bound = total > self.conf["cpu_threshold"]
        if cpu_bound:
            self.cpu_exceeded += 1
            self.recs.add("* tune for less CPU usage")
        for col, idx in self.cpu_index.items():
            self.cpu_stats[col].append(values[idx])
        io_bound = values[self.cpu_index["%iowait"]] > self.conf["iowait_threshold"]
        if io_bound:
            self.iowait_exceeded += 1
            self.recs.add("* tune for less IO")
        if cpu_bound or io_bound:
            # counted once even when both thresholds are exceeded, so the
            # total bottleneck time can never go above 100%
            self.bottlenecked += 1

    def print_report(self):
        "prints a report for the file this class was initialized with, analyzing if necessary"
        if not self.analyzed:
            self.analyze()
        print("sysbottle version %s" % VERSION)
        print()
        print()
        print("* total records: %s" % self.count)
        if self.count:

            def report_percentage(a):
                return (float(a) / float(self.count)) * 100.0

            print(
                "* total bottleneck time: %.2f%% (cpu bound, io bound, or both)"
                % report_percentage(self.bottlenecked)
            )
            print(
                "* cpu+system+nice+steal time > %.2f%%: %.2f%%"
                % (self.conf["cpu_threshold"], report_percentage(self.cpu_exceeded))
            )
            print(
                "* iowait time > %.2f%%: %.2f%%"
                % (
                    self.conf["iowait_threshold"],
                    report_percentage(self.iowait_exceeded),
                )
            )
            print("* start %s" % self.start)
            print("* end %s" % self.end)
            log_time_seconds = (self.end - self.start).total_seconds() + 1
            print("* log time: %ss" % log_time_seconds)
            # seconds covered per record; this is a duration, not a percentage
            print("* interval: %ss" % (log_time_seconds / float(self.count)))
            for device in self.devices:
                print(
                    "* %s time at queue depth >= %.2f: %.2f%%"
                    % (
                        device,
                        self.conf["queue_threshold"],
                        report_percentage(self.queuedepth[device]),
                    )
                )
        print()
        separator = ["", "---", "---", "---", "---", "---", "---"]
        lines = []
        lines.append(get_percentile_headers())
        lines.append(list(separator))
        lines.append(get_percentiles("cpu", self.cpu_stats["total"]))
        lines.append(get_percentiles("iowait", self.cpu_stats["%iowait"]))
        lines.append([])
        lines.append(get_percentile_headers())
        lines.append(list(separator))
        for device in self.devices:
            lines.append([device, "", "", "", "", "", ""])
            for iotype in self.devices[device].keys():
                # only queue and wait columns are shown in the table
                if "qu" in iotype or "wait" in iotype:
                    lines.append(
                        get_percentiles(
                            "- " + iotype + ":", self.devices[device][iotype]
                        )
                    )
            lines.append([])
        humanize.pad_table(lines, 8, 2)
        for line in lines:
            print("".join(line))
        self.print_recommendations()

    def print_recommendations(self):
        """print recommendations"""
        if not self.recs:
            return
        print("recommendations")
        print("-" * 15)
        # recs is a set; sort so the output is the same on every run
        for rec in sorted(self.recs):
            print(rec)