def start_parse_mds_log(filepath, mode): with open(filepath) as fp: for line in fp: words = utils.log_split(line) wdlen = len(words) if utils.check_err_log(words, line): continue if (check_cache_stats(words, wdlen)): continue print_mcs(mode)
def start_parse_mon_log(filepath, mode): with open(filepath) as fp: #import pdb #pdb.set_trace() global start_ts start_ts = get_start_timestamp(fp) for line in fp: words = utils.log_split(line) wdlen = len(words) if utils.check_err_log(words, line): continue if (check_osd_fail_report(words, wdlen)): continue if (check_mon_flaps(words, wdlen)): continue if check_slow_requests(line, words, wdlen): continue print_monstat(mode) utils.dump_errors()
2. monitor flapping 3. slow requests ''' import common.utils as utils ### osd failure report | default log ### osd_fail_log = "2020-04-09 22:31:07.432799 mon.juju-0aa49a-8-lxd-0 mon.0 10.10.0.84:6789/0 5487342 : cluster [DBG] osd.45 10.10.0.37:6800/511440 reported failed by osd.86 10.10.0.47:6803/5068" #(0, '2020-04-09') (1, '22:31:07.432799') (2, 'mon.juju-0aa49a-8-lxd-0') (3, 'mon.0') (4, '10.10.0.84:6789/0') (5, '5487342') (6, ':') (7, 'cluster') (8, '[DBG]') (9, 'osd.45') (10, '10.10.0.37:6800/511440') (11, 'reported') (12, 'failed') (13, 'by') (14, 'osd.86') (15, '10.10.0.47:6803/5068') osd_fail_index_list = [11, 12, 13] osd_fail_key2idx = {"failed": 9, "reporter": 14} osd_fail_words = utils.log_split(osd_fail_log) ### monitor re-election ### mon_flap_log = "2020-05-30 08:00:56.890317 mon.juju-e71ac3-7-lxd-0 mon.2 172.26.234.31:6789/0 250660 : cluster [INF] mon.juju-e71ac3-7-lxd-0 calling monitor election" #(0, '2020-05-30') (1, '08:00:56.890317') (2, 'mon.juju-e71ac3-7-lxd-0') (3, 'mon.2') (4, '172.26.234.31:6789/0') (5, '250660') (6, ':') (7, 'cluster') (8, '[INF]') (9, 'mon.juju-e71ac3-7-lxd-0') (10, 'calling') (11, 'monitor') (12, 'election') mon_flap_index_list = [10, 11, 12] mon_flap_key2idx = {"mon": 9} mon_flap_words = utils.log_split(mon_flap_log) ### Slow requests slow_requests = "2020-05-28 21:24:08.804634 mon.juju-e71ac3-18-lxd-4 mon.0 172.26.234.21:6789/0 5336 : cluster [WRN] Health check failed: 7 slow requests are blocked > 32 sec. Implicated osds 6 (REQUEST_SLOW)" slow_requests_match_list = ["slow requests"]
import common.utils as utils bs_write_begin = "2020-06-01 08:48:53.059625 7fddaf6e4700 15 bluestore(/home/ubuntu/backport/luminous/ceph/build/dev/osd0) _write 7.4_head #7:24f7b562:::rbd_data.10606b8b4567.000000000000002f:head# 0x340c00~c00" bs_write_end = "2020-06-01 08:48:53.059972 7fddaf6e4700 10 bluestore(/home/ubuntu/backport/luminous/ceph/build/dev/osd0) _write 7.4_head #7:24f7b562:::rbd_data.10606b8b4567.000000000000002f:head# 0x340c00~c00 = 0" ''' (0, '2020-06-01') (1, '08:48:53.059625') (2, '7fddaf6e4700') (3, '15') (4, 'bluestore(/home/ubuntu/backport/luminous/ceph/build/dev/osd0)') (5, '_write') (6, '7.4_head') (7, '#7:24f7b562:::rbd_data.10606b8b4567.000000000000002f:head#') (8, '0x340c00~c00') (0, '2020-06-01') (1, '08:48:53.059972') (2, '7fddaf6e4700') (3, '10') (4, 'bluestore(/home/ubuntu/backport/luminous/ceph/build/dev/osd0)') (5, '_write') (6, '7.4_head') (7, '#7:24f7b562:::rbd_data.10606b8b4567.000000000000002f:head#') (8, '0x340c00~c00') (9, '=') (10, '0') ''' bs_write_begin_words = utils.log_split(bs_write_begin) bs_write_end_words = utils.log_split(bs_write_end) bs_write_bi_list = [5] bs_write_ei_list = [5] bs_write_begin_key2idx = {"thread": 2, "offlen": 8, "cid": 6, "oid": 7} bs_write_end_key2idx = {"thread": 2, "offlen": 8, "cid": 6, "oid": 7} bs_read_begin = "2020-06-01 08:48:05.909551 7fddb0ee7700 15 bluestore(/home/ubuntu/backport/luminous/ceph/build/dev/osd0) read 7.6_head #7:64243b03:::rbd_object_map.10606b8b4567:head# 0x12~80" bs_read_end = "2020-06-01 08:48:05.909610 7fddb0ee7700 10 bluestore(/home/ubuntu/backport/luminous/ceph/build/dev/osd0) read 7.6_head #7:64243b03:::rbd_object_map.10606b8b4567:head# 0x12~80 = 128" ''' (0, '2020-06-01') (1, '08:48:05.909551') (2, '7fddb0ee7700') (3, '15') (4, 'bluestore(/home/ubuntu/backport/luminous/ceph/build/dev/osd0)') (5, 'read') (6, '7.6_head') (7, '#7:64243b03:::rbd_object_map.10606b8b4567:head#') (8, '0x12~80')
def get_start_timestamp(fp): first_line = fp.readline() words = utils.log_split(first_line) return utils.timestamp(words[0] + "-" + words[1])
import common.utils as utils ### mds cache stats || debug_mds 2### cache_stats_log = "2020-08-31 07:09:19.141 7fe4a4b8f700 2 mds.0.cache Memory usage: total 15075176, rss 14665904, heap 330416, baseline 330416, 3181526 / 3662455 inodes have caps, 3183808 caps, 0.86931 caps per inode" #(0, '2020-08-31') (1, '07:09:19.141') (2, '7fe4a4b8f700') (3, '2') (4, 'mds.0.cache') (5, 'Memory') (6, 'usage:') (7, 'total') (8, '15075176,') (9, 'rss') (10, '14665904,') (11, 'heap') (12, '330416,') (13, 'baseline') (14, '330416,') (15, '3181526') (16, '/') (17, '3662455') (18, 'inodes') (19, 'have') (20, 'caps,') (21, '3183808') (22, 'caps,') (23, '0.86931') (24, 'caps') (25, 'per') (26, 'inode') cache_stats_index_list = [5, 6, 18, 19, 20] cache_stats_key2idx = { "timestamp": 1, "rss": 10, "inodes": 17, "caps": 21, "caps per inode": 23 } cache_stats_words = utils.log_split(cache_stats_log) ### client not releasing caps || debug_mds 2 ### not_releasing_caps_log = "2020-08-31 06:36:19.192 7fe4a6b93700 2 mds.beacon.juju-0f7010-18-lxd-0 Session stha8p0pq (3435558) is not releasing caps fast enough. Recalled caps at 33945 > 32768 (mds_recall_warning_threshold)."
def start_parse_osd_log(filepath, mode): bsw_begin = {} bsw_stats = [] bsr_begin = {} bsr_stats = [] wops = {} rops = {} with open(filepath) as fp: for line in fp: words = utils.log_split(line) if is_bs_write_begin(words): process_bs_write_begin(words, bsw_begin) continue if is_bs_write_end(words): process_bs_write_end(words, bsw_begin, bsw_stats) continue if is_bs_read_begin(words): process_bs_read_begin(words, bsr_begin) continue if is_bs_read_end(words): process_bs_read_end(words, bsr_begin, bsr_stats) continue if is_bs_penalty_read_begin(words): continue if is_bs_penalty_read_end(words): continue if is_write_op_start(line, words): process_write_op_start(line, words, wops) continue if is_write_op_end(line): process_write_op_end(words, wops) continue if is_subop_sent(line, words): process_subop_sent(words, wops) continue if is_subop_reply(line, words): process_subop_reply(words, wops) continue if is_read_op_start(line, words): process_read_op_start(line, words, rops) continue ''' if check_and_process_read_op_reply(line, words, rops): continue ''' if is_repop_start(words): process_repop_start(words) continue if is_repop_end(words): process_repop_end(words) continue ''' if is_enqueue_op(words): op = process_enqueue_op(line, words, ops) continue ''' if is_dequeue_op_start(words): process_dequeue_op_start(words, wops, rops) continue if is_dequeue_op_end(words): process_dequeue_op_end(words, wops, rops) continue if is_eval_repop(line, words): process_eval_repop(words, wops) continue if is_op_commit(line, words): process_op_commit(words, wops) continue #if is_bs_read_begin() # remove those non completed ops remove_no_reply_op(wops) print_write_op_latency(wops) remove_no_reply_op(rops) print_read_op_latency(rops)