def make_crf_model(ns):
    """Train a CRF model and print the value returned by the trainer.

    Attributes read from ``ns``: ``config_load``, ``config_dump``,
    ``debug``, ``output``, ``train_file`` and, when ``train_file`` is
    None, ``conditions`` and ``output_sampled``.

    With a train file the model is built by
    ``train.make_crf_model_from_trainfile``. Otherwise the log lines
    selected by the parsed conditions are read from the log database and
    handed to ``train.make_crf_model``; if ``ns.output_sampled`` is set,
    the sampled messages returned by the trainer are pickled to that path.
    """
    load_conf = config.open_config(ns.config_load)
    dump_conf = config.open_config(ns.config_dump)
    if ns.debug:
        level = logging.DEBUG
    else:
        level = logging.INFO
    config.set_common_logging(dump_conf, logger=_logger, lv=level)

    from . import train
    model_out = ns.output

    if ns.train_file is not None:
        # A prepared train file bypasses the log database entirely.
        model_fn = train.make_crf_model_from_trainfile(
            dump_conf, ns.train_file, model_out)
        print("> {0}".format(model_fn))
        return

    from amulog import log_db
    cond = parse_condition(ns.conditions)
    sampled_out = ns.output_sampled
    # Materialize the selected lines before handing them to the trainer.
    messages = list(log_db.LogData(load_conf).iter_lines(**cond))

    if sampled_out is None:
        model_fn = train.make_crf_model(dump_conf, messages, model_out)
    else:
        model_fn, sampled = train.make_crf_model(
            dump_conf, messages, model_out, return_sampled_messages=True)
        import pickle
        with open(sampled_out, 'wb') as f:
            pickle.dump(sampled, f)
        print("> {0}".format(sampled_out))
    print("> {0}".format(model_fn))
def make_crf_train(ns):
    """Print the result of ``train.crf_trainfile`` for the selected lines.

    Attributes read from ``ns``: ``config_load`` (log database config),
    ``config_dump`` (config passed to the trainer and used for logging),
    ``debug`` and ``conditions``.
    """
    load_conf = config.open_config(ns.config_load)
    dump_conf = config.open_config(ns.config_dump)
    if ns.debug:
        level = logging.DEBUG
    else:
        level = logging.INFO
    config.set_common_logging(dump_conf, logger=_logger, lv=level)

    from . import train
    from amulog import log_db
    cond = parse_condition(ns.conditions)
    # The line iterator is passed on as-is, without building a list.
    lines = log_db.LogData(load_conf).iter_lines(**cond)
    print(train.crf_trainfile(dump_conf, lines))
def open_logdag_config(conf_path=None, debug=False):
    """Open a logdag config and set up logging; return the config.

    Args:
        conf_path: Config file to open. When None, ``DEFAULT_CONFIG`` itself
            is opened; otherwise ``DEFAULT_CONFIG`` is passed as an extra
            default (``ex_defaults``).
        debug: Use ``logging.DEBUG`` instead of ``logging.INFO``.

    Logging is configured for this module's logger and the "amulog" logger.
    """
    open_kwargs = {"env": "LOGDAG_CONFIG", "base_default": False}
    if conf_path is None:
        target = DEFAULT_CONFIG
    else:
        target = conf_path
        open_kwargs["ex_defaults"] = [DEFAULT_CONFIG]
    conf = config.open_config(target, **open_kwargs)

    if debug:
        level = logging.DEBUG
    else:
        level = logging.INFO
    loggers = [_logger, logging.getLogger("amulog")]
    config.set_common_logging(conf, logger=loggers, lv=level)
    return conf
def test_load_asis(self):
    # Load events day by day from the amulog database, build DAGs for
    # every generated argument set and require at least one edge overall.
    conf = config.open_config(arguments.DEFAULT_CONFIG, base_default=False)
    overrides = (
        ("general", "evdb", "sql"),
        ("database_sql", "database", "sqlite3"),
        ("database_amulog", "source_conf", self._path_amulogconf),
        ("database_sql", "sqlite3_filename", self._path_testdb),
        ("filter", "rules", ""),
    )
    for section, option, value in overrides:
        conf[section][option] = value

    from logdag import dtutil
    from logdag.source import evgen_log
    unit = config.str2dur("1d")
    loader = evgen_log.LogEventLoader(conf)
    for dt_range in dtutil.iter_term(self._whole_term, unit):
        loader.read(dt_range, dump_org=False)

    arg_manager = arguments.ArgumentManager(conf)
    arg_manager.generate(arguments.all_args)

    from logdag import makedag
    edge_cnt = sum(
        makedag.makedag_main(args, do_dump=False).number_of_edges()
        for args in arg_manager
    )
    assert edge_cnt > 0
def setUpClass(cls):
    """Create temporary files, an amulog config and a populated amulog db.

    Sets on ``cls``: ``_path_testlog``, ``_path_amulogdb``,
    ``_path_ltgendump``, ``_path_testdb``, ``_path_amulogconf``,
    ``_amulog_conf`` and ``_whole_term``.
    """

    def _new_tempfile():
        # Only the path is needed; close the descriptor right away.
        fd, path = tempfile.mkstemp()
        os.close(fd)
        return path

    cls._path_testlog = _new_tempfile()
    cls._path_amulogdb = _new_tempfile()
    cls._path_ltgendump = _new_tempfile()
    cls._path_testdb = _new_tempfile()

    cls._amulog_conf = config.open_config()
    cls._amulog_conf['general']['src_path'] = cls._path_testlog
    cls._amulog_conf['database']['sqlite3_filename'] = cls._path_amulogdb
    cls._amulog_conf['manager']['indata_filename'] = cls._path_ltgendump

    # Write the config to disk so it can be referenced by path.
    # The with-block closes the file even if write() raises.
    fd_amulogconf, cls._path_amulogconf = tempfile.mkstemp()
    with os.fdopen(fd_amulogconf, "w") as f:
        cls._amulog_conf.write(f)

    tlg = testutil.TestLogGenerator(testutil.DEFAULT_CONFIG, seed=3)
    tlg.dump_log(cls._path_testlog)
    cls._whole_term = tlg.term

    from amulog import __main__ as amulog_main
    from amulog import manager
    targets = amulog_main.get_targets_conf(cls._amulog_conf)
    manager.process_files_online(cls._amulog_conf, targets, reset_db=True)
def test_shiso_first(self):
    # The "shiso" method must yield more than 3 and fewer than 20 templates.
    conf = config.open_config()
    lt_section = conf['log_template']
    lt_section['lt_methods'] = "shiso"
    n_tpl = len(self._try_method(conf))
    self.assertTrue(3 < n_tpl < 20)
def list_trouble_stat(ns):
    """Print a table with one row of statistics per registered trouble.

    Columns: trouble id, group, number of messages, distinct gids, hosts,
    events and label groups, and the base-2 entropy of the event counts
    and of the per-group message counts.
    """
    conf = open_logdag_config(ns)
    amulog_conf = config.open_config(conf["database_amulog"]["source_conf"])

    from . import trouble
    tm = trouble.init_trouble_manager(conf)

    from amulog import log_db
    ld = log_db.LogData(amulog_conf)
    gid_name = conf.get("database_amulog", "event_gid")

    from scipy.stats import entropy

    header = [
        "trouble_id", "group", "messages", "gids", "hosts",
        "events", "groups", "entropy_events", "entropy_groups",
    ]
    rows = [header]
    for tr in tm:
        d_ev, d_gid, d_host = trouble.event_stat(tr, ld, gid_name)
        d_group = trouble.event_label(d_gid, ld, gid_name)

        ent_ev = entropy(list(d_ev.values()), base=2)
        # Message count of each label group = sum over its member gids.
        group_sizes = [sum(d_gid[gid] for gid in l_gid)
                       for l_gid in d_group.values()]
        ent_group = entropy(group_sizes, base=2)

        rows.append([
            tr.tid,
            tr.data["group"],
            sum(d_gid.values()),   # messages
            len(d_gid),            # gids
            len(d_host),           # hosts
            len(d_ev),             # events
            len(d_group),          # groups
            ent_ev,                # entropy of events
            ent_group,             # entropy of groups
        ])
    print(common.cli_table(rows))
def test_dlog(self):
    # The "dlog" method (run with online=False) must yield more than 3
    # and fewer than 300 templates.
    conf = config.open_config()
    lt_section = conf['log_template']
    lt_section['lt_methods'] = "dlog"
    n_tpl = len(self._try_method(conf, online=False))
    self.assertTrue(3 < n_tpl < 300)
def show_diff_direction(ns):
    """Print edges whose direction differs between two configs.

    ``ns.confs`` must hold exactly two config paths. When ``ns.argname``
    is None, every time range known to the argument manager of the first
    config is compared (in sorted order); otherwise only the range of the
    named job is compared. The total number of differing entries is
    printed last.
    """
    l_conffp = ns.confs
    assert len(l_conffp) == 2
    conf1, conf2 = [
        config.open_config(fp, ex_defaults=[arguments.DEFAULT_CONFIG])
        for fp in l_conffp
    ]
    lv = logging.DEBUG if ns.debug else logging.INFO
    am_logger = logging.getLogger("amulog")
    config.set_common_logging(conf1, logger=[_logger, am_logger], lv=lv)

    def _print_diff(ret):
        # Each entry pairs two events with their respective directions.
        for ev1, ev2, di1, di2 in ret:
            print("{0} {1} | {2} {3}".format(ev1, di1, di2, ev2))

    from . import comp_conf
    am = arguments.ArgumentManager(conf1)
    am.load()

    cnt = 0
    if ns.argname is None:
        for dt_range in sorted(am.iter_dt_range()):
            ret = comp_conf.edge_direction_diff(conf1, conf2, dt_range)
            cnt += len(ret)
            if len(ret) > 0:
                print(dt_range)
                _print_diff(ret)
                print("")
    else:
        # The original passed an undefined name ``conf`` here (NameError);
        # the manager was built from conf1, so resolve the job against it.
        args = am.jobname2args(ns.argname, conf1)
        # NOTE(review): confirm that index 2 of the tuple returned by
        # jobname2args is the time range expected by edge_direction_diff.
        dt_range = args[2]
        ret = comp_conf.edge_direction_diff(conf1, conf2, dt_range)
        cnt += len(ret)
        _print_diff(ret)
    print(cnt)
def search_trouble(ns):
    """Print the troubles matching the conditions given in ``ns.conditions``.

    Supported condition keys: ``group`` (compared with the trouble's group)
    and ``gid`` / ``host`` (a trouble matches when at least one of its
    messages satisfies every given one of these).
    """
    conf = open_logdag_config(ns)
    amulog_conf = config.open_config(conf["database_amulog"]["source_conf"])
    cond = parse_condition(ns.conditions)

    from . import trouble
    tm = trouble.init_trouble_manager(conf)

    from amulog import log_db
    ld = log_db.LogData(amulog_conf)
    gid_name = conf.get("database_amulog", "event_gid")

    # match group
    candidates = [
        tr for tr in tm
        if "group" not in cond or tr.data["group"] == cond["group"]
    ]

    # match event
    if "gid" in cond or "host" in cond:
        want_gid = cond.get("gid", None)
        want_host = cond.get("host", None)

        def _line_matches(lid):
            lm = ld.get_line(lid)
            gid, host = lm.lt.get(gid_name), lm.host
            gid_ok = want_gid is None or want_gid == gid
            host_ok = want_host is None or want_host == host
            return gid_ok and host_ok

        # any() stops at the first matching message of a trouble.
        candidates = [
            tr for tr in candidates
            if any(_line_matches(lid) for lid in tr.data["message"])
        ]

    for tr in candidates:
        print(tr)
def test_fttree(self):
    # The "fttree" method must yield more than 3 and fewer than 50 templates.
    conf = config.open_config()
    lt_section = conf['log_template']
    lt_section['lt_methods'] = "fttree"
    n_tpl = len(self._try_method(conf))
    self.assertTrue(3 < n_tpl < 50)
def test_db_sqlite3(self):
    # Generate a test log, load it into the database configured by the
    # default config, and check the line count and the number of log
    # template groups. The log and db paths come from the config; the
    # hard-coded /tmp paths previously assigned first were dead stores.
    conf = config.open_config()
    path_testlog = conf['general']['src_path']
    path_db = conf['database']['sqlite3_filename']

    tlg = testlog.TestLogGenerator(testlog.DEFAULT_CONFIG, seed=3)
    tlg.dump_log(path_testlog)

    l_path = config.getlist(conf, "general", "src_path")
    if conf.getboolean("general", "src_recur"):
        targets = common.recur_dir(l_path)
    else:
        targets = common.rep_dir(l_path)
    log_db.process_files(conf, targets, True)

    ld = log_db.LogData(conf)
    num = ld.count_lines()
    self.assertEqual(num, 6539, "not all logs added to database")
    ltg_num = sum(1 for _ in ld.iter_ltgid())
    self.assertTrue(3 < ltg_num < 10,
                    ("log template generation fails? "
                     "(groups: {0})".format(ltg_num)))

    # Drop the LogData reference before removing the files it was using
    # (presumably this releases the database handle -- confirm).
    del ld
    common.rm(path_testlog)
    common.rm(path_db)
def show_diff_edges(ns):
    """Print, per time range, the edges of ``comp_conf.edge_set_diff``.

    ``ns.confs`` must hold exactly two config paths. Each edge is printed
    as ``[gid=..., host = ...]`` for both endpoints, joined by `` -> ``
    when ``showdag.isdirected`` is true for it and by `` <-> `` otherwise.
    Time ranges without edges are skipped.
    """
    l_conffp = ns.confs
    assert len(l_conffp) == 2
    conf1, conf2 = [
        config.open_config(fp, ex_defaults=[arguments.DEFAULT_CONFIG])
        for fp in l_conffp
    ]
    if ns.debug:
        level = logging.DEBUG
    else:
        level = logging.INFO
    loggers = [_logger, logging.getLogger("amulog")]
    config.set_common_logging(conf1, logger=loggers, lv=level)

    def _node_label(evmap, node):
        return "[gid={0[0]}, host = {0[1]}]".format(evmap.evdef(node))

    from . import comp_conf
    arg_manager = arguments.ArgumentManager(conf1)
    arg_manager.load()
    for dt_range in sorted(arg_manager.iter_dt_range()):
        cevmap, cgraph = comp_conf.edge_set_diff(conf1, conf2, dt_range)
        edge_lines = [
            "".join([
                _node_label(cevmap, edge[0]),
                " -> " if showdag.isdirected(edge, cgraph) else " <-> ",
                _node_label(cevmap, edge[1]),
            ])
            for edge in cgraph.edges()
        ]
        if edge_lines:
            print("date: {0}".format(dt_range[0]))
            print("\n".join(edge_lines))
def test_import(self):
    # Importing the template definitions shipped next to this test file
    # must yield exactly 6 templates.
    conf = config.open_config()
    conf['log_template']['lt_methods'] = "import"
    conf['log_template_import']['def_path'] = common.filepath_local(
        __file__, "testlog_tpl.txt")
    n_tpl = len(self._try_method(conf))
    self.assertTrue(n_tpl == 6)
def show_match_diff(ns):
    """Report troubles whose matched-edge counts differ between two configs.

    ``ns.confs`` must hold exactly two config paths. For every trouble of
    the first config's trouble manager, edges are matched under both
    configs with ``ns.rule`` / ``ns.cond``; when the totals differ, the
    trouble and the matched edges of both configs are printed.
    """
    l_conffp = ns.confs
    assert len(l_conffp) == 2
    conf1, conf2 = [
        config.open_config(fp, ex_defaults=[arguments.DEFAULT_CONFIG])
        for fp in l_conffp
    ]
    if ns.debug:
        level = logging.DEBUG
    else:
        level = logging.INFO
    loggers = [_logger, logging.getLogger("amulog")]
    config.set_common_logging(conf1, logger=loggers, lv=level)

    from . import trouble
    tm = trouble.init_trouble_manager(conf1)

    from logdag import showdag
    from . import match_edge

    def _load_dag(conf, name):
        dag = showdag.LogDAG(arguments.name2args(name, conf))
        dag.load()
        return dag

    def _count(d_args):
        return sum([len(l_edge) for l_edge in d_args.values()])

    def _report(conf, total, d_args):
        # Config name and total first, then one line per matched edge.
        print("{0}: {1}".format(config.getname(conf), total))
        for key, l_edge in d_args.items():
            dag = _load_dag(conf, key)
            for edge in l_edge:
                edgestr = dag.edge_str(edge, graph=dag.graph.to_undirected())
                print(key, edgestr)

    for tr in tm:
        d_args1 = match_edge.match_edges(conf1, tr, rule=ns.rule, cond=ns.cond)
        cnt1 = _count(d_args1)
        d_args2 = match_edge.match_edges(conf2, tr, rule=ns.rule, cond=ns.cond)
        cnt2 = _count(d_args2)
        if cnt1 == cnt2:
            continue

        print("Trouble {0} {1} ({2})".format(tr.tid, tr.data["date"],
                                             tr.data["group"]))
        _report(conf1, cnt1, d_args1)
        _report(conf2, cnt2, d_args2)
        print("")
def _conf_mysql(cls):
    """Return a default config adjusted for a local MySQL test database.

    The source log path and the indata file are taken from the class
    attributes ``_path_testlog`` and ``_path_ltgendump``.
    """
    conf = config.open_config()
    settings = (
        ('general', 'src_path', cls._path_testlog),
        ('database', 'database', "mysql"),
        ('database', 'mysql_host', "localhost"),
        ('database', 'mysql_dbname', "test_amulog"),
        ('database', 'mysql_user', "******"),
        ('database', 'mysql_passwd', "testamulog"),
        ('manager', 'indata_filename', cls._path_ltgendump),
    )
    for section, option, value in settings:
        conf[section][option] = value
    return conf
def __init__(self, dt_range, conf_path, gid_name, use_mapping):
    """Open the config at ``conf_path`` and the log database behind it.

    Args:
        dt_range: Stored unchanged as ``self.dt_range``.
        conf_path: Path passed to ``config.open_config``.
        gid_name: Stored as ``self._gid_name``.
        use_mapping: When true, an ``AnonymizeMapper`` is created from the
            config and loaded; otherwise ``self._mapper`` stays None.
    """
    self.dt_range = dt_range
    self._gid_name = gid_name
    self.conf = config.open_config(conf_path)
    self._ld = log_db.LogData(self.conf)
    self._mapper = None
    if not use_mapping:
        return

    # use if tsdb is anonymized but amulog db is original
    from amulog import anonymize
    mapper = anonymize.AnonymizeMapper(self.conf)
    self._mapper = mapper
    mapper.load()
def show_graph_diff_search(ns):
    """Run ``comp_conf.edge_diff_gid_search`` for two configs and ``ns.gid``.

    ``ns.confs`` must hold exactly two config paths; logging is configured
    from the first one.
    """
    l_conffp = ns.confs
    assert len(l_conffp) == 2
    conf1, conf2 = [
        config.open_config(fp, ex_defaults=[arguments.DEFAULT_CONFIG])
        for fp in l_conffp
    ]
    if ns.debug:
        level = logging.DEBUG
    else:
        level = logging.INFO
    loggers = [_logger, logging.getLogger("amulog")]
    config.set_common_logging(conf1, logger=loggers, lv=level)

    target_gid = ns.gid
    from . import comp_conf
    comp_conf.edge_diff_gid_search(conf1, conf2, target_gid)
def show_trouble(ns):
    """Print the trouble with id ``ns.tid`` followed by its messages.

    ``ns.lid_header`` is forwarded to ``get_message`` as ``show_lid``.
    """
    conf = open_logdag_config(ns)
    amulog_conf = config.open_config(conf["database_amulog"]["source_conf"])
    trouble_id = ns.tid

    from . import trouble
    tm = trouble.init_trouble_manager(conf)

    from amulog import log_db
    ld = log_db.LogData(amulog_conf)

    tr = tm[trouble_id]
    print(tr)
    messages = tr.get_message(ld, show_lid=ns.lid_header)
    print("\n".join(messages))
def test_tagging(self):
    # Train the CRF generator from the prepared train file and check that
    # the template generated for the test line keeps "ssh" and contains
    # the replacer symbol.
    from amulog.alg.crf import init_ltgen
    conf = config.open_config()
    conf["log_template_crf"]["model_filename"] = self._path_model

    ltgen = init_ltgen(conf, lt_common.TemplateTable())
    ltgen.init_trainer()
    ltgen.train_from_file(self._path_trainfile)

    tpl = ltgen.generate_tpl({"words": self.data_test})
    for expected in ("ssh", lt_common.REPLACER):
        self.assertTrue(expected in tpl)
def setUpClass(cls):
    """Create temporary files, a config pointing at them, and a test log.

    Sets on ``cls``: ``_path_testlog``, ``_path_testdb``,
    ``_path_ltgendump`` and ``_conf``.
    """

    def _new_tempfile():
        # Only the path is needed; close the descriptor right away.
        fd, path = tempfile.mkstemp()
        os.close(fd)
        return path

    cls._path_testlog = _new_tempfile()
    cls._path_testdb = _new_tempfile()
    cls._path_ltgendump = _new_tempfile()

    conf = config.open_config()
    cls._conf = conf
    conf['general']['src_path'] = cls._path_testlog
    conf['database']['sqlite3_filename'] = cls._path_testdb
    conf['manager']['indata_filename'] = cls._path_ltgendump

    generator = testlog.TestLogGenerator(testlog.DEFAULT_CONFIG, seed=3)
    generator.dump_log(cls._path_testlog)
def test_anonymize_restore(self):
    # Build the original amulog db, anonymize it into a second db and dump
    # the mapping; then generate DAGs with the anonymized amulog config and
    # show them with the original one. At least one edge is required.
    from amulog import __main__ as amulog_main
    from amulog import manager
    targets = amulog_main.get_targets_conf(self._amulog_conf)
    manager.process_files_online(self._amulog_conf, targets, reset_db=True)

    from amulog import anonymize
    mapper = anonymize.AnonymizeMapper(self._amulog_conf)
    mapper.anonymize(self._amulog_conf_anonymize)
    mapper.dump()

    conf = config.open_config(arguments.DEFAULT_CONFIG, base_default=False)
    overrides = (
        ("general", "evdb", "sql"),
        ("database_sql", "database", "sqlite3"),
        ("database_amulog", "source_conf", self._path_amulogconf_anonymize),
        ("database_amulog", "use_anonymize_mapping", "true"),
        ("database_amulog", "given_amulog_database", "original"),
        ("database_sql", "sqlite3_filename", self._path_testdb),
        ("dag", "event_detail_cache", "false"),
        ("filter", "rules", ""),
    )
    for section, option, value in overrides:
        conf[section][option] = value

    from logdag import dtutil
    from logdag.source import evgen_log
    unit = config.str2dur("1d")
    loader = evgen_log.LogEventLoader(conf)
    for dt_range in dtutil.iter_term(self._whole_term, unit):
        loader.read(dt_range, dump_org=False)

    arg_manager = arguments.ArgumentManager(conf)
    arg_manager.generate(arguments.all_args)

    from logdag import makedag
    from logdag import showdag
    edge_cnt = 0
    for args in arg_manager:
        # Generate from the anonymized amulog db ...
        conf["database_amulog"]["source_conf"] = self._path_amulogconf_anonymize
        ldag = makedag.makedag_main(args, do_dump=False)
        # ... then switch to the original db before showing details.
        conf["database_amulog"]["source_conf"] = self._path_amulogconf
        # Result is discarded here; wrap the call in print() when debugging.
        showdag.show_subgraphs(ldag, "detail", load_cache=False, graph=None)
        edge_cnt += ldag.number_of_edges()
    assert edge_cnt > 0
def test_tagging(self):
    # Train LTGenCRF on the in-memory training data, process the test line
    # and check that its template keeps "ssh" and contains the variable
    # symbol. The model file is removed afterwards.
    conf = config.open_config()
    sym = conf.get("log_template", "variable_symbol")
    ltgen = lt_crf.LTGenCRF(lt_common.TemplateTable(), sym, conf)

    # Every entry of a training line is split on whitespace.
    l_items = [[item.split() for item in data_line]
               for data_line in self.data_train]
    ltgen.init_trainer()
    ltgen.train(l_items)

    tid, _ = ltgen.process_line(self.data_test, None)
    tpl = ltgen._table.get_template(tid)
    for expected in ("ssh", sym):
        self.assertTrue(expected in tpl)

    common.rm(ltgen.model)
def list_trouble_label(ns):
    """Print one line per trouble summarizing its label groups.

    Each line is ``<tid> (<group>): `` followed by
    ``<label>(<number of gids>,<number of messages>) `` entries, ordered
    by descending number of gids.
    """
    conf = open_logdag_config(ns)
    amulog_conf = config.open_config(conf["database_amulog"]["source_conf"])

    from . import trouble
    tm = trouble.init_trouble_manager(conf)

    from amulog import log_db
    ld = log_db.LogData(amulog_conf)
    gid_name = conf.get("database_amulog", "event_gid")

    def _n_gids(item):
        return len(item[1])

    for tr in tm:
        d_ev, d_gid, d_host = trouble.event_stat(tr, ld, gid_name)
        d_group = trouble.event_label(d_gid, ld, gid_name)

        parts = ["{0} ({1}): ".format(tr.tid, tr.data["group"])]
        for group, l_gid in sorted(d_group.items(), key=_n_gids,
                                   reverse=True):
            n_messages = sum([d_gid[gid] for gid in l_gid])
            parts.append("{0}({1},{2}) ".format(group, len(l_gid),
                                                n_messages))
        print("".join(parts))
def show_graph_diff_lts(ns):
    """Print the result of ``comp_conf.edge_diff_gid`` per template id.

    ``ns.confs`` must hold exactly two config paths. Entries are ordered
    by descending number of names; each line shows the count, the id and
    ``ld.lt(id)``. The names themselves are printed only when there are
    fewer than 100 of them.
    """
    l_conffp = ns.confs
    assert len(l_conffp) == 2
    conf1, conf2 = [
        config.open_config(fp, ex_defaults=[arguments.DEFAULT_CONFIG])
        for fp in l_conffp
    ]
    if ns.debug:
        level = logging.DEBUG
    else:
        level = logging.INFO
    loggers = [_logger, logging.getLogger("amulog")]
    config.set_common_logging(conf1, logger=loggers, lv=level)

    from amulog import log_db
    ld = log_db.LogData(conf1)

    from . import comp_conf
    d_ltid = comp_conf.edge_diff_gid(conf1, conf2)

    def _n_names(item):
        return len(item[1])

    for ltid, l_name in sorted(d_ltid.items(), key=_n_names, reverse=True):
        n_names = len(l_name)
        print("{0}: {1} ({2})".format(n_names, ltid, ld.lt(ltid)))
        if n_names < 100:
            print(l_name)
def draw_graph_diff(ns):
    """Draw the ``edge_set_diff`` graph of two configs for one time range.

    ``ns.confs`` must hold exactly two config paths. The range starts at
    ``ns.timestr`` and spans the first config's ``dag.unit_term``. The
    relabeled graph is written to ``ns.filename``, which is then printed.
    """
    l_conffp = ns.confs
    assert len(l_conffp) == 2
    conf1, conf2 = [
        config.open_config(fp, ex_defaults=[arguments.DEFAULT_CONFIG])
        for fp in l_conffp
    ]
    if ns.debug:
        level = logging.DEBUG
    else:
        level = logging.INFO
    loggers = [_logger, logging.getLogger("amulog")]
    config.set_common_logging(conf1, logger=loggers, lv=level)

    range_start = dtutil.shortstr2dt(ns.timestr)
    range_end = range_start + config.getdur(conf1, "dag", "unit_term")
    out_path = ns.filename

    from . import comp_conf
    cevmap, cgraph = comp_conf.edge_set_diff(conf1, conf2,
                                             (range_start, range_end))

    from . import draw
    relabeled = draw.relabel_graph(conf1, cgraph, cevmap)
    draw.graph_nx(out_path, relabeled)
    print(out_path)
def all_args(conf):
    """Return ``(conf, (top_dt, end_dt), area)`` tuples for the whole term.

    Windows of length ``dag.unit_term`` start at the beginning of
    ``dag.whole_term`` and advance by ``dag.unit_diff`` while the start is
    before the end of the whole term. For each window one tuple is
    produced per entry of ``dag.area``; the special entry "each" is
    replaced by ``"host_" + host`` for every host the log database
    reports for that window.
    """
    amulog_conf = config.open_config(conf["database_amulog"]["source_conf"])
    from amulog import log_db
    ld = log_db.LogData(amulog_conf)

    w_top_dt, w_end_dt = config.getterm(conf, "dag", "whole_term")
    term = config.getdur(conf, "dag", "unit_term")
    diff = config.getdur(conf, "dag", "unit_diff")

    l_args = []
    top_dt = w_top_dt
    while top_dt < w_end_dt:
        end_dt = top_dt + term
        dt_range = (top_dt, end_dt)

        # Re-read the area list per window: it is modified in place below.
        l_area = config.getlist(conf, "dag", "area")
        if "each" in l_area:
            l_area.remove("each")
            l_area.extend(["host_" + host
                           for host in ld.whole_host(top_dt, end_dt)])

        l_args.extend([(conf, dt_range, area) for area in l_area])
        top_dt = top_dt + diff
    return l_args
def show_graph_lor_edges(ns):
    """Print edge counts of ``comp_conf.edge_set_lor`` for two configs.

    ``ns.confs`` must hold exactly two config paths. The total over all
    time ranges is printed first, then one ``<range start>: <count>`` line
    per time range in ascending order of the range start.
    """
    l_conffp = ns.confs
    assert len(l_conffp) == 2
    conf1, conf2 = [
        config.open_config(fp, ex_defaults=[arguments.DEFAULT_CONFIG])
        for fp in l_conffp
    ]
    if ns.debug:
        level = logging.DEBUG
    else:
        level = logging.INFO
    loggers = [_logger, logging.getLogger("amulog")]
    config.set_common_logging(conf1, logger=loggers, lv=level)

    from . import comp_conf
    total_edges = 0
    edges_by_start = {}
    arg_manager = arguments.ArgumentManager(conf1)
    arg_manager.load()
    for dt_range in arg_manager.iter_dt_range():
        cevmap, cgraph = comp_conf.edge_set_lor(conf1, conf2, dt_range)
        total_edges += cgraph.number_of_edges()
        edges_by_start[dt_range[0]] = cgraph.edges()

    print("logical disjunction edge num: {0}".format(total_edges))
    for start in sorted(edges_by_start):
        print("{0}: {1}".format(start, len(edges_by_start[start])))
def setUpClass(cls):
    """Create temporary files, two amulog configs and the test log.

    Sets on ``cls``: ``_path_testlog``, ``_path_amulogdb``,
    ``_path_amulogdb_anonymize``, ``_path_ltgendump``, ``_path_testdb``,
    ``_path_anonymize_mapping``, ``_amulog_conf``, ``_path_amulogconf``,
    ``_amulog_conf_anonymize``, ``_path_amulogconf_anonymize`` and
    ``_whole_term``. The anonymize config is a deep copy of the original
    one that differs only in the sqlite3 database filename.
    """

    def _new_tempfile():
        # Only the path is needed; close the descriptor right away.
        fd, path = tempfile.mkstemp()
        os.close(fd)
        return path

    def _write_conf(conf):
        # Dump conf to a fresh temporary file and return its path.
        # The with-block closes the file even if write() raises.
        fd, path = tempfile.mkstemp()
        with os.fdopen(fd, "w") as f:
            conf.write(f)
        return path

    cls._path_testlog = _new_tempfile()
    cls._path_amulogdb = _new_tempfile()
    cls._path_amulogdb_anonymize = _new_tempfile()
    cls._path_ltgendump = _new_tempfile()
    cls._path_testdb = _new_tempfile()
    cls._path_anonymize_mapping = _new_tempfile()

    cls._amulog_conf = config.open_config()
    cls._amulog_conf['general']['src_path'] = cls._path_testlog
    cls._amulog_conf['database']['sqlite3_filename'] = cls._path_amulogdb
    cls._amulog_conf['manager']['indata_filename'] = cls._path_ltgendump
    cls._amulog_conf['visual']['anonymize_mapping_file'] = cls._path_anonymize_mapping
    cls._amulog_conf['visual']['anonymize_overwrite_method'] = "standard"
    cls._path_amulogconf = _write_conf(cls._amulog_conf)

    import copy
    cls._amulog_conf_anonymize = copy.deepcopy(cls._amulog_conf)
    cls._amulog_conf_anonymize['database']['sqlite3_filename'] = cls._path_amulogdb_anonymize
    cls._path_amulogconf_anonymize = _write_conf(cls._amulog_conf_anonymize)

    tlg = testutil.TestLogGenerator(testutil.DEFAULT_CONFIG, seed=3)
    tlg.dump_log(cls._path_testlog)
    cls._whole_term = tlg.term
def show_diff_info(ns):
    """Print edge-count totals comparing the DAGs of two configs.

    ``ns.confs`` must hold exactly two config paths. Over all time ranges
    of the first config's argument manager, the edge counts of the
    logical-OR set, the common set and the two one-sided difference sets
    are summed and printed.
    """
    l_conffp = ns.confs
    assert len(l_conffp) == 2
    conf1, conf2 = [
        config.open_config(fp, ex_defaults=[arguments.DEFAULT_CONFIG])
        for fp in l_conffp
    ]
    if ns.debug:
        level = logging.DEBUG
    else:
        level = logging.INFO
    loggers = [_logger, logging.getLogger("amulog")]
    config.set_common_logging(conf1, logger=loggers, lv=level)

    from . import comp_conf
    totals = defaultdict(int)
    arg_manager = arguments.ArgumentManager(conf1)
    arg_manager.load()
    for dt_range in arg_manager.iter_dt_range():
        _, g_common = comp_conf.edge_set_common(conf1, conf2, dt_range)
        totals["common"] += g_common.number_of_edges()

        # The OR result is passed on to both difference computations.
        lor = comp_conf.edge_set_lor(conf1, conf2, dt_range)
        cevmap_lor, g_lor = lor
        totals["lor"] += g_lor.number_of_edges()

        _, g_diff1 = comp_conf.edge_set_diff(
            conf1, conf2, dt_range, lor=(cevmap_lor, g_lor))
        totals["diff1"] += g_diff1.number_of_edges()

        _, g_diff2 = comp_conf.edge_set_diff(
            conf2, conf1, dt_range, lor=(cevmap_lor, g_lor))
        totals["diff2"] += g_diff2.number_of_edges()

    print("Logical OR edges: {0}".format(totals["lor"]))
    print("Common edges: {0}".format(totals["common"]))
    print("Edges only found in {0}: {1}".format(ns.confs[0], totals["diff1"]))
    print("Edges only found in {0}: {1}".format(ns.confs[1], totals["diff2"]))