def diff_event_edge(conf1, conf2):
    """Return {filename: event definitions present under conf1 but not conf2}."""
    d_set1 = defaultdict(set)
    d_set2 = defaultdict(set)
    d_diff = {}
    dirname = conf1.get("dag", "event_dir")
    for fp in common.rep_dir(dirname):
        fn = fp.split("/")[-1]
        edict1, evmap1 = log2event.load_edict(fp)
        for evdef in [evmap1.info(k) for k in edict1.keys()]:
            d_set1[fn].add(evdef)
    dirname = conf2.get("dag", "event_dir")
    for fp in common.rep_dir(dirname):
        fn = fp.split("/")[-1]
        edict2, evmap2 = log2event.load_edict(fp)
        for evdef in [evmap2.info(k) for k in edict2.keys()]:
            d_set2[fn].add(evdef)
    for k in d_set1.keys():
        if k not in d_set2:
            raise KeyError("{0} not found in event set 2".format(k))
        s1 = d_set1[k]
        s2 = d_set2[k]
        d_diff[k] = s1 - s2
    return d_diff
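# A minimal usage sketch for diff_event_edge (not part of the original
# module). The two config file names below are illustrative assumptions;
# config.open_config is used the same way in the __main__ block near the
# end of this listing.
def _demo_diff_event_edge():
    conf1 = config.open_config("trial1.conf")  # hypothetical config path
    conf2 = config.open_config("trial2.conf")  # hypothetical config path
    for fn, evdefs in sorted(diff_event_edge(conf1, conf2).items()):
        print fn, len(evdefs)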
def _get_targets(conf, args, recur):
    if len(args) == 0:
        if conf.getboolean("general", "src_recur") or recur:
            targets = common.recur_dir(conf.getlist("general", "src_path"))
        else:
            targets = common.rep_dir(conf.getlist("general", "src_path"))
    else:
        if recur:
            targets = common.recur_dir(args)
        else:
            targets = common.rep_dir(args)
    return targets
def count_edge_label_extype(conf):
    ll = init_ltlabel(conf)
    ld = log_db.LogData(conf)
    import pcresult
    s_keys = set()
    d_extype = defaultdict(int)
    src_dir = conf.get("dag", "output_dir")
    for fp in common.rep_dir(src_dir):
        r = pcresult.PCOutput(conf).load(fp)
        dedges, udedges = r._separate_edges()
        for edge in dedges + udedges:
            l_group = [r._label_group_ltg(r.evmap.info(eid).gid)
                       for eid in edge]
            for group in l_group:
                s_keys.add(group)
            if l_group[0] == l_group[1]:
                d_extype[tuple(l_group)] += 1
            else:
                d_extype[(l_group[0], l_group[1])] += 1
                d_extype[(l_group[1], l_group[0])] += 1
    table = []
    table.append(["group"] + list(s_keys))
    for key1 in s_keys:
        buf = [key1]
        for key2 in s_keys:
            cnt = d_extype[(key1, key2)]
            buf.append(cnt)
        table.append(buf)
    print common.cli_table(table)
def results_in_area(conf, src_dir, area):
    l_result = []
    for fp in common.rep_dir(src_dir):
        r = PCOutput(conf).load(fp)
        if r.area == area:
            l_result.append(r)
    return l_result
def mk_dict(self, src_path):
    for fp in common.rep_dir(src_path):
        with open(fp, 'r') as f:
            for line in f:
                dt, host, l_w, l_s = logparser.process_line(line)
                if l_w is None:
                    continue
                self._count_line(l_w)
def similar_block_dag(conf, top_dt, end_dt, area, method, ignore_same=True):
    src_dir = conf.get("dag", "output_dir")
    ig_direction = conf.getboolean("search", "dag_ig_direction")
    wflag = conf.getboolean("search", "dag_weight")
    dur = conf.getdur("dag", "stat_bin")
    name = pc_log.thread_name(conf, top_dt, end_dt, dur, area)
    if name in common.rep_dir(src_dir):
        r_temp = pcresult.PCOutput(conf).load(name)
    else:
        r_temp = pc_log.pc_log(conf, top_dt, end_dt, dur, area, dump=False)
    l_r = pcresult.results_in_area(conf, src_dir, area)
    weight = None
    if wflag:
        weight = pcresult.EdgeTFIDF(l_r)
    result = []
    for r in l_r:
        if ignore_same and (r.end_dt > top_dt and r.top_dt < end_dt):
            # skip results whose time window overlaps the given one
            pass
        else:
            if method == "dag_ed":
                dist = pcresult.graph_edit_distance(r_temp, r,
                                                    ig_direction, weight)
            elif method == "dag_mcs":
                dist = pcresult.mcs_size_ratio(r_temp, r,
                                               ig_direction, weight)
            else:
                raise NotImplementedError
            result.append((r, dist))
    return result
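# Hedged sketch (not in the original source): rank past DAGs by similarity
# to a one-day window. The config path, area name, and dates are
# illustrative assumptions; method must be "dag_ed" or "dag_mcs", since
# similar_block_dag raises NotImplementedError otherwise.
def _demo_similar_block_dag():
    import datetime
    conf = config.open_config("config.conf")  # hypothetical config path
    top_dt = datetime.datetime(2012, 1, 1)    # arbitrary example window
    end_dt = datetime.datetime(2012, 1, 2)
    pairs = similar_block_dag(conf, top_dt, end_dt, "all", "dag_ed")
    for r, dist in sorted(pairs, key=lambda x: x[1]):
        print dist, r.filename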
def list_maximum_clique(conf):
    src_dir = conf.get("dag", "output_dir")
    for fp in common.rep_dir(src_dir):
        r = PCOutput(conf).load(fp)
        g = r.graph.to_undirected()
        ret = graph_maximum_clique(g)
        print ret, r.filename
def similar_graph(conf, result, area, alg, cand=20):
    # alg is one of: ed, mcs, edw, mcsw
    assert result.area == area
    src_dir = conf.get("dag", "output_dir")
    l_result = []
    for fp in common.rep_dir(src_dir):
        r = PCOutput(conf).load(fp)
        if r.area == area:
            l_result.append(r)
    weight = None
    if "w" in alg:
        weight = EdgeTFIDF(l_result)
    data = []
    for r in l_result:
        if r.filename == result.filename:
            continue
        if alg.rstrip("w") == "ed":
            dist = graph_edit_distance(result, r, True, weight)
        elif alg.rstrip("w") == "mcs":
            dist = mcs_size_ratio(result, r, True, weight)
        else:
            raise ValueError()
        data.append((dist, r))
    data = ex_sorted(data, key=lambda x: x[0], reverse=False)
    for d in data[:cand]:
        print d[0], d[1].filename
def result_areas(conf):
    s_area = set()
    src_dir = conf.get("dag", "output_dir")
    for fp in common.rep_dir(src_dir):
        r = PCOutput(conf).load(fp)
        s_area.add(r.area)
    return list(s_area)
def list_clustering_coefficient(conf):
    src_dir = conf.get("dag", "output_dir")
    for fp in common.rep_dir(src_dir):
        r = PCOutput(conf).load(fp)
        g = r.graph.to_undirected()
        ret = graph_clustering_coefficient(g)
        print ret, r.filename
def results(conf, src_dir=None):
    if src_dir is None:
        src_dir = conf.get("dag", "output_dir")
    l_result = []
    for fp in common.rep_dir(src_dir):
        l_result.append(PCOutput(conf).load(fp))
    l_result.sort(key=lambda r: r.area)
    return l_result
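# Illustrative consumer of results() (not in the original source): list
# every loaded PCOutput by area and file name. Assumes conf was opened
# with config.open_config as in the __main__ block later in this listing.
def _demo_list_results(conf):
    for r in results(conf):
        print r.area, r.result_fn()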
def whole_netsize(conf):
    src_dir = conf.get("dag", "output_dir")
    d_size = {}
    for fp in common.rep_dir(src_dir):
        r = PCOutput(conf).load(fp)
        for net in graph_network(r.graph):
            d_size[len(net)] = d_size.get(len(net), 0) + 1
    for size, cnt in d_size.items():
        print size, cnt
def get_dict_eventset(conf):
    d_set = defaultdict(set)
    dirname = conf.get("dag", "event_dir")
    for fp in common.rep_dir(dirname):
        fn = fp.split("/")[-1]
        temp_edict, temp_evmap = log2event.load_edict(fp)
        for k in temp_edict.keys():
            evdef = temp_evmap.info(k)
            d_set[fn].add(evdef)
    return d_set
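# Sketch (not in the original source): because get_dict_eventset is keyed
# by file name, per-file set algebra is direct. The config file names are
# illustrative assumptions.
def _demo_common_events():
    d1 = get_dict_eventset(config.open_config("trial1.conf"))
    d2 = get_dict_eventset(config.open_config("trial2.conf"))
    for fn in sorted(set(d1) & set(d2)):
        print fn, len(d1[fn] & d2[fn])  # event definitions in both runs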
def test_parse(conf):
    LP = LogParser(conf)
    ret = []
    if conf.getboolean("general", "src_recur"):
        l_fp = common.recur_dir(conf.getlist("general", "src_path"))
    else:
        l_fp = common.rep_dir(conf.getlist("general", "src_path"))
    for fp in l_fp:
        with open(fp, 'r') as f:
            for line in f:
                ret.append(LP.process_line(line.rstrip("\n")))
    return ret
def list_netsize(conf):
    src_dir = conf.get("dag", "output_dir")
    for fp in common.rep_dir(src_dir):
        r = PCOutput(conf).load(fp)
        d_size = {}
        for net in graph_network(r.graph):
            d_size[len(net)] = d_size.get(len(net), 0) + 1
        buf = []
        for size, cnt in sorted(d_size.items(), reverse=True):
            if cnt == 1:
                buf.append(str(size))
            else:
                buf.append("{0}x{1}".format(size, cnt))
        print "{0} : {1}".format(r.result_fn(), ", ".join(buf))
def get_dict_event_replaced(conf):
    """Return {filename: periodic-remainder (replaced) event definitions}."""
    d = {}
    type_rp = log2event.EventDefinitionMap.type_periodic_remainder
    dirname = conf.get("dag", "event_dir")
    for fp in common.rep_dir(dirname):
        fn = fp.split("/")[-1]
        edict, evmap = log2event.load_edict(fp)
        l_evdef = []
        for eid in edict.keys():
            evdef = evmap.info(eid)
            if evdef.type == type_rp:
                l_evdef.append(evdef)
        if len(l_evdef) > 0:
            d[fn] = l_evdef
    return d
def event_label(conf):
    import log_db
    ld = log_db.LogData(conf)
    import lt_label
    ll = lt_label.init_ltlabel(conf)
    d_group = defaultdict(int)
    dirname = conf.get("dag", "event_dir")
    for fp in common.rep_dir(dirname):
        fn = fp.split("/")[-1]
        edict, evmap = log2event.load_edict(fp)
        for evdef in [evmap.info(k) for k in edict.keys()]:
            gid = evdef.gid
            l_lt = ld.ltg_members(gid)
            group = ll.get_ltg_group(gid, l_lt)
            d_group[group] += 1
    return d_group
def event_replaced(conf):
    type_rp = log2event.EventDefinitionMap.type_periodic_remainder
    dirname = conf.get("dag", "event_dir")
    for fp in common.rep_dir(dirname):
        fn = fp.split("/")[-1]
        area, gid = fn.split("_")
        edict, evmap = log2event.load_edict(fp)
        l_evdef = []
        for eid in edict.keys():
            evdef = evmap.info(eid)
            if evdef.type == type_rp:
                l_evdef.append(evdef)
        if len(l_evdef) > 0:
            print("# {0}".format(fn))
            for evdef in l_evdef:
                print(evdef)
def list_detailed_results(conf):
    src_dir = conf.get("dag", "output_dir")
    splitter = ","
    print splitter.join(
        ["dt", "area", "node", "edge", "edge_oh", "d_edge", "d_edge_oh", "fn"])
    for fp in common.rep_dir(src_dir):
        r = PCOutput(conf).load(fp)
        row = []
        row.append(str(r.top_dt))
        row.append(str(r.area))
        row.append(str(len(r.graph.nodes())))
        edge_num = number_of_edges(r.graph)
        row.append(str(edge_num))
        row.append(str(count_edges(r._edge_across_host())))
        dedges, udedges = r._separate_edges()
        row.append(str(count_edges(dedges)))
        row.append(str(count_edges(r._edge_across_host(dedges))))
        row.append(r.result_fn())
        print splitter.join(row)
def search_edge_label_extype(conf, label1, label2):
    ll = init_ltlabel(conf)
    ld = log_db.LogData(conf)
    import pcresult
    src_dir = conf.get("dag", "output_dir")
    for fp in common.rep_dir(src_dir):
        r = pcresult.PCOutput(conf).load(fp)
        rflag = False
        dedges, udedges = r._separate_edges()
        for edge in dedges + udedges:
            l_group = [r._label_group_ltg(r.evmap.info(eid).gid)
                       for eid in edge]
            if (l_group[0] == label1 and l_group[1] == label2) or \
                    (l_group[1] == label1 and l_group[0] == label2):
                if not rflag:
                    print("# {0}".format(r.filename))
                    rflag = True
                r._print_edge(edge, False)
                r._print_edge_lt(edge)
def show_results_sum(conf, src_dir):
    d = {}
    for fp in common.rep_dir(src_dir):
        r = PCOutput(conf).load(fp)
        ev = len(r.evmap)
        dedges, udedges = r._separate_edges()
        edge_num = number_of_edges(r.graph)
        edge_oh = count_edges(r._edge_across_host())
        d_edge = count_edges(dedges)
        d_edge_oh = count_edges(r._edge_across_host(dedges))
        d["event"] = d.get("event", 0) + ev
        d["edge"] = d.get("edge", 0) + edge_num
        d["edge_oh"] = d.get("edge_oh", 0) + edge_oh
        d["d_edge"] = d.get("d_edge", 0) + d_edge
        d["d_edge_oh"] = d.get("d_edge_oh", 0) + d_edge_oh
    table = []
    table.append(["number of events (nodes)", d["event"], ""])
    table.append(["number of edges", d["edge"], ""])
    table.append(["number of edges across hosts",
                  d["edge_oh"], 1.0 * d["edge_oh"] / d["edge"]])
    table.append(["number of directed edges",
                  d["d_edge"], 1.0 * d["d_edge"] / d["edge"]])
    table.append(["number of directed edges across hosts",
                  d["d_edge_oh"], 1.0 * d["d_edge_oh"] / d["edge"]])
    table.append(["number of undirected edges",
                  d["edge"] - d["d_edge"],
                  1.0 * (d["edge"] - d["d_edge"]) / d["edge"]])
    table.append(["number of undirected edges across hosts",
                  d["edge_oh"] - d["d_edge_oh"],
                  1.0 * (d["edge_oh"] - d["d_edge_oh"]) / d["edge"]])
    print common.cli_table(table)
def count_edge_label(conf):
    ll = init_ltlabel(conf)
    import pcresult
    d_cnt_label = defaultdict(int)
    d_cnt_group = defaultdict(int)
    src_dir = conf.get("dag", "output_dir")
    for fp in common.rep_dir(src_dir):
        _logger.info("count_edge_label processing {0}".format(fp))
        r = pcresult.PCOutput(conf).load(fp)
        for edge in r.graph.edges():
            for eid in edge:
                gid = r.evmap.info(eid).gid
                label = r._label_ltg(gid)
                d_cnt_label[label] += 1
                group = r._label_group_ltg(gid)
                d_cnt_group[group] += 1
    for group, l_label in ll.d_group.iteritems():
        cnt_group = d_cnt_group[group]
        print("group {0}: {1} nodes".format(group, cnt_group))
        for label in l_label:
            cnt_label = d_cnt_label[label]
            print(" label {0}: {1} nodes".format(label, cnt_label))
        print
def count_event_label(conf):
    import log2event
    ld = log_db.LogData(conf)
    ll = init_ltlabel(conf)
    d_label = defaultdict(int)
    d_group = defaultdict(int)
    dirname = conf.get("dag", "event_dir")
    for fp in common.rep_dir(dirname):
        fn = fp.split("/")[-1]
        edict, evmap = log2event.load_edict(fp)
        for eid, l_dt in edict.iteritems():
            gid = evmap.info(eid).gid
            l_lt = ld.ltg_members(gid)
            label = ll.get_ltg_label(gid, l_lt)
            group = ll.get_group(label)
            d_label[label] += len(l_dt)
            d_group[group] += len(l_dt)
    print("all lines : {0}".format(sum(d_group.values())))
    print
    for group, l_label in ll.d_group.iteritems():
        if group in d_group:
            cnt_group = d_group.pop(group)
        else:
            cnt_group = 0
        print("group {0}: {1} lines".format(group, cnt_group))
        for label in l_label:
            if label in d_label:
                cnt_label = d_label.pop(label)
            else:
                cnt_label = 0
            print(" label {0}: {1} lines".format(label, cnt_label))
        print
        process_files(conf, targets, True)
        timer.stop()
    elif mode == "make-init":
        targets = _get_targets(conf, args, options.recur)
        timer = common.Timer("log_db make-init", output=_logger)
        timer.start()
        process_init_data(conf, targets)
        timer.stop()
    elif mode == "add":
        if len(args) == 0:
            sys.exit("give me filenames of log data to add to DB")
        else:
            if options.recur:
                targets = common.recur_dir(args)
            else:
                targets = common.rep_dir(args)
        timer = common.Timer("log_db add", output=_logger)
        timer.start()
        process_files(conf, targets, False)
        timer.stop()
    elif mode == "update":
        if len(args) == 0:
            sys.exit("give me filenames of log data to add to DB")
        else:
            if options.recur:
                targets = common.recur_dir(args)
            else:
                targets = common.rep_dir(args)
        timer = common.Timer("log_db update", output=_logger)
        timer.start()
        process_files(conf, targets, False, diff=True)
def load_edict_dir(conf):
    """Yield (edict, evmap) for every event file in the event directory."""
    dirname = conf.get("dag", "event_dir")
    for fp in common.rep_dir(dirname):
        yield load_edict(fp)
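# Sketch (not in the original source): consume the load_edict_dir generator
# to count events and log lines per file without loading everything at
# once. edict maps an event id to its list of timestamps, matching the
# `for eid, l_dt in edict.iteritems()` usage elsewhere in this listing.
def _demo_edict_stats(conf):
    for edict, _evmap in load_edict_dir(conf):
        n_lines = sum(len(l_dt) for l_dt in edict.values())
        print len(edict), "events,", n_lines, "lines"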
def _update_check(self):
    lm = common.last_modified(common.rep_dir(self.src_dir))
    return lm > self.latest
            return None, None, None, None
        l_word, l_symbol = self.split_message(message)
        return dt, host, l_word, l_symbol


def test_parse(conf):
    LP = LogParser(conf)
    ret = []
    if conf.getboolean("general", "src_recur"):
        l_fp = common.recur_dir(conf.getlist("general", "src_path"))
    else:
        l_fp = common.rep_dir(conf.getlist("general", "src_path"))
    for fp in l_fp:
        with open(fp, 'r') as f:
            for line in f:
                ret.append(LP.process_line(line.rstrip("\n")))
    return ret


if __name__ == "__main__":
    if len(sys.argv) < 3:
        sys.exit("usage: {0} config targets".format(sys.argv[0]))
    conf = config.open_config(sys.argv[1])
    LP = LogParser(conf)
    for fp in common.rep_dir(sys.argv[2:]):
        with open(fp) as f:
            for line in f:
                print LP.process_line(line.rstrip("\n"))
def count_edge_label_detail(conf):
    ll = init_ltlabel(conf)
    ld = log_db.LogData(conf)
    import pcresult
    d_group = defaultdict(int)
    d_group_directed = defaultdict(int)
    d_group_intype = defaultdict(int)
    d_group_intype_directed = defaultdict(int)
    d_group_mean = defaultdict(int)
    d_group_mean_directed = defaultdict(int)
    import edge_filter
    ef = edge_filter.EdgeFilter(conf)
    src_dir = conf.get("dag", "output_dir")
    for fp in common.rep_dir(src_dir):
        _logger.info("count_edge_label_detail processing {0}".format(fp))
        r = pcresult.PCOutput(conf).load(fp)
        dedges, udedges = r._separate_edges()
        for edge in dedges:
            cedge = [r.evmap.info(eid) for eid in edge]
            fflag = ef.isfiltered(cedge)
            l_group = [r._label_group_ltg(r.evmap.info(eid).gid)
                       for eid in edge]
            iflag = (l_group[0] == l_group[1])
            for group in l_group:
                d_group[group] += 1
                d_group_directed[group] += 1
                if iflag:
                    d_group_intype[group] += 1
                    d_group_intype_directed[group] += 1
                if not fflag:
                    d_group_mean[group] += 1
                    d_group_mean_directed[group] += 1
        for edge in udedges:
            cedge = [r.evmap.info(eid) for eid in edge]
            fflag = ef.isfiltered(cedge)
            l_group = [r._label_group_ltg(r.evmap.info(eid).gid)
                       for eid in edge]
            iflag = (l_group[0] == l_group[1])
            for group in l_group:
                d_group[group] += 1
                if iflag:
                    d_group_intype[group] += 1
                if not fflag:
                    d_group_mean[group] += 1
    table = [["key", "all", "directed", "intype", "intype_directed",
              "important", "important_directed"]]
    for key in d_group.keys():
        temp = [key]
        temp.append(d_group[key])
        temp.append(d_group_directed[key])
        temp.append(d_group_intype[key])
        temp.append(d_group_intype_directed[key])
        temp.append(d_group_mean[key])
        temp.append(d_group_mean_directed[key])
        table.append(temp)
    table.append(["total", sum(d_group.values()),
                  sum(d_group_directed.values()),
                  sum(d_group_intype.values()),
                  sum(d_group_intype_directed.values()),
                  sum(d_group_mean.values()),
                  sum(d_group_mean_directed.values())])
    print common.cli_table(table)