def similar_block_dag(conf, top_dt, end_dt, area, method, ignore_same=True):
    """Measure DAG distances between one term and the results of an area.

    The template DAG for (top_dt, end_dt, area) is loaded from the "dag"
    output directory when a result with the matching name is listed there,
    otherwise it is computed with pc_log.pc_log (without dumping it).
    It is then compared with every result returned by
    pcresult.results_in_area for the same area.

    Args:
        conf: configuration object; read through get / getboolean / getdur.
        top_dt: start of the template term.
        end_dt: end of the template term.
        area: area name used for both the template and the candidates.
        method: "dag_ed" (pcresult.graph_edit_distance) or
            "dag_mcs" (pcresult.mcs_size_ratio).
        ignore_same: if true, candidates whose term overlaps
            [top_dt, end_dt] are skipped.

    Returns:
        A list of (result, distance) tuples, in the order of the candidates.

    Raises:
        NotImplementedError: if method is not one of the supported names.
    """
    # Validate first: previously an unknown method was only detected inside
    # the comparison loop, i.e. after the template had been built and never
    # at all when there was no candidate left to compare.
    if method not in ("dag_ed", "dag_mcs"):
        raise NotImplementedError(
            "unsupported DAG search method: {0!r}".format(method))

    src_dir = conf.get("dag", "output_dir")
    ig_direction = conf.getboolean("search", "dag_ig_direction")
    wflag = conf.getboolean("search", "dag_weight")
    dur = conf.getdur("dag", "stat_bin")

    # Reuse an already stored result for the template when available
    name = pc_log.thread_name(conf, top_dt, end_dt, dur, area)
    if name in common.rep_dir(src_dir):
        r_temp = pcresult.PCOutput(conf).load(name)
    else:
        r_temp = pc_log.pc_log(conf, top_dt, end_dt, dur, area, dump=False)

    l_r = pcresult.results_in_area(conf, src_dir, area)
    weight = pcresult.EdgeTFIDF(l_r) if wflag else None

    # Pick the distance function once instead of branching per candidate
    if method == "dag_ed":
        distance = pcresult.graph_edit_distance
    else:
        distance = pcresult.mcs_size_ratio

    result = []
    for r in l_r:
        if ignore_same and r.end_dt > top_dt and r.top_dt < end_dt:
            # ignore if common term included
            continue
        result.append((r, distance(r_temp, r, ig_direction, weight)))
    return result
def test_dag_search(conf, method, area = None):
    """Print the closest search results for every result of each area.

    For each result in the selected area(s), the similarity search named
    by method is run with ignore_same enabled, and the condition string of
    the query followed by up to 10 found results (smallest value first)
    is printed, terminated by an empty line.

    Args:
        conf: configuration object; read through get.
        method: "log", "dag_ed" or "dag_mcs"; when None the value of
            option "method" in section "search" is used.
        area: area name to test; when None every area returned by
            pcresult.result_areas is tested.

    Raises:
        NotImplementedError: if method is not one of the supported names
            (raised when the first result is processed).
    """
    import cg_dag
    if method is None:
        method = conf.get("search", "method")
    src_dir = conf.get("dag", "output_dir")
    if area is None:
        l_area = pcresult.result_areas(conf)
    else:
        l_area = [area]

    for area in l_area:
        l_r = pcresult.results_in_area(conf, src_dir, area)
        for r in l_r:
            if method == "log":
                result = similar_block_log(conf, r.top_dt, r.end_dt, r.area,
                                           ignore_same = True)
            elif method in ("dag_ed", "dag_mcs"):
                result = cg_dag.similar_block_dag(conf, r.top_dt, r.end_dt,
                                                  r.area, method,
                                                  ignore_same = True)
            else:
                raise NotImplementedError

            print(r.cond_str())
            # Sort whenever there is something to order; previously lists
            # of 10 or fewer entries were printed unsorted.
            if len(result) > 1:
                result = ex_sorted(result, key = lambda x: x[1],
                                   reverse = False)[:10]
            for r_found, val in result:
                # One pre-formatted argument keeps the output identical
                # under both Python 2 and Python 3.
                print("%s %s" % (val, r_found.cond_str()))
            print("")
def similar_block_log(conf, top_dt, end_dt, area, ignore_same = True):
    """Measure event-vector distances between one term and an area's results.

    Event counts for (top_dt, end_dt, area) are taken from
    DAGComparison.data_for_cond; when that returns None they are counted
    from the log lines of the term instead. The resulting event vector is
    compared, via _evv_distance, with the vector of every result returned
    by pcresult.results_in_area for the area. Each vector is also printed.

    Args:
        conf: configuration object; read through get.
        top_dt: start of the query term.
        end_dt: end of the query term.
        area: area name of the query and of the candidates.
        ignore_same: if true, candidates whose term overlaps
            [top_dt, end_dt] are skipped.

    Returns:
        A list of (result, distance) tuples, in the order of the candidates.
    """
    report = "{0} - {1} ({2}) : {3}"

    ld = log_db.LogData(conf)
    dagc = DAGComparison(conf, area)

    counts = dagc.data_for_cond(top_dt, end_dt, area)
    if counts is None:
        # No precomputed counts for this term: count events from the log
        counts = {}
        lines = ld.iter_lines(top_dt = top_dt, end_dt = end_dt, area = area)
        for line in lines:
            weid = dagc.w_evmap.process_line(line)
            counts[weid] = counts.get(weid, 0) + 1

    # The query's event ids define the layout shared by all vectors below
    weids = counts.keys()
    src_evv = _event_vector(weids, counts, dagc)
    print(report.format(top_dt, end_dt, area, src_evv))

    src_dir = conf.get("dag", "output_dir")
    found = []
    for cand in pcresult.results_in_area(conf, src_dir, area):
        if ignore_same and (cand.end_dt > top_dt and cand.top_dt < end_dt):
            # ignore if common term included
            continue
        cand_evv = _event_vector(weids, dagc.data_for_r(cand), dagc)
        print(report.format(cand.top_dt, cand.end_dt, cand.area, cand_evv))
        found.append((cand, _evv_distance(src_evv, cand_evv)))
    return found
def _init_event_stat(self, conf):
    """Build the event definition map and per-result event counts.

    Sets self.w_evmap to an EventDefinitionMap spanning the whole term
    reported by pc_log.whole_term, then, for every result of self.area,
    stores in self.d_ev a dict mapping event id to its number of log
    lines, keyed by (top_dt, end_dt, area) of the result.

    NOTE(review): expects self.area and self.d_ev to be set beforehand;
    their initialization is not visible here.

    Args:
        conf: configuration object; read through get.
    """
    ld = log_db.LogData(conf)
    w_top_dt, w_end_dt = pc_log.whole_term(conf, ld)
    gid_name = conf.get("dag", "event_gid")
    self.w_evmap = log2event.EventDefinitionMap(w_top_dt, w_end_dt, gid_name)

    src_dir = conf.get("dag", "output_dir")
    for res in pcresult.results_in_area(conf, src_dir, self.area):
        counts = {}
        lines = ld.iter_lines(top_dt = res.top_dt, end_dt = res.end_dt,
                              area = res.area)
        for line in lines:
            weid = self.w_evmap.process_line(line)
            if weid in counts:
                counts[weid] += 1
            else:
                counts[weid] = 1
        key = (res.top_dt, res.end_dt, res.area)
        self.d_ev[key] = counts
def heatmap(conf, method, area, fn):
    """Plot the pairwise distance matrix of an area's results as a heatmap.

    Every result returned by pcresult.results_in_area is used as a query
    against all results of the same area (ignore_same disabled), giving an
    n x n matrix. Missing distances (None) are replaced by the largest
    distance found anywhere in the matrix. The mesh and matrix are dumped
    to fn + ".temp" through explot.dump and the plot is saved to fn.

    Args:
        conf: configuration object; read through get.
        method: "log", "dag_ed" or "dag_mcs"; when None the value of
            option "method" in section "search" is used.
        area: area name; when None "all" is used.
        fn: output file name of the figure.

    Raises:
        NotImplementedError: if method is not one of the supported names.
        ValueError: if there is no non-None distance to plot (including
            the case where the area has no result).
    """

    def result2data(result, l_label):
        # Index distances by result file name, then emit them in label order
        d_temp = dict((r.get_fn(), dist) for r, dist in result)
        return [d_temp[label] for label in l_label]

    import cg_dag
    if method is None:
        method = conf.get("search", "method")
    if area is None:
        area = "all"
    src_dir = conf.get("dag", "output_dir")
    l_r = pcresult.results_in_area(conf, src_dir, area)
    l_label = [r.get_fn() for r in l_r]

    l_result = []
    for r in l_r:
        if method == "log":
            result = similar_block_log(conf, r.top_dt, r.end_dt, r.area,
                                       ignore_same = False)
        elif method in ("dag_ed", "dag_mcs"):
            result = cg_dag.similar_block_dag(conf, r.top_dt, r.end_dt,
                                              r.area, method,
                                              ignore_same = False)
        else:
            raise NotImplementedError
        l_result.append(result2data(result, l_label))

    # Replace None with the max value of the whole result. The maximum is
    # taken over real values only: comparing None with numbers only works
    # on Python 2 and raises TypeError on Python 3.
    l_valid = [val for row in l_result for val in row if val is not None]
    if not l_valid:
        raise ValueError("no distance values available for heatmap")
    mval = max(l_valid)
    data = np.array([[mval if val is None else val for val in row]
                     for row in l_result])

    n = len(l_r)
    assert data.shape == (n, n)
    # pcolor takes cell corners, hence n + 1 coordinates per axis
    x, y = np.meshgrid(np.arange(n + 1), np.arange(n + 1))
    explot.dump(fn + ".temp", (x, y, data))

    import matplotlib
    matplotlib.use('Agg')
    import matplotlib.pyplot as plt
    cm = explot.generate_cmap(["orangered", "white"])
    # Draw on a dedicated figure and close it afterwards; pyplot keeps
    # figures alive until closed, so repeated calls would otherwise draw
    # on top of the previous heatmap and accumulate figures.
    fig = plt.figure()
    try:
        plt.pcolor(x, y, data, cmap = cm)
        plt.colorbar()
        plt.savefig(fn)
    finally:
        plt.close(fig)