Пример #1
0
def similar_block_dag(conf, top_dt, end_dt, area, method, ignore_same=True):
    """Score every stored DAG result of ``area`` against one time block.

    The DAG for the block ``top_dt`` - ``end_dt`` is loaded from the
    ``[dag] output_dir`` directory when a result with the matching thread
    name is listed there, and is otherwise computed with ``pc_log.pc_log``
    (``dump=False``).  It is then compared with each result returned by
    ``pcresult.results_in_area``.

    Args:
        conf: Configuration object; read through ``get``, ``getboolean``
            and ``getdur``.
        top_dt: Start of the block to search for.
        end_dt: End of the block to search for.
        area: Area name handed to the ``pc_log`` / ``pcresult`` helpers.
        method: ``"dag_ed"`` (``pcresult.graph_edit_distance``) or
            ``"dag_mcs"`` (``pcresult.mcs_size_ratio``).
        ignore_same: If true, skip stored results whose term overlaps
            the searched block.

    Returns:
        A list of ``(result, dist)`` tuples in the order produced by
        ``pcresult.results_in_area``.

    Raises:
        NotImplementedError: If ``method`` is not a supported name.
    """
    # Validate before any expensive work so that a bad method name is
    # reported even when there is nothing to compare against.
    if method not in ("dag_ed", "dag_mcs"):
        raise NotImplementedError(
            "unsupported DAG comparison method: {0!r}".format(method))

    src_dir = conf.get("dag", "output_dir")
    ig_direction = conf.getboolean("search", "dag_ig_direction")
    wflag = conf.getboolean("search", "dag_weight")

    dur = conf.getdur("dag", "stat_bin")
    name = pc_log.thread_name(conf, top_dt, end_dt, dur, area)
    if name in common.rep_dir(src_dir):
        # A result for this exact block already exists: reuse it.
        r_temp = pcresult.PCOutput(conf).load(name)
    else:
        r_temp = pc_log.pc_log(conf, top_dt, end_dt, dur, area, dump=False)

    l_r = pcresult.results_in_area(conf, src_dir, area)
    # Edge weighting is optional; None is passed through when disabled.
    weight = pcresult.EdgeTFIDF(l_r) if wflag else None

    if method == "dag_ed":
        distance = pcresult.graph_edit_distance
    else:
        distance = pcresult.mcs_size_ratio

    result = []
    for r in l_r:
        if ignore_same and r.end_dt > top_dt and r.top_dt < end_dt:
            # The stored term shares time with the searched block.
            continue
        result.append((r, distance(r_temp, r, ig_direction, weight)))

    return result
Пример #2
0
def test_dag_search(conf, method, area = None):
    """Print, for every stored result, the most similar stored blocks.

    For each area (only ``area`` when given, otherwise every area from
    ``pcresult.result_areas(conf)``) and each result stored for it, a
    similarity search is run with ``ignore_same=True`` and the outcome is
    written to standard output: the condition string of the queried
    result, one ``<value> <condition string>`` line per hit, and an empty
    line.

    Args:
        conf: Configuration object; read through ``get``.
        method: ``"log"``, ``"dag_ed"`` or ``"dag_mcs"``.  ``None`` means
            use the ``[search] method`` configuration value.
        area: Single area to process, or ``None`` for all areas.

    Raises:
        NotImplementedError: If ``method`` is not a supported name (raised
            when the first stored result is processed).
    """
    import cg_dag
    if method is None:
        method = conf.get("search", "method")

    src_dir = conf.get("dag", "output_dir")
    if area is None:
        l_area = pcresult.result_areas(conf)
    else:
        l_area = [area]

    for target_area in l_area:
        for r in pcresult.results_in_area(conf, src_dir, target_area):
            if method == "log":
                result = similar_block_log(conf, r.top_dt, r.end_dt, r.area,
                        ignore_same = True)
            elif method in ("dag_ed", "dag_mcs"):
                result = cg_dag.similar_block_dag(conf, r.top_dt, r.end_dt,
                        r.area, method, ignore_same = True)
            else:
                raise NotImplementedError

            # Single-argument print(...) produces the same output whether
            # it is parsed as a Python 2 statement or a Python 3 call.
            print(r.cond_str())
            if len(result) > 10:
                # Only longer result lists are sorted and cut to the 10
                # smallest values; shorter lists keep their search order.
                result = ex_sorted(result,
                        key = lambda x: x[1], reverse = False)[:10]
            for r_found, val in result:
                print("%s %s" % (val, r_found.cond_str()))
            print("")
Пример #3
0
def similar_block_log(conf, top_dt, end_dt, area, ignore_same = True):
    """Score stored results of ``area`` against one block by event counts.

    The per-event line counts of the block ``top_dt`` - ``end_dt`` are
    taken from ``DAGComparison.data_for_cond`` when available and are
    otherwise counted from the log lines of that term.  The resulting
    event vector is compared with the vector of every stored result via
    ``_evv_distance``.  Each vector is also printed to standard output.

    Args:
        conf: Configuration object.
        top_dt: Start of the block to search for.
        end_dt: End of the block to search for.
        area: Area name of the block.
        ignore_same: If true, skip stored results whose term overlaps
            the searched block.

    Returns:
        A list of ``(result, dist)`` tuples.
    """
    ld = log_db.LogData(conf)
    dagc = DAGComparison(conf, area)

    counts = dagc.data_for_cond(top_dt, end_dt, area)
    if counts is None:
        # Nothing precomputed for this term: count events from the log.
        counts = {}
        lines = ld.iter_lines(top_dt = top_dt, end_dt = end_dt, area = area)
        for line in lines:
            weid = dagc.w_evmap.process_line(line)
            if weid in counts:
                counts[weid] += 1
            else:
                counts[weid] = 1

    l_weid = counts.keys()
    src_evv = _event_vector(l_weid, counts, dagc)
    print("{0} - {1} ({2}) : {3}".format(top_dt, end_dt, area, src_evv))

    candidates = pcresult.results_in_area(
        conf, conf.get("dag", "output_dir"), area)

    found = []
    for r in candidates:
        if ignore_same and (r.end_dt > top_dt and r.top_dt < end_dt):
            # The stored term shares time with the searched block.
            continue
        r_evv = _event_vector(l_weid, dagc.data_for_r(r), dagc)
        print("{0} - {1} ({2}) : {3}".format(r.top_dt, r.end_dt,
                r.area, r_evv))
        found.append((r, _evv_distance(src_evv, r_evv)))

    return found
Пример #4
0
def similar_block_dag(conf, top_dt, end_dt, area, method, ignore_same=True):
    """Compare the DAG of one time block with all stored DAGs of an area.

    The reference DAG for ``top_dt`` - ``end_dt`` is loaded through
    ``pcresult.PCOutput`` if its thread name appears in
    ``common.rep_dir`` of the ``[dag] output_dir`` directory; otherwise
    it is generated by ``pc_log.pc_log`` with ``dump=False``.

    Args:
        conf: Configuration object; read through ``get``, ``getboolean``
            and ``getdur``.
        top_dt: Start of the reference block.
        end_dt: End of the reference block.
        area: Area name passed to the ``pc_log`` / ``pcresult`` helpers.
        method: ``"dag_ed"`` to use ``pcresult.graph_edit_distance`` or
            ``"dag_mcs"`` to use ``pcresult.mcs_size_ratio``.
        ignore_same: If true, stored results whose term overlaps the
            reference block are left out.

    Returns:
        A list of ``(result, dist)`` tuples, one per compared result.

    Raises:
        NotImplementedError: If ``method`` is neither ``"dag_ed"`` nor
            ``"dag_mcs"``.
    """
    supported = ("dag_ed", "dag_mcs")
    if method not in supported:
        # Reject unknown methods up front instead of after building the
        # reference DAG, and also when no stored result gets compared.
        raise NotImplementedError(
            "method must be one of {0}, got {1!r}".format(supported, method))

    # Read once; used both for the cache lookup and the result listing.
    src_dir = conf.get("dag", "output_dir")
    ig_direction = conf.getboolean("search", "dag_ig_direction")
    wflag = conf.getboolean("search", "dag_weight")

    dur = conf.getdur("dag", "stat_bin")
    name = pc_log.thread_name(conf, top_dt, end_dt, dur, area)
    if name in common.rep_dir(src_dir):
        r_temp = pcresult.PCOutput(conf).load(name)
    else:
        r_temp = pc_log.pc_log(conf, top_dt, end_dt, dur, area, dump=False)

    l_r = pcresult.results_in_area(conf, src_dir, area)
    weight = None
    if wflag:
        weight = pcresult.EdgeTFIDF(l_r)

    # The method is fixed for the whole call, so resolve it outside the loop.
    if method == "dag_mcs":
        compare = pcresult.mcs_size_ratio
    else:
        compare = pcresult.graph_edit_distance

    result = []
    for r in l_r:
        overlaps = r.end_dt > top_dt and r.top_dt < end_dt
        if ignore_same and overlaps:
            continue
        dist = compare(r_temp, r, ig_direction, weight)
        result.append((r, dist))

    return result
Пример #5
0
    def _init_event_stat(self, conf):
        """Build the event map and per-result event counts.

        Sets ``self.w_evmap`` to an ``EventDefinitionMap`` covering the
        term returned by ``pc_log.whole_term``, then stores in
        ``self.d_ev``, for every result of ``self.area``, a dict that maps
        each event id produced by ``w_evmap.process_line`` to the number
        of log lines that yielded it.  Entries are keyed by
        ``(top_dt, end_dt, area)`` of the result.
        """
        ld = log_db.LogData(conf)
        whole_top, whole_end = pc_log.whole_term(conf, ld)
        self.w_evmap = log2event.EventDefinitionMap(
            whole_top, whole_end, conf.get("dag", "event_gid"))

        stored = pcresult.results_in_area(
            conf, conf.get("dag", "output_dir"), self.area)
        for r in stored:
            counts = {}
            lines = ld.iter_lines(top_dt = r.top_dt, end_dt = r.end_dt,
                    area = r.area)
            for line in lines:
                weid = self.w_evmap.process_line(line)
                if weid in counts:
                    counts[weid] += 1
                else:
                    counts[weid] = 1
            self.d_ev[(r.top_dt, r.end_dt, r.area)] = counts
Пример #6
0
def heatmap(conf, method, area, fn):
    """Plot the pairwise distances between all stored results of an area.

    Every stored result is searched against all stored results (with
    ``ignore_same=False``), giving an n x n distance matrix.  ``None``
    distances are replaced by the largest distance found.  The mesh and
    matrix are dumped to ``fn + ".temp"`` through ``explot.dump`` and the
    heatmap image is saved to ``fn``.

    Args:
        conf: Configuration object; read through ``get``.
        method: ``"log"``, ``"dag_ed"`` or ``"dag_mcs"``.  ``None`` means
            use the ``[search] method`` configuration value.
        area: Area name; ``None`` is replaced by ``"all"``.
        fn: Output file name of the image.

    Raises:
        NotImplementedError: If ``method`` is not a supported name.
        ValueError: If no distance value is available at all.
    """

    def result2data(result, l_label):
        # Reorder the (result, dist) pairs so they follow l_label.
        d_temp = {}
        for r, dist in result:
            d_temp[r.get_fn()] = dist
        return [d_temp[label] for label in l_label]

    import cg_dag
    if method is None:
        method = conf.get("search", "method")
    if area is None:
        area = "all"

    src_dir = conf.get("dag", "output_dir")
    l_r = pcresult.results_in_area(conf, src_dir, area)
    l_label = [r.get_fn() for r in l_r]
    l_result = []
    for r in l_r:
        if method == "log":
            result = similar_block_log(conf, r.top_dt, r.end_dt,
                    r.area, ignore_same = False)
        elif method in ("dag_ed", "dag_mcs"):
            result = cg_dag.similar_block_dag(conf,
                    r.top_dt, r.end_dt, r.area, method, ignore_same = False)
        else:
            raise NotImplementedError
        l_result.append(result2data(result, l_label))

    # Replace None with the max value of the whole result.  None is left
    # out of the comparison explicitly: ordering None against numbers only
    # works on Python 2 and raises TypeError on Python 3.
    known = [val for row in l_result for val in row if val is not None]
    if not known:
        raise ValueError("no distance values available to plot")
    mval = max(known)
    data = np.array([[mval if val is None else val for val in row]
                     for row in l_result])
    n = len(l_r)
    assert data.shape == (n, n)
    # pcolor takes the cell corners, hence n + 1 grid points per axis.
    x, y = np.meshgrid(np.arange(n + 1), np.arange(n + 1))

    explot.dump(fn + ".temp", (x, y, data))

    import matplotlib
    matplotlib.use('Agg')
    import matplotlib.pyplot as plt
    cm = explot.generate_cmap(["orangered", "white"])
    # Draw on a dedicated figure and always close it, so repeated calls
    # neither stack plots on one figure nor keep figures alive.
    fig = plt.figure()
    try:
        plt.pcolor(x, y, data, cmap = cm)
        plt.colorbar()
        plt.savefig(fn)
    finally:
        plt.close(fig)