示例#1
0
def init_logfilter(conf, source):
    """Build a LogFilter from the [filter] section of *conf*.

    Raw string options are taken as-is from the section; typed options
    (ints, floats, durations, duration-pair rules) are parsed explicitly
    and override the raw values in the keyword dict.
    """
    def _dur_rules(option):
        # Parse "LENGTH_BINSIZE"-style entries into (timedelta, timedelta)
        # tuples, e.g. "1d_10m" -> (1 day, 10 minutes).
        return [tuple(config.str2dur(part) for part in cond.split("_"))
                for cond in config.gettuple(conf, "filter", option)]

    kwargs = dict(conf["filter"])
    kwargs["pre_count"] = conf.getint("filter", "pre_count")
    kwargs["pre_term"] = config.getdur(conf, "filter", "pre_term")

    kwargs["fourier_sample_rule"] = _dur_rules("fourier_sample_rule")
    for opt in ("fourier_th_spec", "fourier_th_eval", "fourier_th_restore"):
        kwargs[opt] = conf.getfloat("filter", opt)
    kwargs["fourier_peak_order"] = conf.getint("filter", "fourier_peak_order")

    kwargs["corr_sample_rule"] = _dur_rules("corr_sample_rule")
    kwargs["corr_th"] = conf.getfloat("filter", "corr_th")
    kwargs["corr_diff"] = [config.str2dur(diffstr) for diffstr
                           in config.gettuple(conf, "filter", "corr_diff")]

    kwargs["linear_sample_rule"] = _dur_rules("linear_sample_rule")
    kwargs["linear_count"] = conf.getint("filter", "linear_count")
    kwargs["linear_th"] = conf.getfloat("filter", "linear_th")

    return LogFilter(source, **kwargs)
示例#2
0
    def _iter_feature_terms(self, feature_def, dt_range):
        """Yield (output_term, read_term) range pairs covering dt_range.

        Steps through dt_range in increments of unit_diff and, for each
        step ending at dte, yields two (start, end) datetime pairs:

        * output term: (dts, dte - dtindex_offset)
        * read term: the last unit_term before dte (clamped to the start
          of dt_range), widened by sense_offset on both sides to avoid
          convolution boundary effects.

        Args:
            feature_def: mapping with optional keys "func_list",
                "convolve_radius", "data_range", "sense_range"; when the
                latter keys are absent, falls back to [general] options
                in self.conf (configparser-style compatibility).
            dt_range: (start, end) datetime pair to cover.
        """
        # Avoid convolve boundary problem
        if "convolve" in feature_def["func_list"]:
            if "convolve_radius" in feature_def:
                convolve_radius = feature_def["convolve_radius"]
            else:
                # compatibility for configparser-style rule
                convolve_radius = self.conf.getint("general",
                                                   "evdb_convolve_radius")
        else:
            # no convolution requested -> no extra padding needed
            convolve_radius = 0
        # padding (in time) added on both sides of the read term
        sense_offset = self._feature_bin_size * convolve_radius

        # datetimeindex.get_loc includes stop time (unlike other types!)
        # dtindex_offset remove the stop time
        dtindex_offset = self._feature_bin_size

        if "data_range" in feature_def:
            unit_term = config.str2dur(feature_def["data_range"])
            if "sense_range" in feature_def:
                unit_diff = config.str2dur(feature_def["sense_range"])
            else:
                # no explicit step size: advance by one full term
                unit_diff = unit_term
        else:
            # compatibility for configparser-style rule
            unit_term = config.getdur(self.conf, "general", "evdb_unit_term")
            unit_diff = config.getdur(self.conf, "general", "evdb_unit_diff")

        for dts, dte in dtutil.iter_term(dt_range, unit_diff):
            # read term ends at dte and extends back unit_term,
            # clamped so it never starts before dt_range[0]
            sense_dts = max(dt_range[0], dte - unit_term)
            yield ((dts, dte - dtindex_offset),
                   (sense_dts - sense_offset,
                    dte - dtindex_offset + sense_offset))
示例#3
0
    def test_load_asis(self):
        """End-to-end check: build the event DB from logs, generate DAG
        arguments, construct all DAGs, and assert at least one edge."""
        conf = config.open_config(arguments.DEFAULT_CONFIG, base_default=False)
        conf["general"]["evdb"] = "sql"
        conf["database_sql"]["database"] = "sqlite3"
        conf["database_amulog"]["source_conf"] = self._path_amulogconf
        conf["database_sql"]["sqlite3_filename"] = self._path_testdb
        conf["filter"]["rules"] = ""

        from logdag import dtutil
        from logdag.source import evgen_log
        # Load log events into the event DB one day at a time.
        loader = evgen_log.LogEventLoader(conf)
        term_size = config.str2dur("1d")
        for term in dtutil.iter_term(self._whole_term, term_size):
            loader.read(term, dump_org=False)

        arg_manager = arguments.ArgumentManager(conf)
        arg_manager.generate(arguments.all_args)

        from logdag import makedag
        # Build every DAG and count edges over all argument sets.
        total_edges = 0
        for args in arg_manager:
            ldag = makedag.makedag_main(args, do_dump=False)
            total_edges += ldag.number_of_edges()
        assert total_edges > 0
示例#4
0
    def get_df(self,
               measure,
               d_tags,
               fields,
               dt_range,
               str_bin=None,
               func=None,
               fill=None,
               limit=None):
        """Return a pandas DataFrame of time-series values for *measure*.

        Args:
            measure: measurement name to query.
            d_tags: tag mapping used to select rows.
            fields: field names (columns); None means all fields of measure.
            dt_range: (start, end) datetime pair to query.
            str_bin: bin size string (e.g. "1d"); required when func == "sum".
            func: None to return raw rows as-is, or "sum" to aggregate
                values into fixed-size bins over dt_range.
            fill: value substituted for NaNs in each row (applied when
                not None).
            limit: maximum number of rows to read from the cursor.

        Raises:
            NotImplementedError: for any func other than None or "sum".
        """
        if fields is None:
            fields = self.list_fields(measure)

        cursor = self._get(measure, d_tags, fields, dt_range)
        l_dt = []
        l_values = []
        for rid, row in enumerate(cursor):
            if limit is not None and rid >= limit:
                break
            dtstr, values = self._get_row_values(row)
            # obtained as naive(utc), converted into aware(local)
            l_dt.append(self.pdtimestamp(self._db.strptime(dtstr)))
            # "is not None" so that fill=0 (a falsy but valid fill value)
            # is not silently skipped
            if fill is not None:
                values = values.nan_to_num(fill)
            l_values.append(values)

        # rows may come back unordered; sort both lists by timestamp
        sortidx = np.argsort(l_dt)
        sorted_l_dt = [l_dt[idx] for idx in sortidx]
        sorted_l_values = [l_values[idx] for idx in sortidx]

        if func is None:
            # bugfix: use the sorted sequences here — they were computed
            # above but the unsorted l_dt / l_values were returned before
            dtindex = self.pdtimestamps(sorted_l_dt)
            return pd.DataFrame(sorted_l_values, index=dtindex,
                                columns=fields)
        elif func == "sum":
            assert str_bin is not None
            binsize = config.str2dur(str_bin)
            dtindex = self.pdtimestamps(
                dtutil.range_dt(dt_range[0], dt_range[1], binsize))

            d_values = {}
            if len(l_dt) == 0:
                # no data: return an all-zero frame over the full index
                for field in fields:
                    d_values[field] = [float(0)] * len(dtindex)
            else:
                for fid, series in enumerate(zip(*sorted_l_values)):
                    a_cnt = dtutil.discretize_sequential(sorted_l_dt,
                                                         dt_range,
                                                         binsize,
                                                         l_dt_values=series)
                    d_values[fields[fid]] = a_cnt

            return pd.DataFrame(d_values, index=dtindex)
        else:
            raise NotImplementedError
示例#5
0
    def test_anonymize_restore(self):
        """End-to-end check of anonymized processing with detail restore.

        Builds the amulog DB, anonymizes it, generates DAGs from the
        anonymized source, then shows subgraph details against the
        original (non-anonymized) source config, asserting that at
        least one DAG edge was produced.
        """
        from amulog import __main__ as amulog_main
        from amulog import manager
        # Build the amulog database from scratch for this test.
        targets = amulog_main.get_targets_conf(self._amulog_conf)
        manager.process_files_online(self._amulog_conf, targets, reset_db=True)

        from amulog import anonymize
        # Anonymize the database and dump the mapping for later restore.
        am = anonymize.AnonymizeMapper(self._amulog_conf)
        am.anonymize(self._amulog_conf_anonymize)
        am.dump()

        conf = config.open_config(arguments.DEFAULT_CONFIG,
                                  base_default=False)
        conf["general"]["evdb"] = "sql"
        conf["database_sql"]["database"] = "sqlite3"
        conf["database_amulog"]["source_conf"] = self._path_amulogconf_anonymize
        conf["database_amulog"]["use_anonymize_mapping"] = "true"
        conf["database_amulog"]["given_amulog_database"] = "original"
        conf["database_sql"]["sqlite3_filename"] = self._path_testdb
        conf["dag"]["event_detail_cache"] = "false"

        conf["filter"]["rules"] = ""

        from logdag import dtutil
        from logdag.source import evgen_log
        w_term = self._whole_term
        size = config.str2dur("1d")
        # Load events into the event DB one day at a time.
        el = evgen_log.LogEventLoader(conf)
        for dt_range in dtutil.iter_term(w_term, size):
            el.read(dt_range, dump_org=False)

        # NOTE: "am" is rebound here from the AnonymizeMapper above.
        am = arguments.ArgumentManager(conf)
        am.generate(arguments.all_args)

        from logdag import makedag
        from logdag import showdag
        edge_cnt = 0
        for args in am:
            # DAG generation reads the anonymized source; detail display
            # afterwards reads the original source — the swap order matters.
            conf["database_amulog"]["source_conf"] = self._path_amulogconf_anonymize
            ldag = makedag.makedag_main(args, do_dump=False)
            conf["database_amulog"]["source_conf"] = self._path_amulogconf
            showdag.show_subgraphs(ldag, "detail",
                                   load_cache=False, graph=None)
            # print(showdag.show_subgraphs(ldag, "detail",
            #                              load_cache=False, graph=None))
            edge_cnt += ldag.number_of_edges()
        assert edge_cnt > 0
示例#6
0
文件: tsdb.py 项目: mousewu/logdag
def filter_periodic(conf, ld, l_dt, dt_range, evdef, method):
    """Return True and the interval if a_cnt is periodic."""

    ret_false = False, None, None
    gid_name = conf.get("dag", "event_gid")
    min_count = conf.getint("filter", "pre_count")
    min_term = config.getdur(conf, "filter", "pre_term")

    # preliminary test: reject series that are too sparse or too short
    if len(l_dt) < min_count:
        _logger.debug("time-series count too small, skip")
        return ret_false
    if max(l_dt) - min(l_dt) < min_term:
        _logger.debug("time-series range too small, skip")
        return ret_false

    # periodicity test under each configured sampling rule
    for dt_cond in config.gettuple(conf, "filter", "sample_rule"):
        dt_length, binsize = (config.str2dur(s) for s in dt_cond.split("_"))
        if dt_length == dt_range[1] - dt_range[0]:
            # rule length matches the given range: reuse the input series
            sample_l_dt = l_dt
        else:
            sample_l_dt = reload_ts(ld, evdef, dt_length, dt_range, gid_name)
        a_cnt = dtutil.discretize_sequential(sample_l_dt,
                                             dt_range,
                                             binsize,
                                             binarize=False)

        remain_dt = None
        if method == "remove":
            is_periodic, interval = period.fourier_remove(conf, a_cnt, binsize)
        elif method == "replace":
            is_periodic, remain_array, interval = period.fourier_replace(
                conf, a_cnt, binsize)
            if remain_array is not None:
                remain_dt = revert_event(remain_array, dt_range, binsize)
        elif method == "corr":
            is_periodic, interval = period.periodic_corr(conf, a_cnt, binsize)
        else:
            raise NotImplementedError
        if is_periodic:
            return is_periodic, remain_dt, interval
    return ret_false