def __init__(self, depth=4, sim_th=0.4, max_children=100, max_clusters=None,
             extra_delimiters=(), profiler: Profiler = NullProfiler()):
    """
    Create a new Drain instance.

    :param depth: depth of all leaf nodes (nodes that contain log clusters).
        Must be at least 3, because the root and leaf levels are excluded
        from the prefix-tree depth.
    :param sim_th: similarity threshold - if percentage of similar tokens for
        a log message is below this number, a new log cluster will be created.
    :param max_children: max number of children of an internal node.
    :param max_clusters: max number of tracked clusters (unlimited by default).
        When this number is reached, model starts replacing old clusters
        with a new ones according to the LRU policy.
    :param extra_delimiters: delimiters to apply when splitting log message
        into words (in addition to whitespace).
    :param profiler: profiler used to time internal operations
        (no-op by default).
    :raises ValueError: if depth is smaller than 3.
    """
    # A depth below 3 would make the prefix-tree depth (depth - 2)
    # non-positive and silently break tree traversal, so fail fast here.
    if depth < 3:
        raise ValueError("depth argument must be at least 3")

    self.depth = depth - 2  # number of prefix tokens in each tree path (exclude root and leaf node)
    self.sim_th = sim_th
    self.max_children = max_children
    self.root_node = Node()
    self.profiler = profiler
    self.extra_delimiters = extra_delimiters
    self.max_clusters = max_clusters

    # key: int, value: LogCluster; bounded LRU cache when max_clusters is set
    self.id_to_cluster = {} if max_clusters is None else LRUCache(maxsize=max_clusters)
    self.clusters_counter = 0
def __init__(self, persistence_handler: PersistenceHandler = None, config: TemplateMinerConfig = None):
    """
    Wrapper for Drain with persistence and masking support

    :param persistence_handler: The type of persistence to use. When None, no persistence is applied.
    :param config: Configuration object. When none, configuration is loaded from default .ini file (if exist)
    """
    logger.info("Starting Drain3 template miner")

    if config is None:
        # Lazy %-style args: the message is only formatted if this log
        # level is actually enabled (idiomatic logging, avoids f-string work).
        logger.info("Loading configuration from %s", config_filename)
        config = TemplateMinerConfig()
        config.load(config_filename)
    self.config = config

    # Profiler is a no-op unless profiling is explicitly enabled in config.
    self.profiler: Profiler = NullProfiler()
    if self.config.profiling_enabled:
        self.profiler = SimpleProfiler()

    self.persistence_handler = persistence_handler

    self.drain = Drain(sim_th=self.config.drain_sim_th,
                       depth=self.config.drain_depth,
                       max_children=self.config.drain_max_children,
                       max_clusters=self.config.drain_max_clusters,
                       extra_delimiters=self.config.drain_extra_delimiters,
                       profiler=self.profiler)
    self.masker = LogMasker(self.config.masking_instructions)
    self.last_save_time = time.time()

    # Restore any previously persisted state before accepting new messages.
    if persistence_handler is not None:
        self.load_state()
def __init__(self, depth=4, sim_th=0.4, max_children=100, max_clusters=None,
             extra_delimiters=(), profiler: Profiler = NullProfiler(),
             param_str="<*>", parametrize_numeric_tokens=True):
    """
    Build a Drain log-template miner.

    :param depth: max depth levels of log clusters. Minimum is 2.
        With depth==4: root is level 1, token count is level 2, the first
        log token is level 3 and the log clusters themselves sit at level 4.
    :param sim_th: similarity threshold; when the fraction of tokens a
        message shares with a cluster falls below this value, a new log
        cluster is created instead.
    :param max_children: max number of children of an internal node.
    :param max_clusters: max number of tracked clusters (unlimited by
        default). Once reached, old clusters are evicted for new ones
        following the LRU policy.
    :param extra_delimiters: delimiters used when splitting a log message
        into words, in addition to whitespace.
    :param param_str: placeholder string inserted for template parameters.
    :param parametrize_numeric_tokens: whether tokens containing at least
        one digit are treated as template parameters.
    :raises ValueError: if depth is smaller than 3.
    """
    if depth < 3:
        raise ValueError("depth argument must be at least 3")

    # Tree geometry: total cluster depth, and the prefix-tree part of it
    # (root and leaf levels excluded, hence the -2; zero-based).
    self.log_cluster_depth = depth
    self.max_node_depth = depth - 2

    # Matching / tokenization parameters.
    self.sim_th = sim_th
    self.extra_delimiters = extra_delimiters
    self.param_str = param_str
    self.parametrize_numeric_tokens = parametrize_numeric_tokens

    # Tree structure and cluster bookkeeping.
    self.max_children = max_children
    self.root_node = Node()
    self.profiler = profiler
    self.max_clusters = max_clusters
    # key: int, value: LogCluster; bounded LRU cache when max_clusters is set
    self.id_to_cluster = {} if max_clusters is None else LogClusterCache(maxsize=max_clusters)
    self.clusters_counter = 0
def __init__(self, persistence_handler: PersistenceHandler = None):
    """
    Template miner configured from the default .ini file, with optional
    persistence.

    :param persistence_handler: persistence backend; when None the miner
        keeps its state in memory only.
    """
    logger.info("Starting Drain3 template miner")

    cfg = configparser.ConfigParser()
    cfg.read(config_filename)
    self.config = cfg

    # [PROFILING] section: a no-op profiler unless explicitly enabled.
    profiling_enabled = cfg.getboolean('PROFILING', 'enabled', fallback=False)
    self.profiler: Profiler = SimpleProfiler() if profiling_enabled else NullProfiler()
    self.profiling_report_sec = cfg.getint('PROFILING', 'report_sec', fallback=60)

    # [SNAPSHOT] section.
    self.persistence_handler = persistence_handler
    self.snapshot_interval_seconds = 60 * cfg.getint(
        'SNAPSHOT', 'snapshot_interval_minutes', fallback=5)
    self.compress_state = cfg.getboolean('SNAPSHOT', 'compress_state', fallback=True)

    # [DRAIN] section; extra_delimiters is stored as a Python literal
    # (e.g. "['_']") in the .ini file, so parse it safely.
    delimiters_literal = cfg.get('DRAIN', 'extra_delimiters', fallback="[]")
    self.drain = Drain(
        sim_th=cfg.getfloat('DRAIN', 'sim_th', fallback=0.4),
        depth=cfg.getint('DRAIN', 'depth', fallback=4),
        max_children=cfg.getint('DRAIN', 'max_children', fallback=100),
        max_clusters=cfg.getint('DRAIN', 'max_clusters', fallback=None),
        extra_delimiters=ast.literal_eval(delimiters_literal),
        profiler=self.profiler)

    self.masker = LogMasker(cfg)
    self.last_save_time = time.time()

    # Restore any previously persisted state before accepting new messages.
    if persistence_handler is not None:
        self.load_state()
def __init__(self, depth=4, sim_th=0.4, max_children=100, extra_delimiters=(),
             profiler: Profiler = NullProfiler()):
    """
    Build a Drain log-template miner.

    :param depth: depth of all leaf nodes (the nodes holding log clusters).
    :param sim_th: similarity threshold; a new log cluster is created when
        the fraction of matching tokens falls below this value.
    :param max_children: max number of children of an internal node.
    :param extra_delimiters: delimiters applied when splitting a log message
        into words, in addition to whitespace.
    :param profiler: profiler used to time internal operations
        (no-op by default).
    """
    # Root and leaf levels are not prefix tokens, hence the -2.
    self.depth = depth - 2
    self.sim_th = sim_th
    self.max_children = max_children
    self.extra_delimiters = extra_delimiters
    self.profiler = profiler
    self.root_node = Node("(ROOT)", 0)
    self.clusters = []