def test2persist_multiple_objects_of_multiple_class(self):
        """
        Persist several detectors of different classes and check every persistence file.
        Two NewMatchPathDetector instances and one NewMatchPathValueComboDetector receive log atoms, all components are persisted
        with PersistenceUtil.persist_all() and the content of each persistence file is loaded again and compared with the expectation.
        """
        description = "Test2PersistenceUtil"

        # First path detector: receives the fixed and the decimal integer atom.
        path_detector = NewMatchPathDetector(self.aminer_config, [self.stream_printer_event_handler], 'Default2', True)
        self.analysis_context.register_component(path_detector, description)

        t = time.time()
        fixed_atom = LogAtom(self.fixed_dme.fixed_data, ParserMatch(self.match_element_fixed_dme), t, path_detector)
        decimal_atom = LogAtom(
            self.match_context_decimal_integer_value_me.match_data, ParserMatch(self.match_element_decimal_integer_value_me), t,
            path_detector)
        path_detector.receive_atom(fixed_atom)
        path_detector.receive_atom(decimal_atom)

        # Second path detector with its own persistence id: receives only the fixed atom.
        other_path_detector = NewMatchPathDetector(self.aminer_config, [self.stream_printer_event_handler], 'otherDetector2', True)
        self.analysis_context.register_component(other_path_detector, description + "2")
        fixed_atom = LogAtom(self.fixed_dme.fixed_data, ParserMatch(self.match_element_fixed_dme), t, other_path_detector)
        other_path_detector.receive_atom(fixed_atom)

        # Value combo detector watching a single path.
        combo_detector = NewMatchPathValueComboDetector(
            self.aminer_config, ['first/f1/s1'], [self.stream_printer_event_handler], 'Default', False, True)
        self.analysis_context.register_component(combo_detector, description + "3")
        combo_atom = LogAtom(self.fixed_dme.fixed_data, ParserMatch(self.match_element_first_match_me), t, combo_detector)
        combo_detector.receive_atom(combo_atom)

        PersistenceUtil.persist_all()

        stored_paths = PersistenceUtil.load_json(path_detector.persistence_file_name)
        fixed_path = self.match_element_fixed_dme.get_path()
        decimal_path = self.match_element_decimal_integer_value_me.get_path()
        # The order of the two stored paths is not fixed, so either ordering is accepted.
        self.assertTrue(stored_paths in ([fixed_path, decimal_path], [decimal_path, fixed_path]))

        self.assertEqual(PersistenceUtil.load_json(other_path_detector.persistence_file_name), [fixed_path])
        self.assertEqual(PersistenceUtil.load_json(combo_detector.persistence_file_name), [[combo_atom.raw_data]])
Exemplo n.º 2
0
 def load_persistence_data(self):
     """Restore self.known_values_set from the persistence file; nothing is changed when load_json returns None."""
     stored_records = PersistenceUtil.load_json(self.persistence_file_name)
     if stored_records is None:
         return
     # Sets and tuples were written as a list of lists. Lists are not hashable, so every inner list is converted back to a tuple
     # before it becomes a member of the set.
     self.known_values_set = set(map(tuple, stored_records))
     logging.getLogger(DEBUG_LOG_NAME).debug('%s loaded persistence data.', self.__class__.__name__)
    def __init__(self,
                 aminer_config,
                 target_path_list,
                 anomaly_event_handlers,
                 persistence_id='Default',
                 auto_include_flag=False,
                 output_log_line=True):
        """
        Set up the detector state, register it for persistence and read previously persisted values.
        @param aminer_config configuration object; its config_properties supply the persistence period.
        @param target_path_list stored unchanged on the instance (presumably the parser paths to monitor - not visible here).
        @param anomaly_event_handlers stored unchanged on the instance.
        @param persistence_id used together with the class name to build the persistence file name.
        @param auto_include_flag stored unchanged on the instance.
        @param output_log_line stored unchanged on the instance.
        """
        self.aminer_config = aminer_config
        self.target_path_list = target_path_list
        self.anomaly_event_handlers = anomaly_event_handlers
        self.persistence_id = persistence_id
        self.auto_include_flag = auto_include_flag
        self.output_log_line = output_log_line

        persistence_period = self.aminer_config.config_properties.get(KEY_PERSISTENCE_PERIOD, DEFAULT_PERSISTENCE_PERIOD)
        self.next_persist_time = time.time() + persistence_period

        # Counters and buffer used for logging statistics.
        self.log_success = 0
        self.log_total = 0
        self.log_learned_path_values = 0
        self.log_new_learned_values = []

        self.persistence_file_name = build_persistence_file_name(aminer_config, self.__class__.__name__, persistence_id)
        PersistenceUtil.add_persistable_component(self)

        stored_values = PersistenceUtil.load_json(self.persistence_file_name)
        if stored_values is not None:
            # NOTE(review): set() needs hashable members, so this assumes the persisted entries are scalars - confirm.
            self.known_values_set = set(stored_values)
            logging.getLogger(DEBUG_LOG_NAME).debug('%s loaded persistence data.', self.__class__.__name__)
        else:
            self.known_values_set = set()
    def __init__(self, aminer_config, property_path, bin_definition, report_interval, report_event_handlers, reset_after_report_flag=True,
                 persistence_id='Default', output_log_line=True):
        """
        Set up the analysis component and register it for persistence.
        @param aminer_config configuration object, also used to build the persistence file name.
        @param property_path stored unchanged on the instance.
        @param bin_definition stored unchanged on the instance.
        @param report_interval delay in seconds between creation of two reports. The parameter is applied to the parsed record data
        time, not the system time. Hence reports can be delayed when no data is received.
        @param report_event_handlers stored unchanged on the instance.
        @param reset_after_report_flag stored unchanged on the instance.
        @param persistence_id used together with the class name to build the persistence file name.
        @param output_log_line stored unchanged on the instance.
        @throws Exception when persisted data exists, as reading it back is not implemented.
        """
        self.aminer_config = aminer_config
        self.property_path = property_path
        self.bin_definition = bin_definition
        self.report_interval = report_interval
        self.report_event_handlers = report_event_handlers
        self.reset_after_report_flag = reset_after_report_flag
        self.persistence_id = persistence_id
        self.output_log_line = output_log_line

        self.histogram_data = {}
        self.last_report_time = None
        self.next_report_time = 0.0
        self.next_persist_time = None

        self.persistence_file_name = build_persistence_file_name(aminer_config, self.__class__.__name__, persistence_id)
        PersistenceUtil.add_persistable_component(self)
        stored_data = PersistenceUtil.load_json(self.persistence_file_name)
        if stored_data is None:
            return
        # Merging persisted histogram data is not supported, so existing data is reported as an error.
        error_message = 'No data reading, def merge yet'
        logging.getLogger(DEBUG_LOG_NAME).error(error_message)
        raise Exception(error_message)
    def __init__(self,
                 aminer_config,
                 anomaly_event_handlers,
                 id_path_list,
                 target_path_list=None,
                 persistence_id='Default',
                 auto_include_flag=False,
                 output_log_line=True,
                 ignore_list=None,
                 constraint_list=None):
        """
        Set up the detector, register it for persistence and restore previously learned value ranges.
        @param aminer_config configuration from analysis_context.
        @param anomaly_event_handlers for handling events, e.g., print events to stdout.
        @param id_path_list to specify group identifiers for which numeric ranges should be learned.
        @param target_path_list parser paths of values to be analyzed. Multiple paths mean that all values occurring in these paths
        are considered for value range generation.
        @param persistence_id name of persistency document.
        @param auto_include_flag specifies whether value ranges should be extended when values outside of ranges are observed.
        @param output_log_line specifies whether the full parsed log atom should be provided in the output.
        @param ignore_list list of paths that are not considered for analysis, i.e., events that contain one of these paths are
        omitted.
        @param constraint_list list of paths that have to be present in the log atom to be analyzed.
        """
        self.aminer_config = aminer_config
        self.anomaly_event_handlers = anomaly_event_handlers
        self.id_path_list = id_path_list
        self.target_path_list = target_path_list
        self.persistence_id = persistence_id
        self.auto_include_flag = auto_include_flag
        self.output_log_line = output_log_line
        self.next_persist_time = None

        # A missing list becomes an empty list, a given one is converted to a set.
        self.constraint_list = [] if constraint_list is None else set(constraint_list)
        self.ignore_list = [] if ignore_list is None else set(ignore_list)

        self.log_total = 0
        self.log_success = 0

        self.ranges_min = {}
        self.ranges_max = {}

        # Persisted data consists of min and max values for each identifier, i.e.,
        # [["min", [<id1, id2, ...>], <min_value>], ["max", [<id1, id2, ...>], <max_value>]]
        self.persistence_file_name = AminerConfig.build_persistence_file_name(aminer_config, self.__class__.__name__, persistence_id)
        PersistenceUtil.add_persistable_component(self)
        stored_ranges = PersistenceUtil.load_json(self.persistence_file_name)
        if stored_ranges is None:
            return
        for record in stored_ranges:
            bound_kind = record[0]
            if bound_kind not in ('min', 'max'):
                # Records with an unknown marker are skipped.
                continue
            range_store = self.ranges_min if bound_kind == 'min' else self.ranges_max
            # The identifier list is converted to a tuple so it can serve as dictionary key.
            range_store[tuple(record[1])] = float(record[2])
    def load_persistence_data(self):
        """Restore the three persisted lists from storage; nothing is changed when load_json returns None."""
        stored_state = PersistenceUtil.load_json(self.persistence_file_name)
        if stored_state is None:
            return

        # The persisted sequence holds, in this order, the target path index list, the period length list and the prediction history.
        self.target_path_index_list = stored_state[0]
        self.period_length_list = stored_state[1]
        self.prediction_history = stored_state[2]
 def load_persistence_data(self):
     """Add the persisted entries to self.known_values_dict; nothing is changed when load_json returns None."""
     stored_pairs = PersistenceUtil.load_json(self.persistence_file_name)
     if stored_pairs is None:
         return
     # The dictionary was stored as a list of [value_list, extra_data] pairs. Lists cannot be dictionary keys,
     # so the first element of every pair is converted back to a tuple.
     self.known_values_dict.update((tuple(value_list), extra_data) for value_list, extra_data in stored_pairs)
     logging.getLogger(DEBUG_LOG_NAME).debug('%s loaded persistence data.', self.__class__.__name__)
Exemplo n.º 8
0
    def __init__(self,
                 aminer_config,
                 target_path_list,
                 anomaly_event_handlers,
                 persistence_id='Default',
                 auto_include_flag=False,
                 default_interval=3600,
                 realert_interval=86400,
                 output_log_line=True):
        """
        Initialize the detector. This will also trigger reading or creation of persistence storage location.
        @param aminer_config configuration object; its config_properties supply the persistence period.
        @param target_path_list to extract a source identification value from each logatom.
        @param anomaly_event_handlers stored unchanged on the instance.
        @param persistence_id used together with the class name to build the persistence file name.
        @param auto_include_flag stored unchanged on the instance.
        @param default_interval interval that is enforced on every restored entry.
        @param realert_interval stored unchanged on the instance.
        @param output_log_line stored unchanged on the instance.
        """
        self.target_path_list = target_path_list
        self.anomaly_event_handlers = anomaly_event_handlers
        self.auto_include_flag = auto_include_flag
        self.default_interval = default_interval
        self.realert_interval = realert_interval
        # This timestamps is compared with timestamp values from log atoms for activation of alerting logic. The first timestamp from logs
        # above this value will trigger alerting.
        self.next_check_timestamp = 0
        self.last_seen_timestamp = 0
        self.output_log_line = output_log_line
        self.aminer_config = aminer_config
        self.next_persist_time = time.time() + self.aminer_config.config_properties.get(
            KEY_PERSISTENCE_PERIOD, DEFAULT_PERSISTENCE_PERIOD)
        self.persistence_id = persistence_id

        self.log_success = 0
        self.log_total = 0
        self.log_learned_values = 0
        self.log_new_learned_values = []

        self.persistence_file_name = build_persistence_file_name(aminer_config, self.__class__.__name__, persistence_id)
        PersistenceUtil.add_persistable_component(self)
        persistence_data = PersistenceUtil.load_json(self.persistence_file_name)
        self.expected_values_dict = {}
        if persistence_data is not None:
            for key, value in persistence_data.items():
                # Entries persisted for a different target path list are not restored. The former "elif" that repeated the
                # "is not None" test could never be reached and was removed.
                if self.target_path_list is not None and value[3] != self.target_path_list:
                    continue
                # A changed default interval overrides the persisted one; value[2] is recomputed from value[0].
                # NOTE(review): value presumably is [last_seen, interval, next_check, target_path_list] - confirm.
                if value[1] != default_interval:
                    value[1] = default_interval
                    value[2] = value[0] + default_interval
                self.expected_values_dict[key] = value
            logging.getLogger(DEBUG_LOG_NAME).debug('%s loaded persistence data.', self.__class__.__name__)
        self.analysis_string = 'Analysis.%s'
    def __init__(self, aminer_config, anomaly_event_handlers, id_path_list=None, target_path_list=None, seq_len=3, persistence_id='Default',
                 auto_include_flag=False, output_log_line=True, ignore_list=None, constraint_list=None):
        """
        Set up the detector, register it for persistence and restore previously learned sequences.
        @param aminer_config configuration from analysis_context.
        @param anomaly_event_handlers for handling events, e.g., print events to stdout.
        @param id_path_list one or more paths that specify the trace of the sequence detection, i.e., incorrect sequences that are
        generated by interleaved events can be avoided when event sequence identifiers are available.
        @param target_path_list parser paths of values to be analyzed. Multiple paths mean that values are analyzed by their combined
        occurrences. When no paths are specified, the events given by the full path list are analyzed.
        @param seq_len the length of the sequences to be learned (larger lengths increase precision, but may overfit the data).
        @param persistence_id name of persistency document.
        @param auto_include_flag specifies whether new frequency measurements override ground truth frequencies.
        @param output_log_line specifies whether the full parsed log atom should be provided in the output.
        @param ignore_list list of paths that are not considered for analysis, i.e., events that contain one of these paths are
        omitted. The default value is [] as None is not iterable.
        @param constraint_list list of paths that have to be present in the log atom to be analyzed.
        """
        self.aminer_config = aminer_config
        self.anomaly_event_handlers = anomaly_event_handlers
        self.target_path_list = target_path_list
        self.persistence_id = persistence_id
        self.auto_include_flag = auto_include_flag
        self.output_log_line = output_log_line
        self.next_persist_time = None

        # None is replaced by an empty list so that the attributes can always be iterated.
        self.id_path_list = [] if id_path_list is None else id_path_list
        self.constraint_list = [] if constraint_list is None else constraint_list
        self.ignore_list = [] if ignore_list is None else ignore_list

        self.seq_len = seq_len
        self.sequences = set()
        self.current_sequences = {}
        self.log_total = 0
        self.log_success = 0
        self.log_learned = 0
        self.log_learned_sequences = []

        self.persistence_file_name = AminerConfig.build_persistence_file_name(aminer_config, self.__class__.__name__, persistence_id)
        PersistenceUtil.add_persistable_component(self)

        # Persisted data contains lists of sequences, i.e., [[<seq1_elem1>, <seq1_elem2>], [<seq2_elem1, ...], ...]
        # Thereby, sequence elements may be tuples, i.e., combinations of values, or paths that define events.
        stored_sequences = PersistenceUtil.load_json(self.persistence_file_name)
        if stored_sequences is None:
            return
        # Both nesting levels are converted to tuples so that each sequence can be a member of the set.
        self.sequences.update(tuple(tuple(element) for element in stored_sequence) for stored_sequence in stored_sequences)
        logging.getLogger(AminerConfig.DEBUG_LOG_NAME).debug('%s loaded persistence data.', self.__class__.__name__)
    def __init__(self,
                 aminer_config,
                 anomaly_event_handlers,
                 timestamp_path,
                 analyze_path_list,
                 min_bin_elements,
                 min_bin_time,
                 debug_mode=False,
                 persistence_id='Default',
                 output_log_line=True):
        """
        Set up the detector, register it for persistence and restore previously stored statistics.
        @param aminer_config configuration object, also used to build the persistence file name.
        @param anomaly_event_handlers stored unchanged on the instance.
        @param timestamp_path if not None, use this path value for timestamp based bins.
        @param analyze_path_list list of match paths to analyze in this detector.
        @param min_bin_elements evaluate the latest bin only after at least that number of elements was added to it.
        @param min_bin_time evaluate the latest bin only when the first element is received after min_bin_time has elapsed.
        @param debug_mode if true, generate an analysis report even when average of last bin was within expected range.
        @param persistence_id used together with the class name to build the persistence file name.
        @param output_log_line stored unchanged on the instance.
        """
        self.aminer_config = aminer_config
        self.anomaly_event_handlers = anomaly_event_handlers
        self.timestamp_path = timestamp_path
        self.min_bin_elements = min_bin_elements
        self.min_bin_time = min_bin_time
        self.debug_mode = debug_mode
        self.persistence_id = persistence_id
        self.output_log_line = output_log_line
        self.next_persist_time = None

        self.persistence_file_name = build_persistence_file_name(aminer_config, self.__class__.__name__, persistence_id)
        PersistenceUtil.add_persistable_component(self)
        persistence_data = PersistenceUtil.load_json(self.persistence_file_name)

        # One (path, value list) pair per analyzed path, in the order given by analyze_path_list.
        self.stat_data = [(path, []) for path in analyze_path_list]
        if persistence_data is None:
            return

        for val in persistence_data:
            if isinstance(val, str):
                # The record arrived as text: brackets are stripped and the text is split at commas into path and value part.
                fields = val.strip('[').strip(']').split(',', 2)
                path = fields[0].strip('"')
                # NOTE(review): values stays a string here, so the loop below appends its single characters - confirm intent.
                values = fields[1].strip(' ').strip('[').strip(']')
            else:
                path, values = val[0], val[1]
            # Position of the matching path; equals len(self.stat_data) when the path is unknown.
            index = next(
                (position for position, (known_path, _) in enumerate(self.stat_data) if known_path == path), len(self.stat_data))
            # NOTE(review): for an unknown path with at least one value the append below raises IndexError.
            for value in values:
                self.stat_data[index][1].append(value)
Exemplo n.º 11
0
 def load_persistence_data(self):
     """Reset self.known_values and refill it from the persistence file when load_json returns data."""
     stored_records = PersistenceUtil.load_json(self.persistence_file_name)
     self.known_values = []
     if stored_records is None:
         return
     # Combinations are stored as list of dictionaries, so the records can be taken over as they are.
     self.known_values.extend(stored_records)
     logging.getLogger(DEBUG_LOG_NAME).debug('%s loaded persistence data.', self.__class__.__name__)
Exemplo n.º 12
0
    def __init__(self, aminer_config, anomaly_event_handlers, target_path_list=None, window_size=600, confidence_factor=0.5,
                 persistence_id='Default', auto_include_flag=False, output_log_line=True, ignore_list=None, constraint_list=None):
        """
        Set up the detector, register it for persistence and restore previously measured event frequencies.
        @param aminer_config configuration from analysis_context.
        @param anomaly_event_handlers for handling events, e.g., print events to stdout.
        @param target_path_list parser paths of values to be analyzed. Multiple paths mean that values are analyzed by their combined
        occurrences. When no paths are specified, the events given by the full path list are analyzed.
        @param window_size the length of the time window for counting in seconds.
        @param confidence_factor defines range of tolerable deviation of measured frequency from ground truth frequency gf by
        [gf * confidence_factor, gf / confidence_factor]. confidence_factor must be in range [0, 1]; any other value is replaced
        by 1 after a warning was logged.
        @param persistence_id name of persistency document.
        @param auto_include_flag specifies whether new frequency measurements override ground truth frequencies.
        @param output_log_line specifies whether the full parsed log atom should be provided in the output.
        @param ignore_list list of paths that are not considered for analysis, i.e., events that contain one of these paths are
        omitted. The default value is [] as None is not iterable.
        @param constraint_list list of paths that have to be present in the log atom to be analyzed.
        """
        self.aminer_config = aminer_config
        self.anomaly_event_handlers = anomaly_event_handlers
        self.target_path_list = target_path_list
        self.persistence_id = persistence_id
        self.auto_include_flag = auto_include_flag
        self.output_log_line = output_log_line
        self.next_persist_time = None

        # None is replaced by an empty list so that the attributes can always be iterated.
        self.constraint_list = [] if constraint_list is None else constraint_list
        self.ignore_list = [] if ignore_list is None else ignore_list

        self.window_size = window_size
        if not 0 <= confidence_factor <= 1:
            logging.getLogger(DEBUG_LOG_NAME).warning('confidence_factor must be in the range [0,1]!')
            confidence_factor = 1
        self.confidence_factor = confidence_factor

        self.next_check_time = None
        self.counts = {}
        self.counts_prev = {}
        self.log_total = 0
        self.log_success = 0
        self.log_windows = 0

        self.persistence_file_name = build_persistence_file_name(aminer_config, self.__class__.__name__, persistence_id)
        PersistenceUtil.add_persistable_component(self)

        # Persisted data contains lists of event-frequency pairs, i.e., [[<ev1, ev2>, <freq>], [<ev1, ev2>, <freq>], ...]
        stored_frequencies = PersistenceUtil.load_json(self.persistence_file_name)
        if stored_frequencies is None:
            return
        for entry in stored_frequencies:
            event_values, event_frequency = entry[0], entry[1]
            # The event is converted to a tuple so it can serve as dictionary key.
            self.counts_prev[tuple(event_values)] = event_frequency
        logging.getLogger(DEBUG_LOG_NAME).debug('%s loaded persistence data.', self.__class__.__name__)
Exemplo n.º 13
0
    def __init__(self,
                 aminer_config,
                 anomaly_event_handlers,
                 parallel_check_count,
                 persistence_id='Default',
                 record_count_before_event=10000,
                 output_log_line=True,
                 use_path_match=True,
                 use_value_match=True,
                 min_rule_attributes=1,
                 max_rule_attributes=5):
        """
        Initialize the detector. This will also trigger reading or creation of persistence storage location.
        @param aminer_config configuration object; its config_properties supply the persistence period.
        @param anomaly_event_handlers stored unchanged on the instance.
        @param parallel_check_count number of rule detection checks to run in parallel.
        @param persistence_id used together with the class name to build the persistence file name.
        @param record_count_before_event number of events used to calculate statistics (i.e., window size)
        @param output_log_line stored unchanged on the instance.
        @param use_path_match if true rules are built based on path existence
        @param use_value_match if true rules are built based on actual values
        @param min_rule_attributes minimum number of attributes forming a rule
        @param max_rule_attributes maximum number of attributes forming a rule
        """
        self.last_timestamp = 0.0
        self.parallel_check_count = parallel_check_count
        self.anomaly_event_handlers = anomaly_event_handlers
        self.min_rule_attributes = min_rule_attributes
        self.max_rule_attributes = max_rule_attributes
        self.last_unhandled_match = None
        self.total_records = 0
        self.record_count_before_event = record_count_before_event
        self.persistence_id = persistence_id
        self.output_log_line = output_log_line
        self.use_path_match = use_path_match
        self.use_value_match = use_value_match
        self.aminer_config = aminer_config
        self.next_persist_time = time.time() + self.aminer_config.config_properties.get(
            KEY_PERSISTENCE_PERIOD, DEFAULT_PERSISTENCE_PERIOD)

        # The rule state is always initialized. Previously these three attributes were only created when no persistence file
        # existed, which left them undefined on the instance as soon as persisted data was found.
        self.feature_list = []
        self.event_count_table = [0] * parallel_check_count * parallel_check_count * 2
        self.event_delta_table = [0] * parallel_check_count * parallel_check_count * 2

        self.persistence_file_name = build_persistence_file_name(aminer_config, self.__class__.__name__, persistence_id)
        PersistenceUtil.add_persistable_component(self)
        persistence_data = PersistenceUtil.load_json(self.persistence_file_name)
        if persistence_data is not None:
            # NOTE(review): the persisted content itself is not merged into the state here; only its presence is logged.
            logging.getLogger(DEBUG_LOG_NAME).debug('%s loaded persistence data.', self.__class__.__name__)
 def load_persistence_data(self):
     """
     Load the persistence data from storage.
     The persisted structure holds two lists of pairs: index 0 feeds self.appeared_time_list, index 1 feeds
     self.counter_reduce_time_intervals. Nothing is changed and nothing is logged when load_json returns None.
     """
     persistence_data = PersistenceUtil.load_json(self.persistence_file_name)
     if persistence_data is None:
         return
     # The keys were stored as lists; they are converted back to tuples so they can serve as dictionary keys.
     for match_value_tuple, time_list in persistence_data[0]:
         self.appeared_time_list[tuple(match_value_tuple)] = time_list
     for match_value_tuple, counter in persistence_data[1]:
         self.counter_reduce_time_intervals[tuple(match_value_tuple)] = counter
     # Only report a load when data was actually restored; the message used to be emitted even without any persisted data.
     logging.getLogger(AminerConfig.DEBUG_LOG_NAME).debug('%s loaded persistence data.', self.__class__.__name__)
    def __init__(self,
                 aminer_config,
                 histogram_defs,
                 report_interval,
                 report_event_handlers,
                 reset_after_report_flag=True,
                 persistence_id='Default',
                 output_log_line=True):
        """
        Set up the analysis component and register it for persistence.
        @param aminer_config configuration object; its config_properties supply the persistence period.
        @param histogram_defs is a list of tuples containing the target property path to analyze and the BinDefinition to apply for
        binning.
        @param report_interval delay in seconds between creation of two reports. The parameter is applied to the parsed record data
        time, not the system time. Hence reports can be delayed when no data is received.
        @param report_event_handlers stored unchanged on the instance.
        @param reset_after_report_flag stored unchanged on the instance.
        @param persistence_id used together with the class name to build the persistence file name.
        @param output_log_line stored unchanged on the instance.
        @throws Exception when persisted data exists, as reading it back is not implemented.
        """
        self.last_report_time = None
        self.next_report_time = 0.0
        # One HistogramData object per (path, bin definition) pair.
        self.histogram_data = [HistogramData(path, bin_definition) for path, bin_definition in histogram_defs]

        self.aminer_config = aminer_config
        self.report_interval = report_interval
        self.report_event_handlers = report_event_handlers
        self.reset_after_report_flag = reset_after_report_flag
        self.persistence_id = persistence_id
        self.output_log_line = output_log_line

        persistence_period = self.aminer_config.config_properties.get(KEY_PERSISTENCE_PERIOD, DEFAULT_PERSISTENCE_PERIOD)
        self.next_persist_time = time.time() + persistence_period

        self.persistence_file_name = build_persistence_file_name(aminer_config, self.__class__.__name__, persistence_id)
        PersistenceUtil.add_persistable_component(self)
        stored_data = PersistenceUtil.load_json(self.persistence_file_name)
        if stored_data is None:
            return
        # Merging persisted histogram data is not supported, so existing data is reported as an error.
        error_message = 'No data reading, def merge yet'
        logging.getLogger(DEBUG_LOG_NAME).error(error_message)
        raise Exception(error_message)
    def __init__(self, aminer_config, anomaly_event_handlers, persistence_id='Default', auto_include_flag=False, output_log_line=True):
        """
        Set up the detector, register it for persistence and restore the set of already known paths.
        @param aminer_config configuration object, also used to build the persistence file name.
        @param anomaly_event_handlers stored unchanged on the instance.
        @param persistence_id used together with the class name to build the persistence file name.
        @param auto_include_flag stored unchanged on the instance.
        @param output_log_line stored unchanged on the instance.
        """
        self.aminer_config = aminer_config
        self.anomaly_event_handlers = anomaly_event_handlers
        self.persistence_id = persistence_id
        self.auto_include_flag = auto_include_flag
        self.output_log_line = output_log_line
        self.next_persist_time = None

        # Counters and buffer used for logging statistics.
        self.log_success = 0
        self.log_total = 0
        self.log_learned_paths = 0
        self.log_new_learned_paths = []

        PersistenceUtil.add_persistable_component(self)
        self.persistence_file_name = AMinerConfig.build_persistence_file_name(aminer_config, self.__class__.__name__, persistence_id)
        stored_paths = PersistenceUtil.load_json(self.persistence_file_name)
        if stored_paths is not None:
            # The paths were stored as a list and are turned back into a set.
            self.known_path_set = set(stored_paths)
            logging.getLogger(AMinerConfig.DEBUG_LOG_NAME).debug('%s loaded persistence data.', self.__class__.__name__)
        else:
            self.known_path_set = set()
    def __init__(self,
                 aminer_config,
                 target_path_list,
                 anomaly_event_handlers,
                 window_size,
                 min_anomaly_score,
                 min_variance,
                 num_windows,
                 persistence_id='Default',
                 auto_include_flag=False,
                 output_log_line=True,
                 ignore_list=None,
                 constraint_list=None):
        """
        Set up the detector, register it for persistence and restore a previously stored event count matrix.
        @param aminer_config configuration from analysis_context.
        @param target_path_list parser paths of values to be analyzed. Multiple paths mean that values are analyzed as separate
        dimensions. When no paths are specified, the events given by the full path list are analyzed (one dimension).
        @param anomaly_event_handlers for handling events, e.g., print events to stdout.
        @param window_size the length of the time window for counting in seconds.
        @param min_anomaly_score the minimum computed outlier score for reporting anomalies. Scores are scaled by training data, i.e.,
        reasonable minimum scores are >1 to detect outliers with respect to currently trained PCA matrix.
        @param min_variance the minimum variance covered by the principal components in range [0, 1].
        @param num_windows the number of time windows in the sliding window approach. Total covered time span = window_size * num_windows.
        Values below 3 are replaced by 3 after a warning was logged.
        @param persistence_id name of persistency document.
        @param auto_include_flag specifies whether new count measurements are added to the PCA count matrix.
        @param output_log_line specifies whether the full parsed log atom should be provided in the output.
        @param ignore_list list of paths that are not considered for analysis, i.e., events that contain one of these paths are
        omitted. The default value is [] as None is not iterable.
        @param constraint_list list of paths that have to be present in the log atom to be analyzed.
        """
        self.aminer_config = aminer_config
        self.target_path_list = target_path_list
        self.anomaly_event_handlers = anomaly_event_handlers
        self.persistence_id = persistence_id
        self.auto_include_flag = auto_include_flag
        self.output_log_line = output_log_line
        self.next_persist_time = None

        self.block_time = window_size
        self.anomaly_score_threshold = min_anomaly_score
        self.variance_threshold = min_variance

        self.num_windows = num_windows
        if num_windows < 3:
            logging.getLogger(AminerConfig.DEBUG_LOG_NAME).warning('num_windows must be >= 3!')
            self.num_windows = 3

        self.first_log = True
        self.start_time = 0
        self.event_count_matrix = []
        self.feature_list = []
        self.ecm = None

        # None is replaced by an empty list so that the attributes can always be iterated.
        self.constraint_list = [] if constraint_list is None else constraint_list
        self.ignore_list = [] if ignore_list is None else ignore_list

        self.log_total = 0
        self.log_success = 0
        self.log_windows = 0

        self.persistence_file_name = AminerConfig.build_persistence_file_name(aminer_config, self.__class__.__name__, persistence_id)
        PersistenceUtil.add_persistable_component(self)
        stored_matrix = PersistenceUtil.load_json(self.persistence_file_name)

        if stored_matrix is None:
            has_target_paths = self.target_path_list is not None and len(self.target_path_list) != 0
            # Only one dimension when events are used instead of values; use empty string as placeholder
            self.event_count_vector = {} if has_target_paths else {'': {}}
        else:
            self.event_count_matrix = list(stored_matrix)
            self.compute_pca()
            # Copy feature list into event count vector and reset counts of each feature
            self.event_count_vector = copy.deepcopy(self.event_count_matrix[0])
            self.reset_event_count_vector()
    def __init__(self, aminer_config, anomaly_event_handlers, persistence_id='Default', path_list=None, id_path_list=None,
                 allow_missing_id=False, allowed_id_tuples=None, min_num_vals=1000, max_num_vals=1500, save_values=True,
                 track_time_for_tsa=False, waiting_time_for_tsa=1000, num_sections_waiting_time_for_tsa=100):
        """
        Initialize the detector. This will also trigger reading or creation of persistence storage location.
        @param aminer_config configuration object; its config_properties provide the persistence period.
        @param anomaly_event_handlers event handlers stored on the detector.
        @param persistence_id name used to build the persistence file name.
        @param path_list list of paths whose variables are tracked. If None all paths are tracked.
        @param id_path_list one or more paths that specify the trace of the sequence detection (defaults to an empty list).
        @param allow_missing_id specifies whether log atoms without id path should be omitted (only if id path is set).
        @param allowed_id_tuples list of the allowed id tuples; every entry is converted to a tuple. Log atoms with id tuples not in
        this list are not analyzed, when this list is not empty.
        @param min_num_vals number of values the value list is reduced to.
        @param max_num_vals maximum number of lines in the value list before it is reduced. > min_num_vals.
        @param save_values if False the values of the token are not saved for further analysis.
        @param track_time_for_tsa states if the time windows should be tracked for the time series analysis.
        @param waiting_time_for_tsa time in seconds until the time windows are being initialized.
        @param num_sections_waiting_time_for_tsa number of sections of the initialization window.
        """
        # --- Configuration taken from the arguments ---
        self.anomaly_event_handlers = anomaly_event_handlers
        # Interleaved events can produce incorrect sequences; identifiers found under these paths keep the traces apart.
        self.id_path_list = [] if id_path_list is None else id_path_list
        self.allow_missing_id = allow_missing_id
        # Normalize every allowed id entry to a tuple; no restriction is stored as an empty list.
        self.allowed_id_tuples = [] if allowed_id_tuples is None else [tuple(id_values) for id_values in allowed_id_tuples]
        self.min_num_vals = min_num_vals
        self.max_num_vals = max_num_vals
        # When False, self.values and self.check_variables are disabled.
        self.save_values = save_values
        self.track_time_for_tsa = track_time_for_tsa
        self.waiting_time_for_tsa = waiting_time_for_tsa
        # The length of the input list of calculate_time_steps is this number.
        self.num_sections_waiting_time_for_tsa = num_sections_waiting_time_for_tsa
        self.aminer_config = aminer_config
        now = time.time()
        self.next_persist_time = now + self.aminer_config.config_properties.get(KEY_PERSISTENCE_PERIOD, DEFAULT_PERSISTENCE_PERIOD)

        # --- Runtime state, empty until log data or persistence fills it ---
        self.num_events = 0
        # Longest path of each event.
        self.longest_path = []
        # Keys corresponding to the events.
        self.found_keys = []
        # Keys which take values in the log line.
        self.variable_key_list = []
        # Values of the log lines. Once max_num_vals is reached the list is reduced to min_num_vals values per variable.
        self.values = []
        # Number of lines per event type.
        self.num_eventlines = []
        # Total number of log lines.
        self.total_records = 0
        # Modules which follow the event_type_detector. The implemented modules are from the list
        # [variableTypeDetector, variableCorrelationDetector].
        self.following_modules = []
        # Only variables of these paths are tracked; None tracks all paths.
        self.path_list = path_list
        # Bools stating whether the variables of variable_key_list are updated.
        self.check_variables = []
        # Time trigger made of three parallel lists: [0] the times when something should be triggered, [1] the indices of the
        # event types, or a list of event type, path and value which should be counted (-1 for an initialization), [2] the length
        # of the time step (-1 for a one time trigger).
        self.etd_time_trigger = [[], [], []]
        # Reference containing the number of lines of the events for the TSA.
        self.num_eventlines_tsa_ref = []
        # Index of the event type of the current log line.
        self.current_index = 0
        # Id tuples.
        self.id_path_list_tuples = []

        # --- Persistence: register this component and read the stored state ---
        self.persistence_file_name = build_persistence_file_name(aminer_config, self.__class__.__name__, persistence_id)
        PersistenceUtil.add_persistable_component(self)
        stored_state = PersistenceUtil.load_json(self.persistence_file_name)

        if stored_state is None:
            # Nothing persisted: when time tracking is on, seed each of the three trigger lists with -1.
            if self.track_time_for_tsa:
                for trigger_column in self.etd_time_trigger:
                    trigger_column.append(-1)
        else:
            # Keys and id tuples are converted back to sets and tuples; the other entries are taken over unchanged.
            self.found_keys = [set(key) for key in stored_state[0]]
            self.variable_key_list = stored_state[1]
            self.values = stored_state[2]
            self.longest_path = stored_state[3]
            self.check_variables = stored_state[4]
            self.num_eventlines = stored_state[5]
            self.etd_time_trigger = stored_state[6]
            self.num_eventlines_tsa_ref = stored_state[7]
            self.id_path_list_tuples = [tuple(id_values) for id_values in stored_state[8]]

            self.num_events = len(self.found_keys)
Exemplo n.º 19
0
    def __init__(self,
                 aminer_config,
                 anomaly_event_handlers,
                 event_type_detector,
                 acf_pause_interval_percentage=0.2,
                 acf_auto_pause_interval=True,
                 acf_auto_pause_interval_num_min=10,
                 build_sum_over_values=False,
                 num_periods_tsa_ini=15,
                 num_division_time_step=10,
                 alpha=0.05,
                 num_min_time_history=20,
                 num_max_time_history=30,
                 num_results_bt=15,
                 alpha_bt=0.05,
                 acf_threshold=0.2,
                 round_time_inteval_threshold=0.02,
                 force_period_length=False,
                 set_period_length=604800,
                 min_log_lines_per_time_step=10,
                 persistence_id='Default',
                 path_list=None,
                 ignore_list=None,
                 output_log_line=True,
                 auto_include_flag=True):
        """
        Initialize the detector. This will also trigger reading or creation of persistence storage location.
        When persisted data exists, one ARIMA model per event is refitted from the stored time window history; events with too
        little history, or for which building or fitting the model fails, get no model and their stored history is cleared.
        @param aminer_config configuration from analysis_context.
        @param anomaly_event_handlers for handling events, e.g., print events to stdout.
        @param event_type_detector used to track the number of events in the time windows.
        @param acf_pause_interval_percentage states which area of the results of the ACF are not used to find the highest peak.
        @param acf_auto_pause_interval states if the pause area is automatically set.
        If enabled, the variable acf_pause_interval_percentage loses its functionality.
        @param acf_auto_pause_interval_num_min states the number of values in which a local minimum must be the minimum, to be
        considered a local minimum of the function and not an outlier.
        @param build_sum_over_values states if the sum of a series of counts is built before applying the TSA.
        @param num_periods_tsa_ini number of periods used to initialize the Arima-model.
        @param num_division_time_step number of division of the time window to calculate the time step.
        @param alpha significance level of the estimated values.
        @param num_min_time_history number of lines processed before the period length is calculated.
        @param num_max_time_history maximum number of values of the time_history.
        @param num_results_bt number of results which are used in the binomial test.
        @param alpha_bt significance level for the bt test.
        @param acf_threshold threshold, which has to be exceeded by the highest peak of the cdf function of the time series, to be analysed.
        @param round_time_inteval_threshold threshold for the rounding of the time_steps to the times in self.assumed_time_steps.
        The higher the threshold the easier the time is rounded to the next time in the list.
        @param force_period_length states if the period length is calculated through the ACF, or if the period length is forced to
        be set to set_period_length.
        @param set_period_length states how long the period length is if force_period_length is set to True.
        @param min_log_lines_per_time_step states the minimal average number of log lines per time step to make a TSA.
        @param persistence_id name of persistency document.
        @param path_list At least one of the parser paths in this list needs to appear in the event to be analysed.
        @param ignore_list list of paths that are not considered for correlation, i.e., events that contain one of these paths are
        omitted. The default value is [] as None is not iterable.
        @param output_log_line specifies whether the full parsed log atom should be provided in the output.
        @param auto_include_flag specifies whether new frequency measurements override ground truth frequencies.
        """
        self.aminer_config = aminer_config
        self.next_persist_time = time.time(
        ) + self.aminer_config.config_properties.get(
            KEY_PERSISTENCE_PERIOD, DEFAULT_PERSISTENCE_PERIOD)
        self.anomaly_event_handlers = anomaly_event_handlers
        self.output_log_line = output_log_line
        self.auto_include_flag = auto_include_flag
        # None is replaced by an empty list so that both path lists are always iterable.
        self.path_list = path_list
        if self.path_list is None:
            self.path_list = []
        self.ignore_list = ignore_list
        if self.ignore_list is None:
            self.ignore_list = []

        self.event_type_detector = event_type_detector
        self.acf_pause_interval_percentage = acf_pause_interval_percentage
        self.acf_auto_pause_interval = acf_auto_pause_interval
        self.acf_auto_pause_interval_num_min = acf_auto_pause_interval_num_min
        self.build_sum_over_values = build_sum_over_values
        self.num_periods_tsa_ini = num_periods_tsa_ini
        self.num_division_time_step = num_division_time_step
        self.alpha = alpha
        self.num_min_time_history = num_min_time_history
        self.num_max_time_history = num_max_time_history
        self.num_results_bt = num_results_bt
        self.alpha_bt = alpha_bt
        self.round_time_inteval_threshold = round_time_inteval_threshold
        self.acf_threshold = acf_threshold
        self.force_period_length = force_period_length
        self.set_period_length = set_period_length
        self.min_log_lines_per_time_step = min_log_lines_per_time_step

        # Add the TSAArimaDetector-module to the list of the modules, which use the event_type_detector.
        self.event_type_detector.add_following_modules(self)

        # History of the time windows
        self.time_window_history = []
        # List of the single arima_models (statsmodels)
        self.arima_models = []
        # List of the observed values and the predictions of the TSAArima
        self.prediction_history = []
        # List of the times of the observations
        self.time_history = []
        # List of the results if the value was in the limits of the one step predictions
        self.result_list = []
        # Minimal number of successes for the binomial test
        self.bt_min_suc = self.bt_min_successes(self.num_results_bt,
                                                self.alpha, self.alpha_bt)
        # Assumed occurring time steps in seconds. 1 minute: 60, 1 hour: 3600, 12 hours: 43200, 1 day: 86400, 1 week: 604800.
        self.assumed_time_steps = [60, 3600, 43200, 86400, 604800]

        # Load the persistence
        self.persistence_id = persistence_id
        self.persistence_file_name = AminerConfig.build_persistence_file_name(
            aminer_config, self.__class__.__name__, persistence_id)
        PersistenceUtil.add_persistable_component(self)
        persistence_data = PersistenceUtil.load_json(
            self.persistence_file_name)

        # Import the persistence
        if persistence_data is not None:
            self.time_window_history = persistence_data[0]

            self.arima_models = [None for _ in self.time_window_history]
            # Number of history values needed to initialize a model.
            window = self.num_periods_tsa_ini * self.num_division_time_step
            for event_index, history in enumerate(self.time_window_history):
                fitted_model = None
                if len(history) >= window:
                    try:
                        if not self.build_sum_over_values:
                            # Use the most recent counts directly.
                            series = history[-window:]
                        else:
                            # Sliding sums over num_division_time_step consecutive counts; the final sum covers the
                            # last num_division_time_step values.
                            tail = (self.num_periods_tsa_ini -
                                    1) * self.num_division_time_step
                            series = [
                                sum(history[-window + i:-tail + i])
                                for i in range(tail)
                            ] + [sum(history[-self.num_division_time_step:])]
                        model = statsmodels.tsa.arima.model.ARIMA(
                            series,
                            order=(self.num_division_time_step, 0, 0),
                            seasonal_order=(0, 0, 0,
                                            self.num_division_time_step))
                        fitted_model = model.fit()
                    except Exception:
                        # Best effort: any failure while building or fitting leaves this event without a model.
                        # Only Exception is caught so that KeyboardInterrupt and SystemExit still propagate.
                        fitted_model = None
                if fitted_model is None:
                    # Without a model the stored history of this event is discarded.
                    self.time_window_history[event_index] = []
                self.arima_models[event_index] = fitted_model

            self.prediction_history = persistence_data[1]
            self.time_history = persistence_data[2]
            self.result_list = persistence_data[3]

            # List of the pauses of the tests to the event numbers. If an arima model was initialized with the persistency, the model must
            # be trained before it can be used for forecasts. An integer states how many tests should be skipped before the next
            # output to this event number. None if no model was initialized for this event number.
            self.test_pause = [
                self.num_division_time_step
                if arima_models_statsmodel is not None else None
                for arima_models_statsmodel in self.arima_models
            ]
            # If all entries are None set the variable to None
            if all(entry is None for entry in self.test_pause):
                self.test_pause = None
        else:
            self.test_pause = None
    def run_analysis(self, master_fd):
        """
        Run the analysis thread.
        Builds the analysis pipeline, optionally applies the configured memory limit, loads the stream repositioning data and then
        loops over select() until self.run_analysis_loop_flag is cleared. Each loop iteration handles readable and writable
        descriptors, triggers the timed components, writes component statistics and creates persistence backups. On leaving the
        loop all persistable data is persisted and every tracked descriptor object is closed.
        @param master_fd the main communication socket to the parent to receive logfile updates from the parent.
        @return 0 on success, e.g. normal termination via signal or 1 on error.
        """
        # The masterControlSocket is the socket to communicate with the master process to receive commands or logstream data. Expect
        # the parent/child communication socket on fd 3. This also duplicates the fd, so close the old one.
        self.master_control_socket = socket.fromfd(master_fd, socket.AF_UNIX, socket.SOCK_DGRAM, 0)
        os.close(master_fd)
        self.tracked_fds_dict[self.master_control_socket.fileno()] = self.master_control_socket

        # Locate the real analysis configuration.
        self.analysis_context.build_analysis_pipeline()
        if self.analysis_context.atomizer_factory is None:
            msg = 'build_analysis_pipeline() did not initialize atomizer_factory, terminating'
            print('FATAL: ' + msg, file=sys.stderr)
            logging.getLogger(AMinerConfig.DEBUG_LOG_NAME).critical(msg)
            return 1

        real_time_triggered_components = self.analysis_context.real_time_triggered_components
        analysis_time_triggered_components = self.analysis_context.analysis_time_triggered_components

        # Optional address space limit; the configured value is interpreted as megabytes.
        max_memory_mb = self.analysis_context.aminer_config.config_properties.get(AMinerConfig.KEY_RESOURCES_MAX_MEMORY_USAGE, None)
        if max_memory_mb is not None:
            try:
                max_memory_mb = int(max_memory_mb)
                resource.setrlimit(resource.RLIMIT_AS, (max_memory_mb * 1024 * 1024, resource.RLIM_INFINITY))
                logging.getLogger(AMinerConfig.DEBUG_LOG_NAME).debug('set max memory limit to %d MB.', max_memory_mb)
            except ValueError:
                msg = '%s must be an integer, terminating' % AMinerConfig.KEY_RESOURCES_MAX_MEMORY_USAGE
                print('FATAL: ' + msg, file=sys.stderr)
                logging.getLogger(AMinerConfig.DEBUG_LOG_NAME).critical(msg)
                return 1

        # Load continuation data for last known log streams. The loaded data has to be a dictionary with repositioning information for
        # each stream. The data is used only when creating the first stream with that name.
        self.repositioning_data_dict = PersistenceUtil.load_json(self.persistence_file_name)
        if self.repositioning_data_dict is None:
            self.repositioning_data_dict = {}

        # A list of LogStreams where handleStream() blocked due to downstream not being able to consume the data yet.
        blocked_log_streams = []

        # Always start when number is None.
        next_real_time_trigger_time = None
        next_analysis_time_trigger_time = None
        next_backup_time_trigger_time = None
        log_stat_period = self.analysis_context.aminer_config.config_properties.get(
            AMinerConfig.KEY_LOG_STAT_PERIOD, AMinerConfig.DEFAULT_STAT_PERIOD)
        next_statistics_log_time = time.time() + log_stat_period

        # NOTE(review): suspended_flag is not assigned in this method, so it is resolved from an enclosing scope - presumably a
        # module-level flag; confirm against the rest of the file.
        delayed_return_status = 0
        while self.run_analysis_loop_flag:
            # Build the list of inputs to select for anew each time: the LogStream file descriptors may change due to rollover.
            input_select_fd_list = []
            output_select_fd_list = []
            for fd_handler_object in self.tracked_fds_dict.values():
                if isinstance(fd_handler_object, LogStream):
                    stream_fd = fd_handler_object.get_current_fd()
                    if stream_fd < 0:
                        continue
                    input_select_fd_list.append(stream_fd)
                elif isinstance(fd_handler_object, AnalysisChildRemoteControlHandler):
                    fd_handler_object.add_select_fds(input_select_fd_list, output_select_fd_list)
                else:
                    # This has to be a socket, just add the file descriptor.
                    input_select_fd_list.append(fd_handler_object.fileno())

            # Loop over the list in reverse order to avoid skipping elements in remove.
            if not suspended_flag:
                for log_stream in reversed(blocked_log_streams):
                    current_stream_fd = log_stream.handle_stream()
                    if current_stream_fd >= 0:
                        self.tracked_fds_dict[current_stream_fd] = log_stream
                        input_select_fd_list.append(current_stream_fd)
                        blocked_log_streams.remove(log_stream)

            read_list = None
            write_list = None
            try:
                (read_list, write_list, _except_list) = select.select(input_select_fd_list, output_select_fd_list, [], 1)
            except select.error as select_error:
                # Interrupting signals, e.g. for shutdown are OK. select.error is OSError on Python 3, which is not subscriptable,
                # so the error number has to be read from the errno attribute.
                if select_error.errno == errno.EINTR:
                    continue
                msg = 'Unexpected select result %s' % str(select_error)
                print(msg, file=sys.stderr)
                logging.getLogger(AMinerConfig.DEBUG_LOG_NAME).error(msg)
                delayed_return_status = 1
                break
            for read_fd in read_list:
                fd_handler_object = self.tracked_fds_dict[read_fd]
                if isinstance(fd_handler_object, LogStream):
                    # Handle this LogStream. Only when downstream processing blocks, add the stream to the blocked stream list.
                    handle_result = fd_handler_object.handle_stream()
                    if handle_result < 0:
                        # No need to care if current internal file descriptor in LogStream has changed in handleStream(),
                        # this will be handled when unblocking.
                        del self.tracked_fds_dict[read_fd]
                        blocked_log_streams.append(fd_handler_object)
                    elif handle_result != read_fd:
                        # The current fd has changed, update the tracking list.
                        del self.tracked_fds_dict[read_fd]
                        self.tracked_fds_dict[handle_result] = fd_handler_object
                    continue

                if isinstance(fd_handler_object, AnalysisChildRemoteControlHandler):
                    try:
                        fd_handler_object.do_receive()
                    except ConnectionError as receiveException:
                        msg = 'Unclean termination of remote control: %s' % str(receiveException)
                        logging.getLogger(AMinerConfig.DEBUG_LOG_NAME).error(msg)
                        print(msg, file=sys.stderr)
                    if fd_handler_object.is_dead():
                        logging.getLogger(AMinerConfig.DEBUG_LOG_NAME).debug('Deleting fd %s from tracked_fds_dict.', str(read_fd))
                        del self.tracked_fds_dict[read_fd]
                    # Reading is only attempted when output buffer was already flushed. Try processing the next request to fill the output
                    # buffer for next round.
                    else:
                        fd_handler_object.do_process(self.analysis_context)
                    continue

                if fd_handler_object == self.master_control_socket:
                    self.handle_master_control_socket_receive()
                    continue

                if fd_handler_object == self.remote_control_socket:
                    # We received a remote connection, accept it unconditionally. Users should make sure, that they do not exhaust
                    # resources by hogging open connections.
                    (control_client_socket, _remote_address) = self.remote_control_socket.accept()
                    # Keep track of information received via this remote control socket.
                    remote_control_handler = AnalysisChildRemoteControlHandler(control_client_socket)
                    self.tracked_fds_dict[control_client_socket.fileno()] = remote_control_handler
                    continue
                msg = 'Unhandled object type %s' % type(fd_handler_object)
                logging.getLogger(AMinerConfig.DEBUG_LOG_NAME).error(msg)
                raise Exception(msg)

            for write_fd in write_list:
                fd_handler_object = self.tracked_fds_dict[write_fd]
                if isinstance(fd_handler_object, AnalysisChildRemoteControlHandler):
                    buffer_flushed_flag = False
                    try:
                        buffer_flushed_flag = fd_handler_object.do_send()
                    except OSError as sendError:
                        msg = 'Error at sending data via remote control: %s' % str(sendError)
                        print(msg, file=sys.stderr)
                        logging.getLogger(AMinerConfig.DEBUG_LOG_NAME).error(msg)
                        try:
                            fd_handler_object.terminate()
                        except ConnectionError as terminateException:
                            msg = 'Unclean termination of remote control: %s' % str(terminateException)
                            print(msg, file=sys.stderr)
                            logging.getLogger(AMinerConfig.DEBUG_LOG_NAME).error(msg)
                    if buffer_flushed_flag:
                        fd_handler_object.do_process(self.analysis_context)
                    if fd_handler_object.is_dead():
                        del self.tracked_fds_dict[write_fd]
                    continue
                msg = 'Unhandled object type %s' % type(fd_handler_object)
                logging.getLogger(AMinerConfig.DEBUG_LOG_NAME).error(msg)
                raise Exception(msg)

            # Handle the real time events.
            real_time = time.time()
            if next_real_time_trigger_time is None or real_time >= next_real_time_trigger_time:
                next_trigger_offset = 3600
                for component in real_time_triggered_components:
                    # Only fold in a request that was actually obtained. While suspended no timer runs, so there is no request to
                    # evaluate and the offset keeps its 3600 second default.
                    if not suspended_flag:
                        next_trigger_request = component.do_timer(real_time)
                        next_trigger_offset = min(next_trigger_offset, next_trigger_request)
                next_real_time_trigger_time = real_time + next_trigger_offset

            if real_time >= next_statistics_log_time:
                next_statistics_log_time = real_time + log_stat_period
                logging.getLogger(AMinerConfig.DEBUG_LOG_NAME).debug('Statistics logs are written..')
                # log the statistics for every component.
                for component_name in self.analysis_context.registered_components_by_name:
                    component = self.analysis_context.registered_components_by_name[component_name]
                    component.log_statistics(component_name)

            # Handle the analysis time events. The analysis time will be different when an analysis time component is registered.
            analysis_time = self.analysis_context.analysis_time
            if analysis_time is None:
                analysis_time = real_time
            if next_analysis_time_trigger_time is None or analysis_time >= next_analysis_time_trigger_time:
                next_trigger_offset = 3600
                for component in analysis_time_triggered_components:
                    # Same rule as for the real time components: skipped timers contribute no request.
                    if not suspended_flag:
                        next_trigger_request = component.do_timer(real_time)
                        next_trigger_offset = min(next_trigger_offset, next_trigger_request)
                next_analysis_time_trigger_time = analysis_time + next_trigger_offset

            # backup the persistence data.
            backup_time = time.time()
            backup_time_str = datetime.fromtimestamp(backup_time).strftime('%Y-%m-%d-%H-%M-%S')
            persistence_dir = self.analysis_context.aminer_config.config_properties.get(
                AMinerConfig.KEY_PERSISTENCE_DIR, AMinerConfig.DEFAULT_PERSISTENCE_DIR)
            persistence_dir = persistence_dir.rstrip('/')
            backup_path = persistence_dir + '/backup/'
            backup_path_with_date = os.path.join(backup_path, backup_time_str)
            if next_backup_time_trigger_time is None or backup_time >= next_backup_time_trigger_time:
                next_trigger_offset = 3600 * 24
                # The first pass only schedules the next backup; a copy is made from the second trigger on.
                if next_backup_time_trigger_time is not None:
                    shutil.copytree(persistence_dir, backup_path_with_date, ignore=shutil.ignore_patterns('backup*'))
                    logging.getLogger(AMinerConfig.DEBUG_LOG_NAME).info('Persistence backup created in %s.', backup_path_with_date)
                next_backup_time_trigger_time = backup_time + next_trigger_offset

        # Analysis loop is only left on shutdown. Try to persist everything and leave.
        PersistenceUtil.persist_all()
        for sock in self.tracked_fds_dict.values():
            sock.close()
        return delayed_return_status
Exemplo n.º 21
0
    def __init__(self,
                 aminer_config,
                 anomaly_event_handlers,
                 path_list=None,
                 id_path_list=None,
                 ignore_list=None,
                 allow_missing_id=False,
                 num_log_lines_solidify_matrix=100,
                 time_output_threshold=0,
                 anomaly_threshold=0.05,
                 persistence_id='Default',
                 auto_include_flag=False,
                 output_log_line=True):
        """
        Initialize the detector. This will also trigger reading or creation of persistence storage location.
        @param aminer_config configuration from analysis_context.
        @param anomaly_event_handlers for handling events, e.g., print events to stdout.
        @param path_list parser paths of values to be analyzed. Multiple paths mean that values are analyzed by their combined
        occurrences. When no paths are specified, the events given by the full path list are analyzed.
        @param id_path_list the list of paths where id values can be stored in all relevant log event types.
        @param ignore_list list of paths that are not considered for analysis, i.e., events that contain one of these paths are
        omitted. The default value is [] as None is not iterable.
        @param allow_missing_id when set to True, the detector will also use matches, where one of the configured paths
        does not refer to an existing parsed data object.
        @param num_log_lines_solidify_matrix number of processed log lines after which the matrix is solidified.
        This process is periodically repeated.
        @param time_output_threshold threshold for the tested minimal transition time which has to be exceeded to be tested.
        @param anomaly_threshold threshold for the confidence which must be exceeded to raise an anomaly.
        @param persistence_id name of persistency document.
        @param auto_include_flag specifies whether newly observed sequences should be added to the learned model
        @param output_log_line specifies whether the full parsed log atom should be provided in the output.
        """
        # Input parameters. List-like arguments default to None in the signature and are replaced by a fresh
        # empty list here, so no mutable default is shared between instances.
        self.aminer_config = aminer_config
        self.anomaly_event_handlers = anomaly_event_handlers
        self.path_list = [] if path_list is None else path_list
        self.id_path_list = [] if id_path_list is None else id_path_list
        self.ignore_list = [] if ignore_list is None else ignore_list
        self.allow_missing_id = allow_missing_id
        self.num_log_lines_solidify_matrix = num_log_lines_solidify_matrix
        self.time_output_threshold = time_output_threshold
        self.anomaly_threshold = anomaly_threshold
        self.persistence_id = persistence_id
        self.auto_include_flag = auto_include_flag
        self.output_log_line = output_log_line
        self.next_persist_time = time.time() + self.aminer_config.config_properties.get(
            KEY_PERSISTENCE_PERIOD, DEFAULT_PERSISTENCE_PERIOD)

        # Warn if path_list or id_path_list is empty. Truthiness is used instead of comparing against [],
        # so that empty tuples or other empty sequences are detected as well.
        if not self.path_list or not self.id_path_list:
            msg = 'Both paths and id_path_list must not be empty.'
            logging.getLogger(DEBUG_LOG_NAME).warning(msg)
            print('WARNING: ' + msg, file=sys.stderr)

        # Initialization auxiliary variables
        self.time_matrix = {}
        self.last_value = {}
        self.last_time = {}
        self.log_total = 0

        # Load persistency
        self.persistence_file_name = build_persistence_file_name(
            aminer_config, self.__class__.__name__, persistence_id)
        PersistenceUtil.add_persistable_component(self)
        persistence_data = PersistenceUtil.load_json(self.persistence_file_name)
        if persistence_data is not None:
            # The persisted data holds three parallel structures: the matrix rows, the outer keys and, per row,
            # the inner keys. Keys are converted with tuple() so that they are usable as dictionary keys.
            stored_rows = persistence_data[0]
            outer_keys = [tuple(key) for key in persistence_data[1]]
            inner_keys = [[tuple(key) for key in row_keys] for row_keys in persistence_data[2]]
            # Values are looked up by index on purpose (instead of zip), so inconsistent persistence data
            # raises an IndexError rather than being silently truncated.
            self.time_matrix = {
                outer_key: {
                    inner_key: stored_rows[row][col]
                    for col, inner_key in enumerate(inner_keys[row])
                }
                for row, outer_key in enumerate(outer_keys)
            }
    def __init__(self,
                 aminer_config,
                 anomaly_event_handlers,
                 target_path_list=None,
                 window_size=600,
                 num_windows=50,
                 confidence_factor=0.33,
                 empty_window_warnings=True,
                 early_exceeding_anomaly_output=False,
                 set_lower_limit=None,
                 set_upper_limit=None,
                 persistence_id='Default',
                 auto_include_flag=False,
                 output_log_line=True,
                 ignore_list=None,
                 constraint_list=None):
        """
        Set up the detector state and restore previously persisted frequency counts, if any exist.
        @param aminer_config configuration from analysis_context.
        @param anomaly_event_handlers for handling events, e.g., print events to stdout.
        @param target_path_list parser paths of values to be analyzed. Multiple paths mean that values are analyzed by their
        combined occurrences. When no paths are specified, the events given by the full path list are analyzed.
        @param window_size the length of the time window for counting in seconds.
        @param num_windows the number of previous time windows considered for expected frequency estimation.
        @param confidence_factor defines range of tolerable deviation of measured frequency from expected frequency according to
        occurrences_mean +- occurrences_std / self.confidence_factor. Default value is 0.33 = 3*sigma deviation. Values outside
        the range [0, 1] are reported with a warning and replaced by 1.
        @param empty_window_warnings whether anomalies should be generated for too small window sizes.
        @param early_exceeding_anomaly_output states if an anomaly should be raised the first time the appearance count exceeds
        the range.
        @param set_lower_limit sets the lower limit of the frequency test to the specified value.
        @param set_upper_limit sets the upper limit of the frequency test to the specified value.
        @param persistence_id name of persistency document.
        @param auto_include_flag specifies whether new frequency measurements override ground truth frequencies.
        @param output_log_line specifies whether the full parsed log atom should be provided in the output.
        @param ignore_list list of paths that are not considered for analysis, i.e., events that contain one of these paths are
        omitted. None is replaced by an empty list.
        @param constraint_list list of paths that have to be present in the log atom to be analyzed. None is replaced by an
        empty list.
        """
        # Plain copies of the constructor arguments.
        self.aminer_config = aminer_config
        self.anomaly_event_handlers = anomaly_event_handlers
        self.target_path_list = target_path_list
        self.persistence_id = persistence_id
        self.auto_include_flag = auto_include_flag
        self.output_log_line = output_log_line
        self.window_size = window_size
        self.num_windows = num_windows
        self.empty_window_warnings = empty_window_warnings
        self.early_exceeding_anomaly_output = early_exceeding_anomaly_output
        self.set_lower_limit = set_lower_limit
        self.set_upper_limit = set_upper_limit

        # Optional path lists fall back to a fresh empty list.
        self.constraint_list = [] if constraint_list is None else constraint_list
        self.ignore_list = [] if ignore_list is None else ignore_list

        # Next persistence time is derived from the configured persistence period.
        persistence_period = self.aminer_config.config_properties.get(KEY_PERSISTENCE_PERIOD, DEFAULT_PERSISTENCE_PERIOD)
        self.next_persist_time = time.time() + persistence_period

        # An out-of-range confidence factor is reported and replaced by 1.
        factor_in_range = 0 <= confidence_factor <= 1
        if not factor_in_range:
            logging.getLogger(DEBUG_LOG_NAME).warning('confidence_factor must be in the range [0,1]!')
        self.confidence_factor = confidence_factor if factor_in_range else 1

        # Runtime state and statistics counters.
        self.next_check_time = None
        self.counts = {}
        self.ranges = {}
        self.exceeded_range_frequency = {}
        self.log_total = 0
        self.log_success = 0
        self.log_windows = 0

        # Register for persistence and read back any stored data.
        self.persistence_file_name = build_persistence_file_name(aminer_config, self.__class__.__name__, persistence_id)
        PersistenceUtil.add_persistable_component(self)
        stored_counts = PersistenceUtil.load_json(self.persistence_file_name)

        # Persisted data contains lists of event-frequency pairs, i.e., [[<ev>, [<freq1, freq2>]], [<ev>, [<freq1, freq2>]], ...]
        if stored_counts is not None:
            for record in stored_counts:
                event_path = record[0]
                history = record[1]
                # The stored history may be longer than the current num_windows allows; keep only its tail
                # and append a zero entry.
                first_kept = max(0, len(history) - num_windows - 1)
                self.counts[tuple(event_path)] = history[first_kept:] + [0]
            logging.getLogger(DEBUG_LOG_NAME).debug('%s loaded persistence data.', self.__class__.__name__)
    def __init__(self,
                 aminer_config,
                 anomaly_event_handlers,
                 persistence_id='Default',
                 path_list=None,
                 min_num_vals=1000,
                 max_num_vals=1500,
                 save_values=True,
                 track_time_for_TSA=False,
                 waiting_time_for_TSA=300,
                 num_sections_waiting_time_for_TSA=10):
        """
        Initialize the detector. This will also trigger reading or creation of persistence storage location.
        @param aminer_config configuration object; it is stored and used to build the persistence file name.
        @param anomaly_event_handlers event handlers stored on the detector.
        @param persistence_id identifier used together with the class name to build the persistence file name.
        @param path_list list of paths whose variables are tracked. All other paths are not tracked. If None, all paths are
        tracked.
        @param min_num_vals number of values the value list is reduced to once it has reached max_num_vals entries.
        @param max_num_vals maximum number of lines in the value list before it is reduced. Should be greater than min_num_vals.
        @param save_values if False, the values of the tokens are not saved for further analysis. This disables self.values and
        self.check_variables.
        @param track_time_for_TSA states whether the time windows should be tracked for the time series analysis.
        @param waiting_time_for_TSA time in seconds until the time windows are initialized.
        @param num_sections_waiting_time_for_TSA number of subdivisions of the initialization window.
        """
        # Constructor arguments.
        self.aminer_config = aminer_config
        self.anomaly_event_handlers = anomaly_event_handlers
        # Paths whose variables are tracked; None means that every path is tracked.
        self.path_list = path_list
        # Size the value list is cut down to. Take care that this is higher than 'num_min_values' in VarTypeD/Cor!
        self.min_num_vals = min_num_vals
        # Upper bound for the length of the value list before it is reduced (> min_num_vals).
        self.max_num_vals = max_num_vals
        # When False, token values are not stored, which disables self.values and self.check_variables.
        self.save_values = save_values
        # Whether time windows are tracked for the time series analysis.
        self.track_time_for_TSA = track_time_for_TSA
        # Seconds to wait until the time windows are initialized.
        self.waiting_time_for_TSA = waiting_time_for_TSA
        # Number of subdivisions of the initialization window; the input list of the function_Init function has
        # this number plus one entries.
        self.num_sections_waiting_time_for_TSA = num_sections_waiting_time_for_TSA

        # The first persist is scheduled 600 seconds after construction.
        self.next_persist_time = time.time() + 600.0

        # Event bookkeeping.
        self.num_events = 0
        # Longest path of each event.
        self.longest_path = []
        # Keys corresponding to each event.
        self.found_keys = []
        # Keys that take values in the log line.
        self.variable_key_list = []
        # Values of the log lines. Once the length reaches max_num_vals, the list is reduced to min_num_vals values
        # per variable.
        self.values = []
        # Number of lines seen per event type.
        self.num_eventlines = []
        # Total number of log lines.
        self.total_records = 0
        # Modules that follow the event_type_detector. Implemented modules are from the list
        # [variableTypeDetector, variableCorrelationDetector].
        self.following_modules = []
        # Booleans stating whether the variables of variable_key_list are updated.
        self.check_variables = []
        # Time triggers, stored as three parallel lists: the first holds the times at which something should be
        # triggered, the second the indices of the event types (or a list of event type, path and value to be
        # counted; -1 for an initialization), the third the length of the time window (-1 for a one-time trigger).
        self.etd_time_trigger = [[], [], []]
        # Reference number of lines per event for the TSA.
        self.num_eventlines_TSA_ref = []
        # Index of the event type of the current log line.
        self.current_index = 0

        # Register for persistence and load stored data.
        self.persistence_file_name = AminerConfig.build_persistence_file_name(
            aminer_config, self.__class__.__name__, persistence_id)
        PersistenceUtil.add_persistable_component(self)
        stored_state = PersistenceUtil.load_json(self.persistence_file_name)

        if stored_state is None:
            # Nothing persisted yet: when TSA time tracking is enabled, add an initialization entry (-1) to each
            # of the three trigger lists.
            if self.track_time_for_TSA:
                for trigger_column in self.etd_time_trigger:
                    trigger_column.append(-1)
        else:
            # Restore the persisted state; the stored key collections are converted to sets.
            self.found_keys.extend(set(stored_keys) for stored_keys in stored_state[0])
            self.variable_key_list = stored_state[1]
            self.values = stored_state[2]
            self.longest_path = stored_state[3]
            self.check_variables = stored_state[4]
            self.num_eventlines = stored_state[5]
            self.etd_time_trigger = stored_state[6]
            self.num_eventlines_TSA_ref = stored_state[7]

            self.num_events = len(self.found_keys)