def test1_receive_atom(self):
     """
     Check that receive_atom accepts an atom that carries the monitored path.

     The detector is built with False in the position the original test describes as the auto_include_flag, so this covers
     the configuration without automatic inclusion.
     """
     component_name = "Test1MissingMatchPathValueDetector"
     context = MatchContext(self.pid)
     element = FixedDataModelElement('s1', self.pid)
     parsed_element = element.get_match_element("match1", context)
     detector = MissingMatchPathValueDetector(
         self.aminer_config, [parsed_element.get_path()], [self.stream_printer_event_handler], 'Default', False,
         self.__default_interval, self.__realert_interval)
     self.analysis_context.register_component(detector, component_name)
     # The atom uses the fixed timestamp 1 and references the same match element whose path the detector watches.
     atom = LogAtom(element.fixed_data, ParserMatch(parsed_element), 1, detector)
     self.assertTrue(detector.receive_atom(atom))
 def test2_receive_atom_without_match_element(self):
     """
     Check that receive_atom returns False when the atom does not contain the monitored path.

     The detector watches the path of the element parsed under "match1", while the atom is built from an element parsed
     under "match2".
     """
     component_name = "Test2MissingMatchPathValueDetector"
     watched_context = MatchContext(self.pid)
     element = FixedDataModelElement('s1', self.pid)
     watched_element = element.get_match_element("match1", watched_context)
     # Parse the same data a second time with a new context, this time under a different path prefix.
     other_context = MatchContext(self.pid)
     other_element = element.get_match_element("match2", other_context)
     detector = MissingMatchPathValueDetector(
         self.aminer_config, [watched_element.get_path()], [self.stream_printer_event_handler], 'Default', False,
         self.__default_interval, self.__realert_interval)
     self.analysis_context.register_component(detector, component_name)
     atom = LogAtom(element.fixed_data, ParserMatch(other_element), 1, detector)
     self.assertFalse(detector.receive_atom(atom))
 def test11multiple_paths(self):
     """
     Check that a detector configured with several paths accepts an atom containing all of them.

     The parsed data is self.pid followed by b"22", matched by a sequence of a fixed element and a decimal integer element.
     """
     component_name = "Test11MissingMatchPathValueDetector"
     context = MatchContext(self.pid + b"22")
     fixed_element = FixedDataModelElement('s1', self.pid)
     number_element = DecimalIntegerValueModelElement(
         'd1', DecimalIntegerValueModelElement.SIGN_TYPE_NONE, DecimalIntegerValueModelElement.PAD_TYPE_NONE)
     sequence = SequenceModelElement('model', [fixed_element, number_element])
     parsed_sequence = sequence.get_match_element("match", context)
     # The sequence itself and both of its children are monitored.
     watched_paths = ["match/model", "match/model/s1", "match/model/d1"]
     detector = MissingMatchPathValueDetector(
         self.aminer_config, watched_paths, [self.stream_printer_event_handler], 'Default', False, self.__default_interval,
         self.__realert_interval)
     self.analysis_context.register_component(detector, component_name)
     atom = LogAtom(fixed_element.fixed_data + b"22", ParserMatch(parsed_sequence), 1, detector)
     self.assertTrue(detector.receive_atom(atom))
    def test4_receive_atom_missing_value(self):
        """
        Check that an overdue value is reported through the event handler.

        A first detector receives an atom at the current time. A second detector is then created with an interval reduced by
        past_time and receives an atom stamped past_time seconds later; the text written to the output stream is compared
        with the expected overdue message.
        """
        component_name = "Test4MissingMatchPathValueDetector"
        start = time.time()
        context = MatchContext(self.pid)
        element = FixedDataModelElement('s1', self.pid)
        parsed_element = element.get_match_element("match1", context)

        first_detector = MissingMatchPathValueDetector(
            self.aminer_config, [parsed_element.get_path()], [self.stream_printer_event_handler], 'Default', True,
            self.__default_interval, self.__realert_interval)
        self.analysis_context.register_component(first_detector, component_name)
        first_atom = LogAtom(element.fixed_data, ParserMatch(parsed_element), start, first_detector)
        self.assertTrue(first_detector.receive_atom(first_atom))

        past_time = 4000
        # The second detector reuses the 'Default' id and derives its interval from the first detector's default_interval.
        second_name = component_name + "2"
        second_detector = MissingMatchPathValueDetector(
            self.aminer_config, [parsed_element.get_path()], [self.stream_printer_event_handler], 'Default', True,
            first_detector.default_interval - past_time, self.__realert_interval, output_log_line=False)
        self.analysis_context.register_component(second_detector, second_name)

        later = start + past_time
        second_atom = LogAtom(element.fixed_data, ParserMatch(parsed_element), later, second_detector)
        self.assertTrue(second_detector.receive_atom(second_atom))

        printed = self.output_stream.getvalue()
        formatted_time = datetime.fromtimestamp(later).strftime(self.datetime_format_string)
        expected = self.__expected_string % (
            formatted_time, second_detector.__class__.__name__, second_name, 1, self.match1_s1_overdue)
        self.assertEqual(printed, expected)
示例#5
0
    def test3_receive_atom_no_missing_value(self):
        """
        Check that no event is emitted while the time limit has not been exceeded.

        Two atoms are processed, the second one stamped past_time seconds after the current time; afterwards the output
        stream must still be empty.
        """
        component_name = "Test3MissingMatchPathValueDetector"
        context = MatchContext(self.pid)
        element = FixedDataModelElement('s1', self.pid)
        parsed_element = element.get_match_element("match1", context)

        # NOTE: this variant of the test hands the path over as a plain string, not wrapped in a list.
        first_detector = MissingMatchPathValueDetector(
            self.aminer_config, parsed_element.get_path(), [self.stream_printer_event_handler], 'Default', True,
            self.__default_interval, self.__realert_interval)
        self.analysis_context.register_component(first_detector, component_name)
        first_atom = LogAtom(element.fixed_data, ParserMatch(parsed_element), time.time(), first_detector)
        self.assertTrue(first_detector.receive_atom(first_atom))

        past_time = 3200
        # The second detector is not registered with the analysis context; its interval is derived from the first one.
        second_detector = MissingMatchPathValueDetector(
            self.aminer_config, parsed_element.get_path(), [self.stream_printer_event_handler], 'Default', True,
            first_detector.default_interval - past_time, self.__realert_interval)

        second_atom = LogAtom(element.fixed_data, ParserMatch(parsed_element), time.time() + past_time, second_detector)
        self.assertTrue(second_detector.receive_atom(second_atom))
        self.assertEqual(self.output_stream.getvalue(), '')
示例#6
0
def build_analysis_pipeline(analysis_context):
    """
    Build the parsing model and the analysis pipeline for the demo log data.

    The function creates a FirstMatchModelElement parsing model, creates the event handlers and analysis components,
    registers the components with analysis_context.register_component and attaches most of them to a common
    SubhandlerFilter. It also assigns an atomizer factory to analysis_context.atomizer_factory, which instructs aminer how
    to process incoming data streams to create log atoms from them.

    analysis_context is the object the components are registered on; its aminer_config attribute is handed to the
    detectors. The function has no explicit return value.
    """
    # Build the parsing model:

    # Disk report lines: a fixed header, data up to the next '%' and the remaining bytes.
    service_children_disk_report = [
        FixedDataModelElement(
            'Space',
            b' Current Disk Data is: Filesystem     Type  Size  Used Avail Use%'
        ),
        DelimitedDataModelElement('Data', b'%'),
        AnyByteDataModelElement('Rest')
    ]

    # Login lines: "User <name> logged in|logged out", optionally followed by " <n> minutes ago.".
    service_children_login_details = [
        FixedDataModelElement('User', b'User '),
        DelimitedDataModelElement('Username', b' '),
        FixedWordlistDataModelElement('Status',
                                      [b' logged in', b' logged out']),
        OptionalMatchModelElement(
            'PastTime',
            SequenceModelElement('Time', [
                FixedDataModelElement('Blank', b' '),
                DecimalIntegerValueModelElement('Minutes'),
                FixedDataModelElement('Ago', b' minutes ago.')
            ]))
    ]

    # Daily cron lines: "<timestamp> <uname> <user> cron[<job number>]: Job `cron.daily` started.".
    service_children_cron_job = [
        DateTimeModelElement('DTM', b'%Y-%m-%d %H:%M:%S'),
        FixedDataModelElement('UNameSpace1', b' '),
        DelimitedDataModelElement('UName', b' '),
        FixedDataModelElement('UNameSpace2', b' '),
        DelimitedDataModelElement('User', b' '),
        FixedDataModelElement('Cron', b' cron['),
        DecimalIntegerValueModelElement('JobNumber'),
        FixedDataModelElement('Details', b']: Job `cron.daily` started.')
    ]

    # Lines of the form "Random: <integer>".
    service_children_random_time = [
        FixedDataModelElement('Space', b'Random: '),
        DecimalIntegerValueModelElement('Random')
    ]

    # Sensor lines: CPU temperature, CPU workload and a timestamp, separated by ", ".
    service_children_sensors = [
        SequenceModelElement('CPUTemp', [
            FixedDataModelElement('FixedTemp', b'CPU Temp: '),
            DecimalIntegerValueModelElement('Temp'),
            FixedDataModelElement('Degrees', b'\xc2\xb0C')
        ]),
        FixedDataModelElement('Space1', b', '),
        SequenceModelElement('CPUWorkload', [
            FixedDataModelElement('FixedWorkload', b'CPUWorkload: '),
            DecimalIntegerValueModelElement('Workload'),
            FixedDataModelElement('Percent', b'%')
        ]),
        FixedDataModelElement('Space2', b', '),
        DateTimeModelElement('DTM', b'%Y-%m-%d %H:%M:%S')
    ]

    # Lines of the form "User <name> changed IP address to <ip>".
    service_children_user_ip_address = [
        FixedDataModelElement('User', b'User '),
        DelimitedDataModelElement('Username', b' '),
        FixedDataModelElement('Action', b' changed IP address to '),
        IpAddressDataModelElement('IP')
    ]

    # Cron announcement lines: "<timestamp> <uname> cron[<job number>]: Will run job `<type>' in 5 min.".
    service_children_cron_job_announcement = [
        DateTimeModelElement('DTM', b'%Y-%m-%d %H:%M:%S'),
        FixedDataModelElement('Space', b' '),
        DelimitedDataModelElement('UName', b' '),
        FixedDataModelElement('Cron', b' cron['),
        DecimalIntegerValueModelElement('JobNumber'),
        FixedDataModelElement('Run', b']: Will run job `'),
        FixedWordlistDataModelElement(
            'CronType',
            [b'cron.daily', b'cron.hourly', b'cron.monthly', b'cron.weekly']),
        FixedDataModelElement('StartTime', b'\' in 5 min.')
    ]

    # Cron execution lines: "<timestamp> <uname> cron[<job number>]: Job `<type>' started".
    service_children_cron_job_execution = [
        DateTimeModelElement('DTM', b'%Y-%m-%d %H:%M:%S'),
        FixedDataModelElement('Space1', b' '),
        DelimitedDataModelElement('UName', b' '),
        FixedDataModelElement('Cron', b' cron['),
        DecimalIntegerValueModelElement('JobNumber'),
        FixedDataModelElement('Job', b']: Job `'),
        FixedWordlistDataModelElement(
            'CronType',
            [b'cron.daily', b'cron.hourly', b'cron.monthly', b'cron.weekly']),
        FixedDataModelElement('Started', b'\' started')
    ]

    # The root element combines all line types; the children are listed in the order given here.
    parsing_model = FirstMatchModelElement('model', [
        SequenceModelElement('CronAnnouncement',
                             service_children_cron_job_announcement),
        SequenceModelElement('CronExecution',
                             service_children_cron_job_execution),
        SequenceModelElement('DailyCron', service_children_cron_job),
        SequenceModelElement('DiskReport', service_children_disk_report),
        SequenceModelElement('LoginDetails', service_children_login_details),
        DecimalIntegerValueModelElement('Random'),
        SequenceModelElement('RandomTime', service_children_random_time),
        SequenceModelElement('Sensors', service_children_sensors),
        SequenceModelElement('IPAddresses', service_children_user_ip_address)
    ])

    # Some generic imports.
    from aminer.analysis import AtomFilters

    # Create all global handler lists here and append the real handlers later on.
    # Use this filter to distribute all atoms to the analysis handlers.
    atom_filters = AtomFilters.SubhandlerFilter(None)
    analysis_context.register_component(atom_filters,
                                        component_name="AtomFilter")

    # The timestamp adjuster is constructed with atom_filters as its only handler and is itself passed to the atomizer
    # factory below.
    from aminer.analysis.TimestampCorrectionFilters import SimpleMonotonicTimestampAdjust
    simple_monotonic_timestamp_adjust = SimpleMonotonicTimestampAdjust(
        [atom_filters])
    analysis_context.register_component(
        simple_monotonic_timestamp_adjust,
        component_name="SimpleMonotonicTimestampAdjust")

    # This list object is handed to every component below; a mail handler may be appended to it at the end of the function.
    from aminer.events.StreamPrinterEventHandler import StreamPrinterEventHandler
    stream_printer_event_handler = StreamPrinterEventHandler(
        analysis_context)  # skipcq: BAN-B108
    from aminer.events.Utils import VolatileLogarithmicBackoffEventHistory
    volatile_logarithmic_backoff_event_history = VolatileLogarithmicBackoffEventHistory(
        100)
    anomaly_event_handlers = [
        stream_printer_event_handler,
        volatile_logarithmic_backoff_event_history
    ]
    analysis_context.register_component(
        volatile_logarithmic_backoff_event_history,
        component_name="VolatileLogarithmicBackoffEventHistory")

    # Now define the AtomizerFactory using the model. A simple line based one is usually sufficient.
    from aminer.input.SimpleByteStreamLineAtomizerFactory import SimpleByteStreamLineAtomizerFactory
    analysis_context.atomizer_factory = SimpleByteStreamLineAtomizerFactory(
        parsing_model, [simple_monotonic_timestamp_adjust],
        anomaly_event_handlers)

    # Just report all unparsed atoms to the event handlers.
    from aminer.analysis.UnparsedAtomHandlers import SimpleUnparsedAtomHandler
    simple_unparsed_atom_handler = SimpleUnparsedAtomHandler(
        anomaly_event_handlers)
    atom_filters.add_handler(simple_unparsed_atom_handler,
                             stop_when_handled_flag=True)
    analysis_context.register_component(simple_unparsed_atom_handler,
                                        component_name="UnparsedHandler")

    from aminer.analysis.TimestampsUnsortedDetector import TimestampsUnsortedDetector
    timestamps_unsorted_detector = TimestampsUnsortedDetector(
        analysis_context.aminer_config, anomaly_event_handlers)
    atom_filters.add_handler(timestamps_unsorted_detector)
    analysis_context.register_component(
        timestamps_unsorted_detector,
        component_name="TimestampsUnsortedDetector")

    # The single OR rule has three branches: (1) the Minutes path exists and the username rule for b'root' is negated,
    # (2) the Minutes path does not exist but the LoginDetails path does, (3) the LoginDetails path does not exist.
    from aminer.analysis import Rules
    from aminer.analysis.AllowlistViolationDetector import AllowlistViolationDetector
    allowlist_rules = [
        Rules.OrMatchRule([
            Rules.AndMatchRule([
                Rules.PathExistsMatchRule(
                    '/model/LoginDetails/PastTime/Time/Minutes'),
                Rules.NegationMatchRule(
                    Rules.ValueMatchRule('/model/LoginDetails/Username',
                                         b'root'))
            ]),
            Rules.AndMatchRule([
                Rules.NegationMatchRule(
                    Rules.PathExistsMatchRule(
                        '/model/LoginDetails/PastTime/Time/Minutes')),
                Rules.PathExistsMatchRule('/model/LoginDetails')
            ]),
            Rules.NegationMatchRule(
                Rules.PathExistsMatchRule('/model/LoginDetails'))
        ])
    ]

    # This rule list should trigger, when the line does not look like: User root (logged in, logged out)
    # or User 'username' (logged in, logged out) x minutes ago.
    allowlist_violation_detector = AllowlistViolationDetector(
        analysis_context.aminer_config, allowlist_rules,
        anomaly_event_handlers)
    analysis_context.register_component(allowlist_violation_detector,
                                        component_name="Allowlist")
    atom_filters.add_handler(allowlist_violation_detector)

    from aminer.analysis.ParserCount import ParserCount
    parser_count = ParserCount(analysis_context.aminer_config, None,
                               anomaly_event_handlers, 10)
    analysis_context.register_component(parser_count,
                                        component_name="ParserCount")
    atom_filters.add_handler(parser_count)

    from aminer.analysis.EventCorrelationDetector import EventCorrelationDetector
    ecd = EventCorrelationDetector(analysis_context.aminer_config,
                                   anomaly_event_handlers,
                                   check_rules_flag=True,
                                   hypothesis_max_delta_time=1.0,
                                   auto_include_flag=True)
    analysis_context.register_component(
        ecd, component_name="EventCorrelationDetector")
    atom_filters.add_handler(ecd)

    from aminer.analysis.NewMatchPathDetector import NewMatchPathDetector
    new_match_path_detector = NewMatchPathDetector(
        analysis_context.aminer_config,
        anomaly_event_handlers,
        auto_include_flag=True)
    analysis_context.register_component(new_match_path_detector,
                                        component_name="NewMatchPath")
    atom_filters.add_handler(new_match_path_detector)

    def tuple_transformation_function(match_value_list):
        """
        Only allow output of the EnhancedNewMatchPathValueComboDetector after every 10000th element.

        The function looks up the value tuple in the detector's known_values_dict and switches the detector's
        auto_include_flag depending on the stored entry. The value list itself is returned unchanged.
        """
        # The detector variable is assigned further below in the enclosing function; it is resolved when this function is
        # called, not when it is defined.
        extra_data = enhanced_new_match_path_value_combo_detector.known_values_dict.get(
            tuple(match_value_list))
        if extra_data is not None:
            mod = 10000
            # presumably extra_data[2] is the number of occurrences seen so far - TODO confirm against the detector.
            if (extra_data[2] + 1) % mod == 0:
                enhanced_new_match_path_value_combo_detector.auto_include_flag = False
            else:
                enhanced_new_match_path_value_combo_detector.auto_include_flag = True
        return match_value_list

    from aminer.analysis.EnhancedNewMatchPathValueComboDetector import EnhancedNewMatchPathValueComboDetector
    enhanced_new_match_path_value_combo_detector = EnhancedNewMatchPathValueComboDetector(
        analysis_context.aminer_config,
        ['/model/DailyCron/UName', '/model/DailyCron/JobNumber'],
        anomaly_event_handlers,
        auto_include_flag=False,
        tuple_transformation_function=tuple_transformation_function)
    analysis_context.register_component(
        enhanced_new_match_path_value_combo_detector,
        component_name="EnhancedNewValueCombo")
    atom_filters.add_handler(enhanced_new_match_path_value_combo_detector)

    # The same modulo time bin definition object is used by both histogram components below.
    from aminer.analysis.HistogramAnalysis import HistogramAnalysis, LinearNumericBinDefinition, ModuloTimeBinDefinition, \
        PathDependentHistogramAnalysis
    modulo_time_bin_definition = ModuloTimeBinDefinition(
        86400, 3600, 0, 1, 24, True)
    linear_numeric_bin_definition = LinearNumericBinDefinition(50, 5, 20, True)
    histogram_analysis = HistogramAnalysis(
        analysis_context.aminer_config,
        [('/model/RandomTime/Random', modulo_time_bin_definition),
         ('/model/Random', linear_numeric_bin_definition)], 10,
        anomaly_event_handlers)
    analysis_context.register_component(histogram_analysis,
                                        component_name="HistogramAnalysis")
    atom_filters.add_handler(histogram_analysis)

    path_dependent_histogram_analysis = PathDependentHistogramAnalysis(
        analysis_context.aminer_config, '/model/RandomTime',
        modulo_time_bin_definition, 10, anomaly_event_handlers)
    analysis_context.register_component(
        path_dependent_histogram_analysis,
        component_name="PathDependentHistogramAnalysis")
    atom_filters.add_handler(path_dependent_histogram_analysis)

    from aminer.analysis.MatchValueAverageChangeDetector import MatchValueAverageChangeDetector
    match_value_average_change_detector = MatchValueAverageChangeDetector(
        analysis_context.aminer_config, anomaly_event_handlers, None,
        ['/model/Random'], 100, 10)
    analysis_context.register_component(
        match_value_average_change_detector,
        component_name="MatchValueAverageChange")
    atom_filters.add_handler(match_value_average_change_detector)

    # The stream writer is given sys.stdout as its stream, the three sensor paths, b';' and b''.
    import sys
    from aminer.analysis.MatchValueStreamWriter import MatchValueStreamWriter
    match_value_stream_writer = MatchValueStreamWriter(sys.stdout, [
        '/model/Sensors/CPUTemp', '/model/Sensors/CPUWorkload',
        '/model/Sensors/DTM'
    ], b';', b'')
    analysis_context.register_component(
        match_value_stream_writer, component_name="MatchValueStreamWriter")
    atom_filters.add_handler(match_value_stream_writer)

    from aminer.analysis.NewMatchPathValueComboDetector import NewMatchPathValueComboDetector
    new_match_path_value_combo_detector = NewMatchPathValueComboDetector(
        analysis_context.aminer_config,
        ['/model/IPAddresses/Username', '/model/IPAddresses/IP'],
        anomaly_event_handlers,
        auto_include_flag=False)
    analysis_context.register_component(
        new_match_path_value_combo_detector,
        component_name="NewMatchPathValueCombo")
    atom_filters.add_handler(new_match_path_value_combo_detector)

    # NOTE(review): the parsing model built above has no element named 'type', so the '/model/type/...' paths used here
    # do not correspond to any path of that model - confirm whether this is intended.
    from aminer.analysis.NewMatchIdValueComboDetector import NewMatchIdValueComboDetector
    new_match_id_value_combo_detector = NewMatchIdValueComboDetector(
        analysis_context.aminer_config,
        ['/model/type/path/name', '/model/type/syscall/syscall'],
        anomaly_event_handlers,
        id_path_list=['/model/type/path/id', '/model/type/syscall/id'],
        min_allowed_time_diff=5,
        auto_include_flag=True,
        allow_missing_values_flag=True,
        output_log_line=True)
    analysis_context.register_component(
        new_match_id_value_combo_detector,
        component_name="NewMatchIdValueComboDetector")
    atom_filters.add_handler(new_match_id_value_combo_detector)

    # NOTE(review): '/model/DailyCron/Job Number' contains a space, while the model element above is named 'JobNumber'
    # (and the combo detector above uses '/model/DailyCron/JobNumber') - looks like a typo, confirm before changing.
    from aminer.analysis.NewMatchPathValueDetector import NewMatchPathValueDetector
    new_match_path_value_detector = NewMatchPathValueDetector(
        analysis_context.aminer_config,
        ['/model/DailyCron/Job Number', '/model/IPAddresses/Username'],
        anomaly_event_handlers,
        auto_include_flag=False)
    analysis_context.register_component(new_match_path_value_detector,
                                        component_name="NewMatchPathValue")
    atom_filters.add_handler(new_match_path_value_detector)

    from aminer.analysis.MissingMatchPathValueDetector import MissingMatchPathValueDetector
    missing_match_path_value_detector = MissingMatchPathValueDetector(
        analysis_context.aminer_config, ['/model/DiskReport/Space'],
        anomaly_event_handlers,
        auto_include_flag=False,
        default_interval=2,
        realert_interval=5)
    analysis_context.register_component(missing_match_path_value_detector,
                                        component_name="MissingMatch")
    atom_filters.add_handler(missing_match_path_value_detector)

    from aminer.analysis.TimeCorrelationDetector import TimeCorrelationDetector
    time_correlation_detector = TimeCorrelationDetector(
        analysis_context.aminer_config,
        anomaly_event_handlers,
        2,
        min_rule_attributes=1,
        max_rule_attributes=5,
        record_count_before_event=70000,
        output_log_line=True)
    analysis_context.register_component(
        time_correlation_detector, component_name="TimeCorrelationDetector")
    atom_filters.add_handler(time_correlation_detector)

    # One correlation rule links cron announcements (class A) to cron executions (class B) via their JobNumber paths.
    from aminer.analysis.TimeCorrelationViolationDetector import TimeCorrelationViolationDetector, CorrelationRule, EventClassSelector
    cron_job_announcement = CorrelationRule(
        'CronJobAnnouncement',
        5,
        6,
        max_artefacts_a_for_single_b=1,
        artefact_match_parameters=[('/model/CronAnnouncement/JobNumber',
                                    '/model/CronExecution/JobNumber')])
    a_class_selector = EventClassSelector('Announcement',
                                          [cron_job_announcement], None)
    b_class_selector = EventClassSelector('Execution', None,
                                          [cron_job_announcement])
    rules = [
        Rules.PathExistsMatchRule('/model/CronAnnouncement/Run',
                                  a_class_selector),
        Rules.PathExistsMatchRule('/model/CronExecution/Job', b_class_selector)
    ]

    time_correlation_violation_detector = TimeCorrelationViolationDetector(
        analysis_context.aminer_config, rules, anomaly_event_handlers)
    analysis_context.register_component(
        time_correlation_violation_detector,
        component_name="TimeCorrelationViolationDetector")
    atom_filters.add_handler(time_correlation_violation_detector)

    # A mail handler is only created when the mail target address key is present in the configuration. It is appended to
    # the shared handler list after all components have been constructed with that list.
    from aminer.events.DefaultMailNotificationEventHandler import DefaultMailNotificationEventHandler
    if DefaultMailNotificationEventHandler.CONFIG_KEY_MAIL_TARGET_ADDRESS in analysis_context.aminer_config.config_properties:
        mail_notification_handler = DefaultMailNotificationEventHandler(
            analysis_context)
        analysis_context.register_component(mail_notification_handler,
                                            component_name="MailHandler")
        anomaly_event_handlers.append(mail_notification_handler)
    def test12multiple_paths_data_from_file(self):
        """
        Run four multi-path detectors over the records of a data file and check the reported overdue event.

        Each detector watches one host/service sequence together with its two children. Before the first record is
        processed, every detector is fed once and its auto_include_flag is switched off. After all records have been
        processed, every detector is fed once more and the complete output is compared with a single expected message.
        """
        description = "Test12MissingMatchPathValueDetector"
        with open('unit/data/multiple_pathes_mmpvd.txt', 'rb') as data_file:
            records = data_file.readlines()

        host1 = FixedDataModelElement("host1", b"host1 ")
        host2 = FixedDataModelElement("host2", b"host2 ")
        service1 = FixedDataModelElement("service1", b"service1")
        service2 = FixedDataModelElement("service2", b"service2")
        seq11 = SequenceModelElement("seq11", [host1, service1])
        seq12 = SequenceModelElement("seq12", [host1, service2])
        seq21 = SequenceModelElement("seq21", [host2, service1])
        seq22 = SequenceModelElement("seq22", [host2, service2])
        first = FirstMatchModelElement("first", [seq11, seq12, seq21, seq22])

        # (sequence name, host element name, service element name, fourth constructor argument, component name suffix)
        detector_specs = (
            ("seq11", "host1", "service1", 'Default11', "11"),
            ("seq12", "host1", "service2", 'Default23', "12"),
            ("seq21", "host2", "service1", 'Default21', "21"),
            ("seq22", "host2", "service2", 'Default22', "22"))
        watched = []
        for seq_name, host_name, service_name, detector_id, suffix in detector_specs:
            seq_path = "match/first/" + seq_name
            detector = MissingMatchPathValueDetector(
                self.aminer_config, [seq_path, seq_path + "/" + host_name, seq_path + "/" + service_name],
                [self.stream_printer_event_handler], detector_id, True, 480, 480)
            self.analysis_context.register_component(detector, description + suffix)
            watched.append((seq_path, detector))
        first_detector = watched[0][1]

        # All four host/service combinations back to back. One MatchContext built from this line is handed to
        # get_match_element once per detector; presumably every call continues where the previous one stopped, so that each
        # detector receives its own sequence - confirm against MatchContext.
        combined_line = b"host1 service1host1 service2host2 service1host2 service2"
        epoch = datetime(1970, 1, 1, tzinfo=timezone.utc)

        t = 0
        for record in records:
            split_line = record.rsplit(b" ", 2)
            date = datetime.strptime(split_line[0].decode(), "%Y-%m-%d %H:%M:%S").astimezone(timezone.utc)
            t = (date - epoch).total_seconds()
            # initialize the detectors and remove the first output.
            if first_detector.auto_include_flag is True:
                init_context = MatchContext(combined_line)
                for _, detector in watched:
                    init_element = first.get_match_element("match", init_context)
                    detector.receive_atom(LogAtom(combined_line, ParserMatch(init_element), t, detector))
                    detector.auto_include_flag = False
                self.reset_output_stream()
            payload = split_line[1] + b" " + split_line[2]
            match_element = first.get_match_element("match", MatchContext(payload))
            # The same atom, created with the first detector as its source, is passed to all four detectors.
            log_atom = LogAtom(payload, ParserMatch(match_element), t, first_detector)
            for seq_path, detector in watched:
                res = detector.receive_atom(log_atom)
                if match_element.get_path() == seq_path:
                    self.assertTrue(res)

        # need to produce a valid match to trigger missing match paths.
        final_context = MatchContext(combined_line)
        for _, detector in watched:
            final_element = first.get_match_element("match", final_context)
            detector.receive_atom(LogAtom(combined_line, ParserMatch(final_element), t, detector))

        # exactly one overdue should be found
        expected_output = (
            "2021-03-12 21:30:51 Interval too large between values\nMissingMatchPathValueDetector: \"Test12MissingMatchPathValue"
            "Detector11\" (1 lines)\n    ['match/first/seq11', 'match/first/seq11/host1', 'match/first/seq11/service1']: \"['host1 "
            "service1', 'host1 ', 'service1']\" overdue 12.0s (interval 480)\n\n")
        self.assertEqual(expected_output, self.output_stream.getvalue())
    def test5_missing_value_on_persisted(self):
        """
        Check that an overdue value is reported by a second detector after the first one called do_persist.

        The first detector receives an atom and persists. A second detector with the same 'Default' id gets its check
        value changed via set_check_value and then receives an atom stamped past_time seconds later.
        """
        component_name = "Test5MissingMatchPathValueDetector"
        start = time.time()
        context = MatchContext(self.pid)
        element = FixedDataModelElement('s1', self.pid)
        parsed_element = element.get_match_element("match1", context)

        first_detector = MissingMatchPathValueDetector(
            self.aminer_config, [parsed_element.get_path()], [self.stream_printer_event_handler], 'Default', True,
            self.__default_interval, self.__realert_interval)
        self.analysis_context.register_component(first_detector, component_name)
        first_atom = LogAtom(element.fixed_data, ParserMatch(parsed_element), round(start), first_detector)
        self.assertTrue(first_detector.receive_atom(first_atom))
        first_detector.do_persist()

        past_time = 4000
        second_name = component_name + "2"
        second_detector = MissingMatchPathValueDetector(
            self.aminer_config, [parsed_element.get_path()], [self.stream_printer_event_handler], 'Default', True,
            self.__default_interval, self.__realert_interval)
        self.analysis_context.register_component(second_detector, second_name)
        # The channel key is derived from the first atom; element [1] of the returned value is used.
        channel = second_detector.get_channel_key(first_atom)[1]
        second_detector.set_check_value(channel, self.__default_interval - past_time, parsed_element.get_path())

        second_atom = LogAtom(element.fixed_data, ParserMatch(parsed_element), round(start) + past_time, second_detector)
        self.assertTrue(second_detector.receive_atom(second_atom))

        # The atom timestamp is rounded while the expected text is built from the unrounded start time, so the printed
        # second may be the one of start + past_time or the following one; both are accepted.
        detector_class_name = second_detector.__class__.__name__
        accepted_outputs = tuple(
            self.__expected_string % (
                datetime.fromtimestamp(start + past_time + offset).strftime(self.datetime_format_string),
                detector_class_name, second_name, 1, self.match1_s1_overdue)
            for offset in (0, 1))
        self.assertIn(self.output_stream.getvalue(), accepted_outputs)
def build_analysis_pipeline(analysis_context):
    """
    Build the parsing model and the analysis pipeline and attach both to the given analysis context.
    The function creates a parsing model, sets analysis_context.atomizer_factory to a SimpleByteStreamLineAtomizerFactory that uses
    this model and registers every created analysis component with analysis_context.register_component. Nothing is returned.
    The passed analysis_context must provide the aminer_config attribute and the register_component method, as both are used below.
    """
    date_format_string = b'%Y-%m-%d %H:%M:%S'
    cron = b' cron['

    # Build the parsing model:
    # Each service_children_* list holds the child elements of one alternative of the top-level model assembled further below.

    service_children_disk_report = [
        FixedDataModelElement('Space', b' Current Disk Data is: Filesystem     Type  Size  Used Avail Use%'),
        DelimitedDataModelElement('Data', b'%'), AnyByteDataModelElement('Rest')]

    service_children_login_details = [
        FixedDataModelElement('User/LoginDetails', b'User '), DelimitedDataModelElement('Username', b' '),
        FixedWordlistDataModelElement('Status', [b' logged in', b' logged out']),
        OptionalMatchModelElement('PastTime', SequenceModelElement('Time', [
            FixedDataModelElement('Blank', b' '), DecimalIntegerValueModelElement('Minutes'),
            FixedDataModelElement('Ago', b' minutes ago.')]))]

    service_children_cron_job = [
        DateTimeModelElement('DTM', date_format_string), FixedDataModelElement('UNameSpace1', b' '),
        DelimitedDataModelElement('UName', b' '), FixedDataModelElement('UNameSpace2', b' '), DelimitedDataModelElement('User', b' '),
        FixedDataModelElement('Cron', cron), DecimalIntegerValueModelElement('JobNumber'),
        FixedDataModelElement('Details', b']: Job `cron.daily` started.')]

    service_children_random_time = [FixedDataModelElement('Space', b'Random: '), DecimalIntegerValueModelElement('Random')]

    service_children_sensors = [SequenceModelElement('CPUTemp', [
        FixedDataModelElement('FixedTemp', b'CPU Temp: '), DecimalIntegerValueModelElement('Temp'),
        FixedDataModelElement('Degrees', b'\xc2\xb0C')]), FixedDataModelElement('Space1', b', '), SequenceModelElement('CPUWorkload', [
            FixedDataModelElement('FixedWorkload', b'CPU Workload: '), DecimalIntegerValueModelElement('Workload'),
            FixedDataModelElement('Percent', b'%')]), FixedDataModelElement('Space2', b', '),
        DateTimeModelElement('DTM', date_format_string)]

    service_children_user_ip_address = [
        FixedDataModelElement('User/UserIPAddress', b'User '), DelimitedDataModelElement('Username', b' '),
        FixedDataModelElement('Action', b' changed IP address to '), IpAddressDataModelElement('IP')]

    service_children_cron_job_announcement = [
        DateTimeModelElement('DTM', date_format_string), FixedDataModelElement('Space', b' '),
        DelimitedDataModelElement('UName', b' '), FixedDataModelElement('Cron', cron), DecimalIntegerValueModelElement('JobNumber'),
        FixedDataModelElement('Run', b']: Will run job `'),
        FixedWordlistDataModelElement('CronType', [b'cron.daily', b'cron.hourly', b'cron.monthly', b'cron.weekly']),
        FixedDataModelElement('StartTime', b'\' in 5 min.')]

    service_children_cron_job_execution = [
        DateTimeModelElement('DTM', date_format_string), FixedDataModelElement('Space1', b' '),
        DelimitedDataModelElement('UName', b' '), FixedDataModelElement('Cron', cron), DecimalIntegerValueModelElement('JobNumber'),
        FixedDataModelElement('Job', b']: Job `'),
        FixedWordlistDataModelElement('CronType', [b'cron.daily', b'cron.hourly', b'cron.monthly', b'cron.weekly']),
        FixedDataModelElement('Started', b'\' started')]

    # Two alternatives for audit records: one for lines starting with "type=PATH " and one for lines starting with "type=SYSCALL ".
    service_children_audit = [SequenceModelElement('path', [
        FixedDataModelElement('type', b'type=PATH '), FixedDataModelElement('msg_audit', b'msg=audit('),
        DelimitedDataModelElement('msg', b':'), FixedDataModelElement('placeholder', b':'), DecimalIntegerValueModelElement('id'),
        FixedDataModelElement('item_string', b'): item='), DecimalIntegerValueModelElement('item'),
        FixedDataModelElement('name_string', b' name="'), DelimitedDataModelElement('name', b'"'),
        FixedDataModelElement('inode_string', b'" inode='), DecimalIntegerValueModelElement('inode'),
        FixedDataModelElement('dev_string', b' dev='), DelimitedDataModelElement('dev', b' '),
        FixedDataModelElement('mode_string', b' mode='),
        DecimalIntegerValueModelElement('mode', value_pad_type=DecimalIntegerValueModelElement.PAD_TYPE_ZERO),
        FixedDataModelElement('ouid_string', b' ouid='), DecimalIntegerValueModelElement('ouid'),
        FixedDataModelElement('ogid_string', b' ogid='), DecimalIntegerValueModelElement('ogid'),
        FixedDataModelElement('rdev_string', b' rdev='), DelimitedDataModelElement('rdev', b' '),
        FixedDataModelElement('nametype_string', b' nametype='), FixedWordlistDataModelElement('nametype', [b'NORMAL', b'ERROR'])]),
        SequenceModelElement('syscall', [
            FixedDataModelElement('type', b'type=SYSCALL '), FixedDataModelElement('msg_audit', b'msg=audit('),
            DelimitedDataModelElement('msg', b':'), FixedDataModelElement('placeholder', b':'), DecimalIntegerValueModelElement('id'),
            FixedDataModelElement('arch_string', b'): arch='), DelimitedDataModelElement('arch', b' '),
            FixedDataModelElement('syscall_string', b' syscall='), DecimalIntegerValueModelElement('syscall'),
            FixedDataModelElement('success_string', b' success='), FixedWordlistDataModelElement('success', [b'yes', b'no']),
            FixedDataModelElement('exit_string', b' exit='), DecimalIntegerValueModelElement('exit'),
            AnyByteDataModelElement('remainding_data')])]

    # This list collects one instance of many different model element types; further elements are appended below.
    service_children_parsing_model_element = [
        DateTimeModelElement('DateTimeModelElement', b'Current DateTime: %d.%m.%Y %H:%M:%S'),
        DecimalFloatValueModelElement('DecimalFloatValueModelElement', value_sign_type='optional'),
        DecimalIntegerValueModelElement('DecimalIntegerValueModelElement', value_sign_type='optional', value_pad_type='blank'),
        SequenceModelElement('se', [
            DelimitedDataModelElement('DelimitedDataModelElement', b';'), FixedDataModelElement('FixedDataModelElement', b';')])]

    # ElementValueBranchModelElement
    fixed_data_me1 = FixedDataModelElement("fixed1", b'match ')
    fixed_data_me2 = FixedDataModelElement("fixed2", b'fixed String')
    fixed_wordlist_data_model_element = FixedWordlistDataModelElement("wordlist", [b'data: ', b'string: '])
    decimal_integer_value_model_element = DecimalIntegerValueModelElement("decimal")

    service_children_parsing_model_element.append(
        ElementValueBranchModelElement('ElementValueBranchModelElement', FirstMatchModelElement("first", [
            SequenceModelElement("seq1", [fixed_data_me1, fixed_wordlist_data_model_element]),
            SequenceModelElement("seq2", [fixed_data_me1, fixed_wordlist_data_model_element, fixed_data_me2])]), "wordlist",
                                 {0: decimal_integer_value_model_element, 1: fixed_data_me2}))
    service_children_parsing_model_element.append(HexStringModelElement('HexStringModelElement'))
    service_children_parsing_model_element.append(SequenceModelElement('se2', [
        FixedDataModelElement('FixedDataModelElement', b'Gateway IP-Address: '), IpAddressDataModelElement('IpAddressDataModelElement')]))
    import locale
    loc = locale.getlocale()
    # Fall back to en_US.utf8 when locale.getlocale() reports neither a language code nor an encoding.
    if loc == (None, None):
        loc = ('en_US', 'utf8')
    service_children_parsing_model_element.append(
        MultiLocaleDateTimeModelElement('MultiLocaleDateTimeModelElement', [(b'%b %d %Y', None, '%s.%s' % loc)]))
    service_children_parsing_model_element.append(
        RepeatedElementDataModelElement('RepeatedElementDataModelElement', SequenceModelElement('SequenceModelElement', [
            FixedDataModelElement('FixedDataModelElement', b'[drawn number]: '),
            DecimalIntegerValueModelElement('DecimalIntegerValueModelElement')]), 1))
    service_children_parsing_model_element.append(VariableByteDataModelElement('VariableByteDataModelElement', b'-@#'))
    service_children_parsing_model_element.append(SequenceModelElement('se', [
        WhiteSpaceLimitedDataModelElement('WhiteSpaceLimitedDataModelElement'), FixedDataModelElement('fixed', b' ')]))

    # The Base64StringModelElement must be just before the AnyByteDataModelElement to avoid unexpected Matches.
    service_children_parsing_model_element.append(Base64StringModelElement('Base64StringModelElement'))

    # The OptionalMatchModelElement must be paired with a FirstMatchModelElement because it accepts all data and thus no data gets
    # to the AnyByteDataModelElement. The AnyByteDataModelElement must be last, because all bytes are accepted.
    service_children_parsing_model_element.append(
        OptionalMatchModelElement('/', FirstMatchModelElement('FirstMatchModelElement//optional', [
            FixedDataModelElement('FixedDataModelElement', b'The-searched-element-was-found!'), SequenceModelElement('se', [
                FixedDataModelElement('FixedDME', b'Any:'), AnyByteDataModelElement('AnyByteDataModelElement')])])))

    # Create one FixedDataModelElement per letter of the alphabet. Iterating over a bytes object yields integers, so every value is
    # converted back into a single-byte string, which is used as the fixed data and (decoded) as the element name.
    alphabet = b'ghijkl'
    service_children_ecd = []
    for byte_value in alphabet:
        char = bytes([byte_value])
        service_children_ecd.append(FixedDataModelElement(char.decode(), char))

    # Combine all alternatives defined above under one top-level FirstMatchModelElement.
    parsing_model = FirstMatchModelElement('model', [
        SequenceModelElement('CronAnnouncement', service_children_cron_job_announcement),
        SequenceModelElement('CronExecution', service_children_cron_job_execution),
        SequenceModelElement('DailyCron', service_children_cron_job), SequenceModelElement('DiskReport', service_children_disk_report),
        SequenceModelElement('LoginDetails', service_children_login_details), DecimalIntegerValueModelElement('Random'),
        SequenceModelElement('RandomTime', service_children_random_time), SequenceModelElement('Sensors', service_children_sensors),
        SequenceModelElement('IPAddresses', service_children_user_ip_address), FirstMatchModelElement('type', service_children_audit),
        FirstMatchModelElement('ECD', service_children_ecd), FirstMatchModelElement('ParsingME', service_children_parsing_model_element)])

    # Some generic imports.
    from aminer.analysis import AtomFilters

    # Create all global handler lists here and append the real handlers later on.
    # Use this filter to distribute all atoms to the analysis handlers.
    atom_filter = AtomFilters.SubhandlerFilter(None)

    from aminer.analysis.TimestampCorrectionFilters import SimpleMonotonicTimestampAdjust
    simple_monotonic_timestamp_adjust = SimpleMonotonicTimestampAdjust([atom_filter])
    analysis_context.register_component(simple_monotonic_timestamp_adjust, component_name="SimpleMonotonicTimestampAdjust")

    # All components below report to the JsonConverterHandler, which is constructed with the stream printer as its only handler.
    from aminer.events.StreamPrinterEventHandler import StreamPrinterEventHandler
    from aminer.events.JsonConverterHandler import JsonConverterHandler
    stream_printer_event_handler = StreamPrinterEventHandler(analysis_context)
    json_converter_handler = JsonConverterHandler([stream_printer_event_handler], analysis_context)
    anomaly_event_handlers = [json_converter_handler]

    # Now define the AtomizerFactory using the model. A simple line based one is usually sufficient.
    from aminer.input.SimpleByteStreamLineAtomizerFactory import SimpleByteStreamLineAtomizerFactory
    analysis_context.atomizer_factory = SimpleByteStreamLineAtomizerFactory(parsing_model, [simple_monotonic_timestamp_adjust],
                                                                            anomaly_event_handlers)

    # Just report all unparsed atoms to the event handlers.
    from aminer.analysis.UnparsedAtomHandlers import SimpleUnparsedAtomHandler, VerboseUnparsedAtomHandler
    simple_unparsed_atom_handler = SimpleUnparsedAtomHandler(anomaly_event_handlers)
    atom_filter.add_handler(simple_unparsed_atom_handler, stop_when_handled_flag=False)
    analysis_context.register_component(simple_unparsed_atom_handler, component_name="SimpleUnparsedHandler")

    verbose_unparsed_atom_handler = VerboseUnparsedAtomHandler(anomaly_event_handlers, parsing_model)
    atom_filter.add_handler(verbose_unparsed_atom_handler, stop_when_handled_flag=True)
    analysis_context.register_component(verbose_unparsed_atom_handler, component_name="VerboseUnparsedHandler")

    from aminer.analysis.TimestampsUnsortedDetector import TimestampsUnsortedDetector
    timestamps_unsorted_detector = TimestampsUnsortedDetector(analysis_context.aminer_config, anomaly_event_handlers)
    atom_filter.add_handler(timestamps_unsorted_detector)
    analysis_context.register_component(timestamps_unsorted_detector, component_name="TimestampsUnsortedDetector")

    from aminer.analysis import Rules
    from aminer.analysis.AllowlistViolationDetector import AllowlistViolationDetector
    allowlist_rules = [
        Rules.OrMatchRule([
            Rules.AndMatchRule([
                Rules.PathExistsMatchRule('/model/LoginDetails/PastTime/Time/Minutes'),
                Rules.NegationMatchRule(Rules.ValueMatchRule('/model/LoginDetails/Username', b'root')),
                Rules.DebugMatchRule(debug_match_result=True)]),
            Rules.AndMatchRule([
                Rules.NegationMatchRule(Rules.PathExistsMatchRule('/model/LoginDetails/PastTime/Time/Minutes')),
                Rules.PathExistsMatchRule('/model/LoginDetails'),
                Rules.DebugMatchRule(debug_match_result=True)]),
            Rules.NegationMatchRule(Rules.PathExistsMatchRule('/model/LoginDetails'))])]

    # This rule list should trigger, when the line does not look like: User root (logged in, logged out)
    # or User 'username' (logged in, logged out) x minutes ago.
    allowlist_violation_detector = AllowlistViolationDetector(analysis_context.aminer_config, allowlist_rules, anomaly_event_handlers,
                                                              output_log_line=True)
    analysis_context.register_component(allowlist_violation_detector, component_name="Allowlist")
    atom_filter.add_handler(allowlist_violation_detector)

    from aminer.analysis.ParserCount import ParserCount
    parser_count = ParserCount(analysis_context.aminer_config, None, anomaly_event_handlers, 10)
    analysis_context.register_component(parser_count, component_name="ParserCount")
    atom_filter.add_handler(parser_count)

    # The EventTypeDetector instance is also handed to the VariableTypeDetector and the VariableCorrelationDetector below.
    from aminer.analysis.EventTypeDetector import EventTypeDetector
    etd = EventTypeDetector(analysis_context.aminer_config, anomaly_event_handlers)
    analysis_context.register_component(etd, component_name="EventTypeDetector")
    atom_filter.add_handler(etd)

    from aminer.analysis.VariableTypeDetector import VariableTypeDetector
    vtd = VariableTypeDetector(analysis_context.aminer_config, anomaly_event_handlers, etd, silence_output_except_indicator=False,
                               output_log_line=False, ignore_list=["/model/RandomTime"])
    analysis_context.register_component(vtd, component_name="VariableTypeDetector")
    atom_filter.add_handler(vtd)

    from aminer.analysis.VariableCorrelationDetector import VariableCorrelationDetector
    vcd = VariableCorrelationDetector(analysis_context.aminer_config, anomaly_event_handlers, etd, disc_div_thres=0.5,
                                      ignore_list=["/model/RandomTime"])
    analysis_context.register_component(vcd, component_name="VariableCorrelationDetector")
    atom_filter.add_handler(vcd)

    from aminer.analysis.EventCorrelationDetector import EventCorrelationDetector
    ecd = EventCorrelationDetector(analysis_context.aminer_config, anomaly_event_handlers, check_rules_flag=True,
                                   hypothesis_max_delta_time=1.0)
    analysis_context.register_component(ecd, component_name="EventCorrelationDetector")
    atom_filter.add_handler(ecd)

    from aminer.analysis.EventFrequencyDetector import EventFrequencyDetector
    efd = EventFrequencyDetector(analysis_context.aminer_config, anomaly_event_handlers, window_size=0.1)
    analysis_context.register_component(efd, component_name="EventFrequencyDetector")
    atom_filter.add_handler(efd)

    from aminer.analysis.EventSequenceDetector import EventSequenceDetector
    esd = EventSequenceDetector(analysis_context.aminer_config, anomaly_event_handlers, ['/model/ParsingME'], ignore_list=[
        '/model/ECD/g', '/model/ECD/h', '/model/ECD/i', '/model/ECD/j', '/model/ECD/k', '/model/ECD/l', '/model/Random',
        '/model/RandomTime', '/model/DailyCron'])
    analysis_context.register_component(esd, component_name="EventSequenceDetector")
    atom_filter.add_handler(esd)

    from aminer.analysis.MatchFilter import MatchFilter
    match_filter = MatchFilter(analysis_context.aminer_config, ['/model/Random'], anomaly_event_handlers, target_value_list=[
        1, 10, 100], output_log_line=True)
    analysis_context.register_component(match_filter, component_name="MatchFilter")
    atom_filter.add_handler(match_filter)

    from aminer.analysis.NewMatchPathDetector import NewMatchPathDetector
    new_match_path_detector = NewMatchPathDetector(analysis_context.aminer_config, anomaly_event_handlers, auto_include_flag=True,
                                                   output_log_line=True)
    analysis_context.register_component(new_match_path_detector, component_name="NewMatchPath")
    atom_filter.add_handler(new_match_path_detector)

    def tuple_transformation_function(match_value_list):
        """
        Only allow output of the EnhancedNewMatchPathValueComboDetector after every 10th element.
        The value list is looked up as a tuple in the known_values_dict of the detector created below. When an entry exists, the
        auto_include_flag of the detector is set to False if (extra_data[2] + 1) is a multiple of 10 and to True otherwise.
        The passed match_value_list is returned unchanged.
        """
        # The detector is referenced through the enclosing scope; it is assigned after this function is defined, before any call.
        extra_data = enhanced_new_match_path_value_combo_detector.known_values_dict.get(tuple(match_value_list))
        if extra_data is not None:
            mod = 10
            # NOTE(review): extra_data[2] presumably is the number of times this value combination was seen - confirm in the detector.
            if (extra_data[2] + 1) % mod == 0:
                enhanced_new_match_path_value_combo_detector.auto_include_flag = False
            else:
                enhanced_new_match_path_value_combo_detector.auto_include_flag = True
        return match_value_list

    from aminer.analysis.EnhancedNewMatchPathValueComboDetector import EnhancedNewMatchPathValueComboDetector
    enhanced_new_match_path_value_combo_detector = EnhancedNewMatchPathValueComboDetector(analysis_context.aminer_config, [
        '/model/DailyCron/UName', '/model/DailyCron/JobNumber'], anomaly_event_handlers, auto_include_flag=True,
        tuple_transformation_function=tuple_transformation_function, output_log_line=True)
    analysis_context.register_component(enhanced_new_match_path_value_combo_detector, component_name="EnhancedNewValueCombo")
    atom_filter.add_handler(enhanced_new_match_path_value_combo_detector)

    import re
    ip_match_action = Rules.EventGenerationMatchAction(
        "Analysis.Rules.IPv4InRFC1918MatchRule", "Private IP address occurred!", anomaly_event_handlers)

    vdmt = Rules.ValueDependentModuloTimeMatchRule(None, 3, ["/model/ECD/j", "/model/ECD/k", "/model/ECD/l"], {b"e": [0, 2.95]}, [0, 3])
    mt = Rules.ModuloTimeMatchRule(None, 3, 0, 3, None)
    time_allowlist_rules = [
        Rules.AndMatchRule([
            Rules.ParallelMatchRule([
                Rules.ValueDependentDelegatedMatchRule([
                    '/model/ECD/g', '/model/ECD/h', '/model/ECD/i', '/model/ECD/j', '/model/ECD/k', '/model/ECD/l'], {
                        (b"a",): mt, (b"b",): mt, (b"c",): mt, (b"d",): vdmt, (b"e",): vdmt, (b"f",): vdmt, None: mt}, mt),
                Rules.IPv4InRFC1918MatchRule("/model/ParsingME/se2/IpAddressDataModelElement", ip_match_action),
                Rules.DebugHistoryMatchRule(debug_match_result=True)
            ]),
            # IP addresses 8.8.8.8, 8.8.4.4 and 10.0.0.0 - 10.255.255.255 are not allowed
            # The addresses are given as integers: 134744072 is 8.8.8.8, 134743044 is 8.8.4.4, 167772160 is 10.0.0.0 and
            # 184549375 is 10.255.255.255.
            Rules.NegationMatchRule(Rules.ValueListMatchRule("/model/ParsingME/se2/IpAddressDataModelElement", [134744072, 134743044])),
            Rules.NegationMatchRule(Rules.ValueRangeMatchRule("/model/ParsingME/se2/IpAddressDataModelElement", 167772160, 184549375)),
            Rules.NegationMatchRule(Rules.StringRegexMatchRule("/model/type/syscall/success", re.compile(b"^no$")))
        ])
    ]
    time_allowlist_violation_detector = AllowlistViolationDetector(
        analysis_context.aminer_config, time_allowlist_rules, anomaly_event_handlers, output_log_line=True)
    analysis_context.register_component(time_allowlist_violation_detector, component_name="TimeAllowlist")
    atom_filter.add_handler(time_allowlist_violation_detector)

    from aminer.analysis.HistogramAnalysis import HistogramAnalysis, LinearNumericBinDefinition, ModuloTimeBinDefinition, \
        PathDependentHistogramAnalysis
    modulo_time_bin_definition = ModuloTimeBinDefinition(86400, 3600, 0, 1, 24, True)
    linear_numeric_bin_definition = LinearNumericBinDefinition(50, 5, 20, True)
    histogram_analysis = HistogramAnalysis(analysis_context.aminer_config, [
        ('/model/RandomTime/Random', modulo_time_bin_definition), ('/model/Random', linear_numeric_bin_definition)], 10,
        anomaly_event_handlers, output_log_line=True)
    analysis_context.register_component(histogram_analysis, component_name="HistogramAnalysis")
    atom_filter.add_handler(histogram_analysis)

    path_dependent_histogram_analysis = PathDependentHistogramAnalysis(
        analysis_context.aminer_config, '/model/RandomTime', modulo_time_bin_definition, 10, anomaly_event_handlers, output_log_line=True)
    analysis_context.register_component(path_dependent_histogram_analysis, component_name="PathDependentHistogramAnalysis")
    atom_filter.add_handler(path_dependent_histogram_analysis)

    from aminer.analysis.MatchValueAverageChangeDetector import MatchValueAverageChangeDetector
    match_value_average_change_detector = MatchValueAverageChangeDetector(analysis_context.aminer_config, anomaly_event_handlers, None, [
        '/model/Random'], 100, 10, output_log_line=True)
    analysis_context.register_component(match_value_average_change_detector, component_name="MatchValueAverageChange")
    atom_filter.add_handler(match_value_average_change_detector)

    # The MatchValueStreamWriter is constructed with sys.stdout as its stream instead of the anomaly event handlers.
    import sys
    from aminer.analysis.MatchValueStreamWriter import MatchValueStreamWriter
    match_value_stream_writer = MatchValueStreamWriter(
        sys.stdout, ['/model/Sensors/CPUTemp', '/model/Sensors/CPUWorkload', '/model/Sensors/DTM'], b';', b'')
    analysis_context.register_component(match_value_stream_writer, component_name="MatchValueStreamWriter")
    atom_filter.add_handler(match_value_stream_writer)

    from aminer.analysis.NewMatchPathValueComboDetector import NewMatchPathValueComboDetector
    new_match_path_value_combo_detector = NewMatchPathValueComboDetector(
        analysis_context.aminer_config, ['/model/IPAddresses/Username', '/model/IPAddresses/IP'],
        anomaly_event_handlers, output_log_line=True)
    analysis_context.register_component(new_match_path_value_combo_detector, component_name="NewMatchPathValueCombo")
    atom_filter.add_handler(new_match_path_value_combo_detector)

    from aminer.analysis.NewMatchIdValueComboDetector import NewMatchIdValueComboDetector
    new_match_id_value_combo_detector = NewMatchIdValueComboDetector(analysis_context.aminer_config, [
        '/model/type/path/name', '/model/type/syscall/syscall'], anomaly_event_handlers, id_path_list=[
        '/model/type/path/id', '/model/type/syscall/id'], min_allowed_time_diff=5, auto_include_flag=True, allow_missing_values_flag=True,
        output_log_line=True)
    analysis_context.register_component(new_match_id_value_combo_detector, component_name="NewMatchIdValueComboDetector")
    atom_filter.add_handler(new_match_id_value_combo_detector)

    from aminer.analysis.NewMatchPathValueDetector import NewMatchPathValueDetector
    new_match_path_value_detector = NewMatchPathValueDetector(analysis_context.aminer_config, [
        '/model/DailyCron/JobNumber', '/model/IPAddresses/Username'], anomaly_event_handlers, auto_include_flag=True, output_log_line=True)
    analysis_context.register_component(new_match_path_value_detector, component_name="NewMatchPathValue")
    atom_filter.add_handler(new_match_path_value_detector)

    from aminer.analysis.MissingMatchPathValueDetector import MissingMatchPathValueDetector
    missing_match_path_value_detector = MissingMatchPathValueDetector(
        analysis_context.aminer_config, ['/model/DiskReport/Space'], anomaly_event_handlers, auto_include_flag=True, default_interval=2,
        realert_interval=5, output_log_line=True)
    analysis_context.register_component(missing_match_path_value_detector, component_name="MissingMatch")
    atom_filter.add_handler(missing_match_path_value_detector)

    from aminer.analysis.TimeCorrelationDetector import TimeCorrelationDetector
    time_correlation_detector = TimeCorrelationDetector(
        analysis_context.aminer_config, anomaly_event_handlers, 2, min_rule_attributes=1, max_rule_attributes=5,
        record_count_before_event=10000, output_log_line=True)
    analysis_context.register_component(time_correlation_detector, component_name="TimeCorrelationDetector")
    atom_filter.add_handler(time_correlation_detector)

    # Correlate cron announcements with cron executions: the same CorrelationRule is used by the selector for the announcement
    # lines and by the selector for the execution lines, matching on the JobNumber of both.
    from aminer.analysis.TimeCorrelationViolationDetector import TimeCorrelationViolationDetector, CorrelationRule, EventClassSelector
    cron_job_announcement = CorrelationRule('CronJobAnnouncement', 5, 6, max_artefacts_a_for_single_b=1, artefact_match_parameters=[
        ('/model/CronAnnouncement/JobNumber', '/model/CronExecution/JobNumber')])
    a_class_selector = EventClassSelector('Announcement', [cron_job_announcement], None)
    b_class_selector = EventClassSelector('Execution', None, [cron_job_announcement])
    rules = [Rules.PathExistsMatchRule('/model/CronAnnouncement/Run', a_class_selector),
             Rules.PathExistsMatchRule('/model/CronExecution/Job', b_class_selector)]

    time_correlation_violation_detector = TimeCorrelationViolationDetector(analysis_context.aminer_config, rules, anomaly_event_handlers,
                                                                           output_log_line=True)
    analysis_context.register_component(time_correlation_violation_detector, component_name="TimeCorrelationViolationDetector")
    atom_filter.add_handler(time_correlation_violation_detector)
def build_analysis_pipeline(analysis_context):
    """
    Define the function to create pipeline for parsing the log data.
    It has also to define an AtomizerFactory to instruct aminer how to process incoming data streams to create log atoms from them.
    """
    date_format_string = b'%Y-%m-%d %H:%M:%S'
    cron = b' cron['

    # Build the parsing model:

    service_children_disk_report = [
        FixedDataModelElement(
            'Space',
            b' Current Disk Data is: Filesystem     Type  Size  Used Avail Use%'
        ),
        DelimitedDataModelElement('Data', b'%'),
        AnyByteDataModelElement('Rest')
    ]

    service_children_login_details = [
        FixedDataModelElement('User', b'User '),
        DelimitedDataModelElement('Username', b' '),
        FixedWordlistDataModelElement('Status',
                                      [b' logged in', b' logged out']),
        OptionalMatchModelElement(
            'PastTime',
            SequenceModelElement('Time', [
                FixedDataModelElement('Blank', b' '),
                DecimalIntegerValueModelElement('Minutes'),
                FixedDataModelElement('Ago', b' minutes ago.')
            ]))
    ]

    service_children_cron_job = [
        DateTimeModelElement('DTM', date_format_string),
        FixedDataModelElement('UNameSpace1', b' '),
        DelimitedDataModelElement('UName', b' '),
        FixedDataModelElement('UNameSpace2', b' '),
        DelimitedDataModelElement('User', b' '),
        FixedDataModelElement('Cron', cron),
        DecimalIntegerValueModelElement('JobNumber'),
        FixedDataModelElement('Details', b']: Job `cron.daily` started.')
    ]

    service_children_random_time = [
        FixedDataModelElement('Space', b'Random: '),
        DecimalIntegerValueModelElement('Random')
    ]

    service_children_sensors = [
        SequenceModelElement('CPUTemp', [
            FixedDataModelElement('FixedTemp', b'CPU Temp: '),
            DecimalIntegerValueModelElement('Temp'),
            FixedDataModelElement('Degrees', b'\xc2\xb0C')
        ]),
        FixedDataModelElement('Space1', b', '),
        SequenceModelElement('CPUWorkload', [
            FixedDataModelElement('Fixed Workload', b'CPU Workload: '),
            DecimalIntegerValueModelElement('Workload'),
            FixedDataModelElement('Percent', b'%')
        ]),
        FixedDataModelElement('Space2', b', '),
        DateTimeModelElement('DTM', date_format_string)
    ]

    service_children_user_ip_address = [
        FixedDataModelElement('User', b'User '),
        DelimitedDataModelElement('Username', b' '),
        FixedDataModelElement('Action', b' changed IP address to '),
        IpAddressDataModelElement('IP')
    ]

    service_children_cron_job_announcement = [
        DateTimeModelElement('DTM', date_format_string),
        FixedDataModelElement('Space', b' '),
        DelimitedDataModelElement('UName', b' '),
        FixedDataModelElement('Cron', cron),
        DecimalIntegerValueModelElement('JobNumber'),
        FixedDataModelElement('Run', b']: Will run job `'),
        FixedWordlistDataModelElement(
            'CronType',
            [b'cron.daily', b'cron.hourly', b'cron.monthly', b'cron.weekly']),
        FixedDataModelElement('Start Time', b'\' in 5 min.')
    ]

    service_children_cron_job_execution = [
        DateTimeModelElement('DTM', date_format_string),
        FixedDataModelElement('Space1', b' '),
        DelimitedDataModelElement('UName', b' '),
        FixedDataModelElement('Cron', cron),
        DecimalIntegerValueModelElement('JobNumber'),
        FixedDataModelElement('Job', b']: Job `'),
        FixedWordlistDataModelElement(
            'CronType',
            [b'cron.daily', b'cron.hourly', b'cron.monthly', b'cron.weekly']),
        FixedDataModelElement('Started', b'\' started')
    ]

    service_children_parsing_model_element = [
        DateTimeModelElement('DateTimeModelElement',
                             b'Current DateTime: %d.%m.%Y %H:%M:%S'),
        DecimalFloatValueModelElement('DecimalFloatValueModelElement',
                                      value_sign_type='optional'),
        DecimalIntegerValueModelElement('DecimalIntegerValueModelElement',
                                        value_sign_type='optional',
                                        value_pad_type='blank'),
        SequenceModelElement('', [
            DelimitedDataModelElement('DelimitedDataModelElement', b';'),
            FixedDataModelElement('FixedDataModelElement', b';')
        ])
    ]

    # ElementValueBranchModelElement
    fixed_data_me1 = FixedDataModelElement("fixed1", b'match ')
    fixed_data_me2 = FixedDataModelElement("fixed2", b'fixed String')
    fixed_wordlist_data_model_element = FixedWordlistDataModelElement(
        "wordlist", [b'data: ', b'string: '])
    decimal_integer_value_model_element = DecimalIntegerValueModelElement(
        "decimal")

    service_children_parsing_model_element.append(
        ElementValueBranchModelElement(
            'ElementValueBranchModelElement',
            FirstMatchModelElement("first", [
                SequenceModelElement(
                    "seq1",
                    [fixed_data_me1, fixed_wordlist_data_model_element]),
                SequenceModelElement("seq2", [
                    fixed_data_me1, fixed_wordlist_data_model_element,
                    fixed_data_me2
                ])
            ]), "wordlist", {
                0: decimal_integer_value_model_element,
                1: fixed_data_me2
            }))
    service_children_parsing_model_element.append(
        HexStringModelElement('HexStringModelElement'))
    service_children_parsing_model_element.append(
        SequenceModelElement('', [
            FixedDataModelElement('FixedDataModelElement',
                                  b'Gateway IP-Address: '),
            IpAddressDataModelElement('IpAddressDataModelElement')
        ]))
    service_children_parsing_model_element.append(
        MultiLocaleDateTimeModelElement('MultiLocaleDateTimeModelElement',
                                        [(b'%b %d %Y', "de_AT.utf8", None)]))
    service_children_parsing_model_element.append(
        RepeatedElementDataModelElement(
            'RepeatedElementDataModelElement',
            SequenceModelElement('SequenceModelElement', [
                FixedDataModelElement('FixedDataModelElement',
                                      b'drawn number: '),
                DecimalIntegerValueModelElement(
                    'DecimalIntegerValueModelElement')
            ]), 1))
    service_children_parsing_model_element.append(
        VariableByteDataModelElement('VariableByteDataModelElement', b'-@#'))
    service_children_parsing_model_element.append(
        SequenceModelElement('', [
            WhiteSpaceLimitedDataModelElement(
                'WhiteSpaceLimitedDataModelElement'),
            FixedDataModelElement('', b' ')
        ]))

    # The Base64StringModelElement must be placed directly before the AnyByteDataModelElement to avoid unexpected matches.
    service_children_parsing_model_element.append(
        Base64StringModelElement('Base64StringModelElement'))

    # The OptionalMatchModelElement must be paired with a FirstMatchModelElement because it accepts all data and thus no data gets
    # to the AnyByteDataModelElement. The AnyByteDataModelElement must be last, because all bytes are accepted.
    service_children_parsing_model_element.append(
        OptionalMatchModelElement(
            'OptionalMatchModelElement',
            FirstMatchModelElement('FirstMatchModelElement', [
                FixedDataModelElement('FixedDataModelElement',
                                      b'The-searched-element-was-found!'),
                AnyByteDataModelElement('AnyByteDataModelElement')
            ])))

    parsing_model = FirstMatchModelElement('model', [
        SequenceModelElement('CronAnnouncement',
                             service_children_cron_job_announcement),
        SequenceModelElement('CronExecution',
                             service_children_cron_job_execution),
        SequenceModelElement('DailyCron', service_children_cron_job),
        SequenceModelElement('DiskReport', service_children_disk_report),
        SequenceModelElement('LoginDetails', service_children_login_details),
        DecimalIntegerValueModelElement('Random'),
        SequenceModelElement('RandomTime', service_children_random_time),
        SequenceModelElement('Sensors', service_children_sensors),
        SequenceModelElement('IPAddresses', service_children_user_ip_address),
        FirstMatchModelElement('ParsingME',
                               service_children_parsing_model_element)
    ])

    # Some generic imports.
    from aminer.analysis import AtomFilters

    # Create all global handler lists here and append the real handlers
    # later on.
    # Use this filter to distribute all atoms to the analysis handlers.
    atom_filter = AtomFilters.SubhandlerFilter(None)

    from aminer.analysis.TimestampCorrectionFilters import SimpleMonotonicTimestampAdjust
    simple_monotonic_timestamp_adjust = SimpleMonotonicTimestampAdjust(
        [atom_filter])
    analysis_context.register_component(
        simple_monotonic_timestamp_adjust,
        component_name="SimpleMonotonicTimestampAdjust")

    from aminer.events.StreamPrinterEventHandler import StreamPrinterEventHandler
    stream_printer_event_handler = StreamPrinterEventHandler(analysis_context)
    from aminer.events.SyslogWriterEventHandler import SyslogWriterEventHandler
    syslog_event_handler = SyslogWriterEventHandler(analysis_context)
    from aminer.events.DefaultMailNotificationEventHandler import DefaultMailNotificationEventHandler
    # Collect the handlers that receive anomaly events. The stream printer and syslog handlers are always used.
    anomaly_event_handlers = [
        stream_printer_event_handler, syslog_event_handler
    ]
    # The mail handler is optional: it is only created, registered and added to the handler list when a mail target
    # address is present in the configuration properties. Referencing it unconditionally would raise an
    # UnboundLocalError when no target address is configured.
    if DefaultMailNotificationEventHandler.CONFIG_KEY_MAIL_TARGET_ADDRESS in analysis_context.aminer_config.config_properties:
        mail_notification_handler = DefaultMailNotificationEventHandler(
            analysis_context)
        analysis_context.register_component(mail_notification_handler,
                                            component_name="MailHandler")
        anomaly_event_handlers.append(mail_notification_handler)

    # Now define the AtomizerFactory using the model. A simple line based one is usually sufficient.
    from aminer.input.SimpleByteStreamLineAtomizerFactory import SimpleByteStreamLineAtomizerFactory
    analysis_context.atomizer_factory = SimpleByteStreamLineAtomizerFactory(
        parsing_model, [simple_monotonic_timestamp_adjust],
        anomaly_event_handlers)

    # Just report all unparsed atoms to the event handlers.
    from aminer.analysis.UnparsedAtomHandlers import SimpleUnparsedAtomHandler
    simple_unparsed_atom_handler = SimpleUnparsedAtomHandler(
        anomaly_event_handlers)
    atom_filter.add_handler(simple_unparsed_atom_handler,
                            stop_when_handled_flag=True)
    analysis_context.register_component(simple_unparsed_atom_handler,
                                        component_name="UnparsedHandler")

    from aminer.analysis.TimestampsUnsortedDetector import TimestampsUnsortedDetector
    timestamps_unsorted_detector = TimestampsUnsortedDetector(
        analysis_context.aminer_config, anomaly_event_handlers)
    atom_filter.add_handler(timestamps_unsorted_detector)
    analysis_context.register_component(
        timestamps_unsorted_detector,
        component_name="TimestampsUnsortedDetector")

    from aminer.analysis import Rules
    from aminer.analysis.AllowlistViolationDetector import AllowlistViolationDetector
    allowlist_rules = [
        Rules.OrMatchRule([
            Rules.AndMatchRule([
                Rules.PathExistsMatchRule(
                    '/model/LoginDetails/PastTime/Time/Minutes'),
                Rules.NegationMatchRule(
                    Rules.ValueMatchRule('/model/LoginDetails/Username',
                                         b'root'))
            ]),
            Rules.AndMatchRule([
                Rules.NegationMatchRule(
                    Rules.PathExistsMatchRule(
                        '/model/LoginDetails/PastTime/Time/Minutes')),
                Rules.PathExistsMatchRule('/model/LoginDetails')
            ]),
            Rules.NegationMatchRule(
                Rules.PathExistsMatchRule('/model/LoginDetails'))
        ])
    ]

    # This rule list should trigger when the line does not look like "User root (logged in, logged out)"
    # or "User 'username' (logged in, logged out) x minutes ago".
    allowlist_violation_detector = AllowlistViolationDetector(
        analysis_context.aminer_config, allowlist_rules,
        anomaly_event_handlers)
    analysis_context.register_component(allowlist_violation_detector,
                                        component_name="Allowlist")
    atom_filter.add_handler(allowlist_violation_detector)

    from aminer.analysis.NewMatchPathDetector import NewMatchPathDetector
    new_match_path_detector = NewMatchPathDetector(
        analysis_context.aminer_config,
        anomaly_event_handlers,
        auto_include_flag=True)
    analysis_context.register_component(new_match_path_detector,
                                        component_name="NewMatchPath")
    atom_filter.add_handler(new_match_path_detector)

    def tuple_transformation_function(match_value_list):
        """
        Return match_value_list unchanged while toggling the auto_include_flag of the EnhancedNewMatchPathValueComboDetector.

        The value list is looked up (as a tuple) in the detector's known_values_dict. If no entry exists, nothing is changed.
        Otherwise the flag is set to False when the third field of the entry plus one is a multiple of 10000 and to True in all
        other cases, so that output is only allowed after every 10000th element.
        """
        detector = enhanced_new_match_path_value_combo_detector
        known_entry = detector.known_values_dict.get(tuple(match_value_list))
        if known_entry is None:
            # Unknown value combination: leave the flag as it is.
            return match_value_list

        report_interval = 10000
        # known_entry[2] is presumably the occurrence counter of this combination - TODO confirm against the detector.
        detector.auto_include_flag = (known_entry[2] + 1) % report_interval != 0
        return match_value_list

    from aminer.analysis.EnhancedNewMatchPathValueComboDetector import EnhancedNewMatchPathValueComboDetector
    enhanced_new_match_path_value_combo_detector = EnhancedNewMatchPathValueComboDetector(
        analysis_context.aminer_config,
        ['/model/DailyCron/UName', '/model/DailyCron/JobNumber'],
        anomaly_event_handlers,
        auto_include_flag=True,
        tuple_transformation_function=tuple_transformation_function)
    analysis_context.register_component(
        enhanced_new_match_path_value_combo_detector,
        component_name="EnhancedNewValueCombo")
    atom_filter.add_handler(enhanced_new_match_path_value_combo_detector)

    from aminer.analysis.HistogramAnalysis import HistogramAnalysis, LinearNumericBinDefinition, ModuloTimeBinDefinition, \
        PathDependentHistogramAnalysis
    modulo_time_bin_definition = ModuloTimeBinDefinition(
        86400, 3600, 0, 1, 24, True)
    linear_numeric_bin_definition = LinearNumericBinDefinition(50, 5, 20, True)
    histogram_analysis = HistogramAnalysis(
        analysis_context.aminer_config,
        [('/model/RandomTime/Random', modulo_time_bin_definition),
         ('/model/Random', linear_numeric_bin_definition)], 10,
        anomaly_event_handlers)
    analysis_context.register_component(histogram_analysis,
                                        component_name="HistogramAnalysis")
    atom_filter.add_handler(histogram_analysis)

    path_dependent_histogram_analysis = PathDependentHistogramAnalysis(
        analysis_context.aminer_config, '/model/RandomTime',
        modulo_time_bin_definition, 10, anomaly_event_handlers)
    analysis_context.register_component(
        path_dependent_histogram_analysis,
        component_name="PathDependentHistogramAnalysis")
    atom_filter.add_handler(path_dependent_histogram_analysis)

    from aminer.analysis.MatchValueAverageChangeDetector import MatchValueAverageChangeDetector
    match_value_average_change_detector = MatchValueAverageChangeDetector(
        analysis_context.aminer_config, anomaly_event_handlers, None,
        ['/model/Random'], 100, 10)
    analysis_context.register_component(
        match_value_average_change_detector,
        component_name="MatchValueAverageChange")
    atom_filter.add_handler(match_value_average_change_detector)

    import sys
    from aminer.analysis.MatchValueStreamWriter import MatchValueStreamWriter
    match_value_stream_writer = MatchValueStreamWriter(sys.stdout, [
        '/model/Sensors/CPUTemp', '/model/Sensors/CPUWorkload',
        '/model/Sensors/DTM'
    ], b';', b'')
    analysis_context.register_component(
        match_value_stream_writer, component_name="MatchValueStreamWriter")
    atom_filter.add_handler(match_value_stream_writer)

    from aminer.analysis.NewMatchPathValueComboDetector import NewMatchPathValueComboDetector
    new_match_path_value_combo_detector = NewMatchPathValueComboDetector(
        analysis_context.aminer_config,
        ['/model/IPAddresses/Username', '/model/IPAddresses/IP'],
        anomaly_event_handlers,
        auto_include_flag=True)
    analysis_context.register_component(
        new_match_path_value_combo_detector,
        component_name="NewMatchPathValueCombo")
    atom_filter.add_handler(new_match_path_value_combo_detector)

    from aminer.analysis.NewMatchPathValueDetector import NewMatchPathValueDetector
    new_match_path_value_detector = NewMatchPathValueDetector(
        analysis_context.aminer_config,
        ['/model/DailyCron/JobNumber', '/model/IPAddresses/Username'],
        anomaly_event_handlers,
        auto_include_flag=True)
    analysis_context.register_component(new_match_path_value_detector,
                                        component_name="NewMatchPathValue")
    atom_filter.add_handler(new_match_path_value_detector)

    from aminer.analysis.MissingMatchPathValueDetector import MissingMatchPathValueDetector
    missing_match_path_value_detector = MissingMatchPathValueDetector(
        analysis_context.aminer_config, ['/model/DiskReport/Space'],
        anomaly_event_handlers,
        auto_include_flag=True,
        default_interval=2,
        realert_interval=5)
    analysis_context.register_component(missing_match_path_value_detector,
                                        component_name="MissingMatch")
    atom_filter.add_handler(missing_match_path_value_detector)

    from aminer.analysis.TimeCorrelationDetector import TimeCorrelationDetector
    time_correlation_detector = TimeCorrelationDetector(
        analysis_context.aminer_config,
        anomaly_event_handlers,
        2,
        min_rule_attributes=1,
        max_rule_attributes=5,
        record_count_before_event=70000,
        output_log_line=True)
    analysis_context.register_component(
        time_correlation_detector, component_name="TimeCorrelationDetector")
    atom_filter.add_handler(time_correlation_detector)

    from aminer.analysis.TimeCorrelationViolationDetector import TimeCorrelationViolationDetector, CorrelationRule, EventClassSelector
    cron_job_announcement = CorrelationRule(
        'CronJobAnnouncement',
        5,
        6,
        max_artefacts_a_for_single_b=1,
        artefact_match_parameters=[('/model/CronAnnouncement/JobNumber',
                                    '/model/CronExecution/JobNumber')])
    a_class_selector = EventClassSelector('Announcement',
                                          [cron_job_announcement], None)
    b_class_selector = EventClassSelector('Execution', None,
                                          [cron_job_announcement])
    rules = [
        Rules.PathExistsMatchRule('/model/CronAnnouncement/Run',
                                  a_class_selector),
        Rules.PathExistsMatchRule('/model/CronExecution/Job', b_class_selector)
    ]

    time_correlation_violation_detector = TimeCorrelationViolationDetector(
        analysis_context.aminer_config, rules, anomaly_event_handlers)
    analysis_context.register_component(
        time_correlation_violation_detector,
        component_name="TimeCorrelationViolationDetector")
    atom_filter.add_handler(time_correlation_violation_detector)