    def test2persist_multiple_objects_of_multiple_class(self):
        """In this test case multiple instances of multiple classes are to be persisted and loaded."""
        description = "Test2PersistenceUtil"
        new_match_path_detector = NewMatchPathDetector(
            self.aminer_config, [self.stream_printer_event_handler],
            'Default2', True)
        self.analysis_context.register_component(new_match_path_detector,
                                                 description)

        t = time.time()
        log_atom_fixed_dme = LogAtom(self.fixed_dme.fixed_data,
                                     ParserMatch(self.match_element_fixed_dme),
                                     t, new_match_path_detector)
        log_atom_decimal_integer_value_me = LogAtom(
            self.match_context_decimal_integer_value_me.match_data,
            ParserMatch(self.match_element_decimal_integer_value_me), t,
            new_match_path_detector)
        new_match_path_detector.receive_atom(log_atom_fixed_dme)
        new_match_path_detector.receive_atom(log_atom_decimal_integer_value_me)
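        # receive_atom() lets each detector record the so far unseen match paths;
        # this learned state is what persist_all() writes out further below.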

        other_new_match_path_detector = NewMatchPathDetector(
            self.aminer_config, [self.stream_printer_event_handler],
            'otherDetector2', True)
        self.analysis_context.register_component(other_new_match_path_detector,
                                                 description + "2")
        log_atom_fixed_dme = LogAtom(self.fixed_dme.fixed_data,
                                     ParserMatch(self.match_element_fixed_dme),
                                     t, other_new_match_path_detector)
        other_new_match_path_detector.receive_atom(log_atom_fixed_dme)

        new_match_path_value_combo_detector = NewMatchPathValueComboDetector(
            self.aminer_config, ['first/f1/s1'],
            [self.stream_printer_event_handler], 'Default', False, True)
        self.analysis_context.register_component(
            new_match_path_value_combo_detector, description + "3")
        log_atom_sequence_me = LogAtom(
            self.fixed_dme.fixed_data,
            ParserMatch(self.match_element_first_match_me), t,
            new_match_path_value_combo_detector)
        new_match_path_value_combo_detector.receive_atom(log_atom_sequence_me)

        PersistenceUtil.persist_all()
        persistence_data = PersistenceUtil.load_json(
            new_match_path_detector.persistence_file_name)
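        # The order of the persisted paths is not guaranteed, so accept both
        # permutations.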
        self.assertIn(persistence_data, ([
            self.match_element_fixed_dme.get_path(),
            self.match_element_decimal_integer_value_me.get_path()
        ], [
            self.match_element_decimal_integer_value_me.get_path(),
            self.match_element_fixed_dme.get_path()
        ]))
        self.assertEqual(
            PersistenceUtil.load_json(
                other_new_match_path_detector.persistence_file_name),
            [self.match_element_fixed_dme.get_path()])
        self.assertEqual(
            PersistenceUtil.load_json(
                new_match_path_value_combo_detector.persistence_file_name),
            [[log_atom_sequence_me.raw_data]])
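
    # A minimal persistence round trip (illustrative sketch, reusing only the
    # PersistenceUtil calls exercised in the test above): persist_all() writes
    # the learned state of every registered component to its
    # persistence_file_name, and load_json() returns the JSON-decoded value.
    #
    #   PersistenceUtil.persist_all()
    #   data = PersistenceUtil.load_json(detector.persistence_file_name)
    #   assert isinstance(data, list)
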
    def run_analysis(self, master_fd):
        """
        Run the analysis thread.
        @param master_fd the main communication socket to the parent to receive logfile updates from the parent.
        @return 0 on success, e.g. normal termination via a signal, or 1 on error.
        """
        # The master_control_socket is the socket used to communicate with the master process to receive commands or logstream
        # data. The parent/child communication socket is passed in as master_fd. socket.fromfd() duplicates the descriptor, so
        # close the old one.
        self.master_control_socket = socket.fromfd(master_fd, socket.AF_UNIX, socket.SOCK_DGRAM, 0)
        os.close(master_fd)
        self.tracked_fds_dict[self.master_control_socket.fileno()] = self.master_control_socket

        # Locate the real analysis configuration.
        self.analysis_context.build_analysis_pipeline()
        if self.analysis_context.atomizer_factory is None:
            msg = 'build_analysis_pipeline() did not initialize atomizer_factory, terminating'
            print('FATAL: ' + msg, file=sys.stderr)
            logging.getLogger(AMinerConfig.DEBUG_LOG_NAME).critical(msg)
            return 1

        real_time_triggered_components = self.analysis_context.real_time_triggered_components
        analysis_time_triggered_components = self.analysis_context.analysis_time_triggered_components

        max_memory_mb = self.analysis_context.aminer_config.config_properties.get(AMinerConfig.KEY_RESOURCES_MAX_MEMORY_USAGE, None)
        if max_memory_mb is not None:
            try:
                max_memory_mb = int(max_memory_mb)
                resource.setrlimit(resource.RLIMIT_AS, (max_memory_mb * 1024 * 1024, resource.RLIM_INFINITY))
                logging.getLogger(AMinerConfig.DEBUG_LOG_NAME).debug('set max memory limit to %d MB.', max_memory_mb)
            except ValueError:
                msg = '%s must be an integer, terminating' % AMinerConfig.KEY_RESOURCES_MAX_MEMORY_USAGE
                print('FATAL: ' + msg, file=sys.stderr)
                logging.getLogger(AMinerConfig.DEBUG_LOG_NAME).critical(msg)
                return 1
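        # Note on the limit above: RLIMIT_AS caps the virtual address space of the
        # process, so allocations beyond the soft limit fail with MemoryError.
        # Standalone equivalent (illustrative, value hypothetical):
        #
        #   import resource
        #   soft = 256 * 1024 * 1024  # 256 MB
        #   resource.setrlimit(resource.RLIMIT_AS, (soft, resource.RLIM_INFINITY))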

        # Load continuation data for last known log streams. The loaded data has to be a dictionary with repositioning information for
        # each stream. The data is used only when creating the first stream with that name.
        self.repositioning_data_dict = PersistenceUtil.load_json(self.persistence_file_name)
        if self.repositioning_data_dict is None:
            self.repositioning_data_dict = {}
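        # The expected shape is a mapping from log stream name to its repositioning
        # information; the exact per-stream format (hypothetical example below) is
        # defined by LogStream:
        #   {'/var/log/syslog': [inode, offset, digest]}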

        # A list of LogStreams where handle_stream() blocked due to downstream not being able to consume the data yet.
        blocked_log_streams = []

        # A trigger time of None means: trigger immediately on the first loop iteration.
        next_real_time_trigger_time = None
        next_analysis_time_trigger_time = None
        next_backup_time_trigger_time = None
        log_stat_period = self.analysis_context.aminer_config.config_properties.get(
            AMinerConfig.KEY_LOG_STAT_PERIOD, AMinerConfig.DEFAULT_STAT_PERIOD)
        next_statistics_log_time = time.time() + log_stat_period

        delayed_return_status = 0
        while self.run_analysis_loop_flag:
            # Build the lists of file descriptors to select on anew each time: the LogStream file descriptors may change due to rollover.
            input_select_fd_list = []
            output_select_fd_list = []
            for fd_handler_object in self.tracked_fds_dict.values():
                if isinstance(fd_handler_object, LogStream):
                    stream_fd = fd_handler_object.get_current_fd()
                    if stream_fd < 0:
                        continue
                    input_select_fd_list.append(stream_fd)
                elif isinstance(fd_handler_object, AnalysisChildRemoteControlHandler):
                    fd_handler_object.add_select_fds(input_select_fd_list, output_select_fd_list)
                else:
                    # This has to be a socket, just add the file descriptor.
                    input_select_fd_list.append(fd_handler_object.fileno())

            # Loop over the list in reverse order to avoid skipping elements in remove.
            if not suspended_flag:
                for log_stream in reversed(blocked_log_streams):
                    current_stream_fd = log_stream.handle_stream()
                    if current_stream_fd >= 0:
                        self.tracked_fds_dict[current_stream_fd] = log_stream
                        input_select_fd_list.append(current_stream_fd)
                        blocked_log_streams.remove(log_stream)

            read_list = None
            write_list = None
            try:
                # The 1 second timeout bounds the wait so that the periodic timer, statistics and backup handling below still
                # runs when no input arrives.
                (read_list, write_list, _except_list) = select.select(input_select_fd_list, output_select_fd_list, [], 1)
            except OSError as select_error:
                # Interrupting signals, e.g. for shutdown, are OK.
                if select_error.errno == errno.EINTR:
                    continue
                msg = 'Unexpected select result %s' % str(select_error)
                print(msg, file=sys.stderr)
                logging.getLogger(AMinerConfig.DEBUG_LOG_NAME).error(msg)
                delayed_return_status = 1
                break
            for read_fd in read_list:
                fd_handler_object = self.tracked_fds_dict[read_fd]
                if isinstance(fd_handler_object, LogStream):
                    # Handle this LogStream. Only when downstream processing blocks, add the stream to the blocked stream list.
                    handle_result = fd_handler_object.handle_stream()
                    if handle_result < 0:
                        # No need to care if the current internal file descriptor of the LogStream has changed in handle_stream();
                        # this will be handled when unblocking.
                        del self.tracked_fds_dict[read_fd]
                        blocked_log_streams.append(fd_handler_object)
                    elif handle_result != read_fd:
                        # The current fd has changed, update the tracking list.
                        del self.tracked_fds_dict[read_fd]
                        self.tracked_fds_dict[handle_result] = fd_handler_object
                    continue

                if isinstance(fd_handler_object, AnalysisChildRemoteControlHandler):
                    try:
                        fd_handler_object.do_receive()
                    except ConnectionError as receive_exception:
                        msg = 'Unclean termination of remote control: %s' % str(receive_exception)
                        logging.getLogger(AMinerConfig.DEBUG_LOG_NAME).error(msg)
                        print(msg, file=sys.stderr)
                    if fd_handler_object.is_dead():
                        logging.getLogger(AMinerConfig.DEBUG_LOG_NAME).debug('Deleting fd %s from tracked_fds_dict.', str(read_fd))
                        del self.tracked_fds_dict[read_fd]
                    else:
                        # Reading is only attempted when the output buffer was already flushed. Try processing the next request to
                        # fill the output buffer for the next round.
                        fd_handler_object.do_process(self.analysis_context)
                    continue

                if fd_handler_object == self.master_control_socket:
                    self.handle_master_control_socket_receive()
                    continue

                if fd_handler_object == self.remote_control_socket:
                    # We received a remote connection; accept it unconditionally. Users should make sure that they do not exhaust
                    # resources by hogging open connections.
                    (control_client_socket, _remote_address) = self.remote_control_socket.accept()
                    # Keep track of information received via this remote control socket.
                    remote_control_handler = AnalysisChildRemoteControlHandler(control_client_socket)
                    self.tracked_fds_dict[control_client_socket.fileno()] = remote_control_handler
                    continue
                msg = 'Unhandled object type %s' % type(fd_handler_object)
                logging.getLogger(AMinerConfig.DEBUG_LOG_NAME).error(msg)
                raise Exception(msg)

            for write_fd in write_list:
                fd_handler_object = self.tracked_fds_dict[write_fd]
                if isinstance(fd_handler_object, AnalysisChildRemoteControlHandler):
                    buffer_flushed_flag = False
                    try:
                        buffer_flushed_flag = fd_handler_object.do_send()
                    except OSError as send_error:
                        msg = 'Error at sending data via remote control: %s' % str(send_error)
                        print(msg, file=sys.stderr)
                        logging.getLogger(AMinerConfig.DEBUG_LOG_NAME).error(msg)
                        try:
                            fd_handler_object.terminate()
                        except ConnectionError as terminate_exception:
                            msg = 'Unclean termination of remote control: %s' % str(terminate_exception)
                            print(msg, file=sys.stderr)
                            logging.getLogger(AMinerConfig.DEBUG_LOG_NAME).error(msg)
                    if buffer_flushed_flag:
                        fd_handler_object.do_process(self.analysis_context)
                    if fd_handler_object.is_dead():
                        del self.tracked_fds_dict[write_fd]
                    continue
                msg = 'Unhandled object type %s' % type(fd_handler_object)
                logging.getLogger(AMinerConfig.DEBUG_LOG_NAME).error(msg)
                raise Exception(msg)

            # Handle the real time events.
            real_time = time.time()
            if next_real_time_trigger_time is None or real_time >= next_real_time_trigger_time:
                next_trigger_offset = 3600
                for component in real_time_triggered_components:
                    if not suspended_flag:
                        next_trigger_request = component.do_timer(real_time)
                        next_trigger_offset = min(next_trigger_offset, next_trigger_request)
                next_real_time_trigger_time = real_time + next_trigger_offset

            if real_time >= next_statistics_log_time:
                next_statistics_log_time = real_time + log_stat_period
                logging.getLogger(AMinerConfig.DEBUG_LOG_NAME).debug('Statistics logs are written.')
                # Log the statistics for every component.
                for component_name, component in self.analysis_context.registered_components_by_name.items():
                    component.log_statistics(component_name)

            # Handle the analysis time events. The analysis time will be different when an analysis time component is registered.
            analysis_time = self.analysis_context.analysis_time
            if analysis_time is None:
                analysis_time = real_time
            if next_analysis_time_trigger_time is None or analysis_time >= next_analysis_time_trigger_time:
                next_trigger_offset = 3600
                for component in analysis_time_triggered_components:
                    if not suspended_flag:
                        next_trigger_request = component.do_timer(analysis_time)
                        next_trigger_offset = min(next_trigger_offset, next_trigger_request)
                next_analysis_time_trigger_time = analysis_time + next_trigger_offset

            # Back up the persistence data.
            backup_time = time.time()
            backup_time_str = datetime.fromtimestamp(backup_time).strftime('%Y-%m-%d-%H-%M-%S')
            persistence_dir = self.analysis_context.aminer_config.config_properties.get(
                AMinerConfig.KEY_PERSISTENCE_DIR, AMinerConfig.DEFAULT_PERSISTENCE_DIR)
            persistence_dir = persistence_dir.rstrip('/')
            backup_path = persistence_dir + '/backup/'
            backup_path_with_date = os.path.join(backup_path, backup_time_str)
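            # On the first pass next_backup_time_trigger_time is None, so the block
            # below only schedules the backup; the actual copy happens on later
            # iterations, roughly once per day.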
            if next_backup_time_trigger_time is None or backup_time >= next_backup_time_trigger_time:
                next_trigger_offset = 3600 * 24
                if next_backup_time_trigger_time is not None:
                    shutil.copytree(persistence_dir, backup_path_with_date, ignore=shutil.ignore_patterns('backup*'))
                    logging.getLogger(AMinerConfig.DEBUG_LOG_NAME).info('Persistence backup created in %s.', backup_path_with_date)
                next_backup_time_trigger_time = backup_time + next_trigger_offset

        # Analysis loop is only left on shutdown. Try to persist everything and leave.
        PersistenceUtil.persist_all()
        for sock in self.tracked_fds_dict.values():
            sock.close()
        return delayed_return_status
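
    # The loop above reduces to the following stdlib-only pattern (a hedged
    # sketch, not aminer code; running, handlers and run_timers are hypothetical
    # placeholders): multiplex readable descriptors with a bounded timeout so
    # periodic timer work still runs when no input arrives.
    #
    #   import select, time
    #   next_timer = time.time() + 60
    #   while running:
    #       readable, writable, _ = select.select(read_fds, write_fds, [], 1)
    #       for fd in readable:
    #           handlers[fd].handle_read()
    #       if time.time() >= next_timer:
    #           run_timers()
    #           next_timer = time.time() + 60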

    def persist_all(self):
        """Persist all data by calling the function in PersistenceUtil."""
        PersistenceUtil.persist_all()
        self.REMOTE_CONTROL_RESPONSE = 'OK'
        logging.getLogger(DEBUG_LOG_NAME).info('Called persist_all() via remote control.')
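
    # Illustrative invocation (an assumption: the exact remote control tool name
    # and flag spelling vary between aminer versions):
    #   aminerremotecontrol --exec "persist_all()"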