def test_eval_filter(self):
    """Evaluating the filter against a matching event rewrites its severity in place."""
    severity_filter = EventsSeverityChangerFilter(
        new_severity=Severity.NORMAL, event_class=DatabaseLogEvent)
    bad_alloc_event = DatabaseLogEvent.BAD_ALLOC()

    # The event starts at its default severity...
    self.assertEqual(bad_alloc_event.severity, Severity.ERROR)
    # ...and is demoted once the filter is applied to it.
    severity_filter.eval_filter(bad_alloc_event)
    self.assertEqual(bad_alloc_event.severity, Severity.NORMAL)
def ignore_alternator_client_errors():
    """Demote known alternator-client error events to WARNING while the context is active."""
    # (event class, regex, extra seconds the filter outlives the context)
    demotions = (
        (PrometheusAlertManagerEvent, ".*YCSBTooManyErrors.*", 60),
        (PrometheusAlertManagerEvent, ".*YCSBTooManyVerifyErrors.*", 60),
        (YcsbStressEvent, r".*Cannot achieve consistency level.*", 30),
        (YcsbStressEvent, r".*Operation timed out.*", 30),
    )
    with ExitStack() as stack:
        for event_class, regex, extra_expiration in demotions:
            stack.enter_context(
                EventsSeverityChangerFilter(
                    new_severity=Severity.WARNING,
                    event_class=event_class,
                    regex=regex,
                    extra_time_to_expiration=extra_expiration,
                ))
        yield
def ignore_operation_errors():
    """Demote timeout/paxos operation-failure log events to WARNING while active."""
    with ExitStack() as stack:
        for pattern in (r".*Operation timed out.*",
                        r".*Operation failed for system.paxos.*"):
            stack.enter_context(EventsSeverityChangerFilter(
                new_severity=Severity.WARNING,
                event_class=LogEvent,
                regex=pattern,
                extra_time_to_expiration=30,
            ))
        yield
def test_severity_changer_db_log(self): """ See https://github.com/scylladb/scylla-cluster-tests/issues/2115 """ # 1) Lower DatabaseLogEvent to WARNING for 1 sec. with self.wait_for_n_events(self.get_events_logger(), count=4, timeout=3): with EventsSeverityChangerFilter(new_severity=Severity.WARNING, event_class=DatabaseLogEvent, extra_time_to_expiration=1): DatabaseLogEvent.NO_SPACE_ERROR() \ .add_info(node="A", line_number=22, line="critical that should be lowered #1") \ .publish() DatabaseLogEvent.NO_SPACE_ERROR() \ .add_info(node="A", line_number=22, line="critical that should be lowered #2") \ .publish() log_content = self.get_event_log_file("warning.log") self.assertIn("DatabaseLogEvent", log_content) self.assertIn("critical that should be lowered #1", log_content) self.assertIn("critical that should be lowered #2", log_content) # 2) One of the next DatabaseLogEvent event should expire the EventsSeverityChangerFilter # (and not crash all subscribers) with self.wait_for_n_events(self.get_events_logger(), count=2, timeout=3): for _ in range(2): time.sleep(1) DatabaseLogEvent.NO_SPACE_ERROR() \ .add_info(node="A", line_number=22, line="critical that shouldn't be lowered") \ .publish() log_content = self.get_event_log_file("error.log") self.assertIn("critical that shouldn't be lowered", log_content)
def start_events_device(
        log_dir: Optional[Union[str, Path]] = None,
        _registry: Optional[EventsProcessesRegistry] = None) -> None:
    """Start the events main device plus its subscribers, install the default
    severity filters, and register a shutdown hook.

    Either ``log_dir`` or an existing ``_registry`` must be given.
    """
    if _registry is None:
        if log_dir is None:
            raise RuntimeError("Should provide log_dir or instance of EventsProcessesRegistry")
        _registry = create_default_events_process_registry(log_dir=log_dir)

    # The main device must be up before any subscriber attaches.
    start_events_main_device(_registry=_registry)
    time.sleep(EVENTS_DEVICE_START_DELAY)

    for start_subscriber in (start_events_logger,
                             start_grafana_pipeline,
                             start_events_analyzer):
        start_subscriber(_registry=_registry)
    time.sleep(EVENTS_SUBSCRIBERS_START_DELAY)

    # Default filters.
    EventsSeverityChangerFilter(
        new_severity=Severity.WARNING,
        event_class=DatabaseLogEvent.DATABASE_ERROR,
        regex=r'.*workload prioritization - update_service_levels_from_distributed_data: an '
              r'error occurred while retrieving configuration').publish()
    # NOTE: both spellings ('supressed' / 'suppressed') are covered deliberately.
    DbEventsFilter(db_event=DatabaseLogEvent.BACKTRACE,
                   line='Rate-limit: supressed').publish()
    DbEventsFilter(db_event=DatabaseLogEvent.BACKTRACE,
                   line='Rate-limit: suppressed').publish()

    atexit.register(stop_events_device, _registry=_registry)
def test_severity_changer(self):
    """CRITICAL events published within the filter window are demoted to WARNING;
    an event whose source timestamp lies past the filter's expiration is not."""
    extra_time_to_expiration = 10
    with self.wait_for_n_events(self.get_events_logger(), count=5, timeout=3):
        with EventsSeverityChangerFilter(
                new_severity=Severity.WARNING,
                event_class=TestFrameworkEvent,
                extra_time_to_expiration=extra_time_to_expiration):
            # Both fire while the filter is active -> demoted to WARNING.
            TestFrameworkEvent(source="critical that should be lowered #1",
                               source_method="",
                               severity=Severity.CRITICAL).publish()
            TestFrameworkEvent(source="critical that should be lowered #2",
                               source_method="",
                               severity=Severity.CRITICAL).publish()
        event = TestFrameworkEvent(
            source="critical that should not be lowered #3",
            source_method="",
            severity=Severity.CRITICAL)
        # Timestamp pushed beyond the filter's expiration window -> stays CRITICAL.
        event.source_timestamp = time.time() + extra_time_to_expiration
        event.publish()
    log_content = self.get_event_log_file("warning.log")
    crit_log_content = self.get_event_log_file("critical.log")

    self.assertIn("TestFrameworkEvent", log_content)
    self.assertIn("critical that should be lowered #1", log_content)
    self.assertIn("critical that should be lowered #2", log_content)
    self.assertNotIn("critical that should not be lowered #3", log_content)
    self.assertIn("critical that should not be lowered #3", crit_log_content)
def _stop_load_when_nemesis_threads_end(self):
    """Block until every nemesis thread finishes, then kill the c-s load."""
    for nemesis_thread in self.db_cluster.nemesis_threads:
        nemesis_thread.join()
    # Killing stress creates Critical error -- demote it to NORMAL for a while.
    demote_stress_kill = EventsSeverityChangerFilter(
        new_severity=Severity.NORMAL,
        event_class=CassandraStressEvent,
        extra_time_to_expiration=60)
    with demote_stress_kill:
        self.loaders.kill_cassandra_stress_thread()
def ignore_stream_mutation_fragments_errors():
    """Demote STREAM_MUTATION_FRAGMENTS failures to WARNING while active."""
    demote_filter = EventsSeverityChangerFilter(
        new_severity=Severity.WARNING,
        event_class=DatabaseLogEvent,
        regex=r".*Failed to handle STREAM_MUTATION_FRAGMENTS.*",
        extra_time_to_expiration=30)
    # Single filter: a plain `with` is equivalent to the ExitStack form.
    with demote_filter:
        yield
def _stop_load_after_one_nemesis_cycle(self):
    """Let one nemesis cycle complete, stop nemesis, then kill the c-s load."""
    time.sleep(300)  # wait 5 minutes to be sure nemesis has started
    # timeout=None: wait for Nemesis to end and don't start another cycle.
    self.db_cluster.stop_nemesis(timeout=None)
    # Killing stress creates Critical error -- demote it to NORMAL for a while.
    with EventsSeverityChangerFilter(
            new_severity=Severity.NORMAL,
            event_class=CassandraStressEvent,
            extra_time_to_expiration=60):
        self.loaders.kill_cassandra_stress_thread()