def main():
    """Run the alarm create/read/update/delete scenario end to end.

    An alarm named 'alarm_crud' is created and then pushed through the
    state sequence UNDETERMINED -> ALARM -> OK -> ALARM -> OK -> ALARM by
    sending metrics, patching its expression and setting its state
    directly.  The alarm is then deleted and its recorded state history is
    compared against the sequence that was driven.

    Returns 0 when every step succeeds and 1 as soon as any check fails.
    """
    utils.setup_cli()

    name = 'alarm_crud'
    metric = 'alarm_crud'
    base_dim = 'service=alarm_test'
    base_expr = 'max(%s{%s}) > 0' % (metric, base_dim)

    # Start from a clean slate in case an earlier run left the alarm behind.
    cli_wrapper.delete_alarm_if_exists(name)

    # Create the alarm.
    alarm_id = cli_wrapper.create_alarm(name, base_expr,
                                        description=name + ' Description')
    print('Created Alarm with id %s' % alarm_id)

    # A brand new alarm is expected to be UNDETERMINED; `history` collects
    # every state the alarm is expected to pass through, in order.
    history = ['UNDETERMINED']
    if not utils.check_alarm_state(alarm_id, history[0]):
        return 1

    # The new alarm must be discoverable by name and carry the same id.
    listed = cli_wrapper.find_alarm_by_name(name)
    if listed is None:
        print('Did not find alarm named %s using alarm-list' % name,
              file=sys.stderr)
        return 1

    listed_id = listed['id']
    if alarm_id != listed_id:
        print('Alarm %s has wrong id, expected %s but was %s' %
              (name, alarm_id, listed_id), file=sys.stderr)
        return 1

    # Send metrics matching the expression and wait for ALARM.
    if not output_metrics(alarm_id, 'ALARM', [[metric, base_dim]]):
        return 1
    history.append('ALARM')

    # Extend the expression with a second sub-expression so that the
    # combined expression evaluates to OK.
    print('Modify Alarm expression so it will go to OK')
    other_metric = 'other_metric'
    other_dim = 'dim=42'
    combined_expr = '%s and max(%s{%s}) > 100' % (base_expr, other_metric,
                                                  other_dim)

    patched = cli_wrapper.patch_alarm(alarm_id, '--expression',
                                      combined_expr)
    actual_expr = patched['expression']
    if actual_expr != combined_expr:
        print('Did not change expression to %s instead was %s' %
              (combined_expr, actual_expr), file=sys.stderr)
        return 1

    # Send metrics for both sub-expressions and wait for OK.
    if not output_metrics(alarm_id, 'OK', [[metric, base_dim],
                                           [other_metric, other_dim]]):
        return 1
    history.append('OK')

    # Drop the second sub-expression again; the alarm should go back to
    # ALARM.
    print('Delete Alarm sub expression so it will go to ALARM')
    cli_wrapper.patch_alarm(alarm_id, '--expression', base_expr)

    # Metrics carrying an additional dimension must still match.
    print('Output extra dimensions to make sure match occurs')
    if not output_metrics(alarm_id, 'ALARM',
                          [[metric, base_dim + ',Extra=More']]):
        return 1
    history.append('ALARM')

    # Force the state to OK by hand, then wait for incoming metrics to flip
    # it back to ALARM.
    print('Set Alarm to OK, wait for transition back to ALARM')
    cli_wrapper.change_alarm_state(alarm_id, 'OK')
    history.append('OK')

    if not output_metrics(alarm_id, 'ALARM', [[metric, base_dim],
                                              [other_metric, other_dim]]):
        return 1
    history.append('ALARM')

    # Delete the alarm and make sure it can no longer be found by name.
    print('Delete alarm')
    cli_wrapper.run_mon_cli(['alarm-delete', alarm_id], useJson=False)

    if cli_wrapper.find_alarm_by_name(name) is not None:
        print('Still found alarm %s after it was deleted' % name,
              file=sys.stderr)
        return 1

    # The state history must still be queryable after the delete.
    return 0 if utils.check_alarm_history(alarm_id, history) else 1
def smoke_test():
    """Run the end-to-end smoke test described by the module-level config.

    The test creates a notification and an alarm definition through the
    CLI wrapper, waits for an alarm to be created and to reach ALARM,
    sets it to OK by hand, and then checks the alarm history, the
    notifications that were sent, and that new measurements arrived for
    both the configured metric and the configured statsd metric.

    Returns:
        A ``(success, message)`` tuple.  ``(True, '')`` when every check
        passed, otherwise ``(False, reason)`` describing the first failure.
    """
    notification_name = config['notification']['name']
    notification_addr = config['notification']['addr']
    notification_type = config['notification']['type']
    alarm_definition_name = config['alarm']['name']
    metric_name = config['metric']['name']
    metric_dimensions = config['metric']['dimensions']
    statsd_metric_name = config['statsd_metric']['name']
    statsd_metric_dimensions = config['statsd_metric']['dimensions']

    cleanup(notification_name, alarm_definition_name)

    # Query how many metrics there are for the Alarm.  The start time is
    # taken from the UTC clock and tagged with 'Z' so the one-hour window
    # does not shift with the local time zone of the test host.
    # NOTE(review): assumes the API treats start times as UTC - confirm.
    hour_ago = datetime.datetime.utcnow() - datetime.timedelta(hours=1)
    hour_ago_str = hour_ago.strftime('%Y-%m-%dT%H:%M:%S') + 'Z'
    print('Getting metrics for {}{} '.format(metric_name, metric_dimensions))
    initial_num_metrics = count_metrics(metric_name, metric_dimensions,
                                        hour_ago_str)

    if initial_num_metrics is None or initial_num_metrics == 0:
        msg = ('No metric {} with dimensions {} received in last hour'.format(
               metric_name, metric_dimensions))
        return False, msg

    print('Getting metrics for {}{} '.format(statsd_metric_name,
                                             statsd_metric_dimensions))
    initial_statsd_num_metrics = count_metrics(statsd_metric_name,
                                               statsd_metric_dimensions,
                                               hour_ago_str)

    # statsd metrics may not have been sent yet, which will return None
    # from the CLI wrapper
    if initial_statsd_num_metrics is None:
        initial_statsd_num_metrics = 0

    start_time = time.time()

    # Create Notification through CLI
    notif_id = cli_wrapper.create_notification(notification_name,
                                               notification_addr,
                                               notification_type)

    # Create Alarm through CLI; the same notification is attached to every
    # state transition.
    expression = config['alarm']['expression']
    description = config['alarm']['description']
    alarm_def_id = cli_wrapper.create_alarm_definition(
        alarm_definition_name,
        expression,
        description=description,
        ok_notif_id=notif_id,
        alarm_notif_id=notif_id,
        undetermined_notif_id=notif_id)

    # Wait for an alarm to be created
    alarm_id = wait_for_alarm_creation(alarm_def_id)
    if alarm_id is None:
        # Report whether any measurements arrived while waiting, to help
        # diagnose why no alarm was created.  A None count is reported as
        # "nothing received" instead of crashing on the subtraction below.
        received_num_metrics = count_metrics(metric_name, metric_dimensions,
                                             hour_ago_str)
        if (received_num_metrics is None or
                received_num_metrics == initial_num_metrics):
            print('Did not receive any {}{} metrics while waiting'.format(
                  metric_name, metric_dimensions))
        else:
            delta = received_num_metrics - initial_num_metrics
            print('Received {} {} metrics while waiting'.format(
                  delta, metric_name))
        return False, 'Alarm creation error'

    # Ensure it is created in the right state
    initial_state = 'UNDETERMINED'
    if not utils.check_alarm_state(alarm_id, initial_state):
        msg = 'Alarm is in an invalid initial state'
        return False, msg

    # `states` records the expected state sequence for the history check.
    states = [initial_state]

    state = wait_for_alarm_state_change(alarm_id, initial_state)
    if state is None:
        msg = 'Alarm is in an invalid state'
        return False, msg

    if state != 'ALARM':
        print('Wrong final state, expected ALARM but was {}'.format(state),
              file=sys.stderr)
        msg = 'Alarm is in an invalid final state'
        return False, msg
    states.append(state)

    new_state = 'OK'
    states.append(new_state)
    if not cli_wrapper.change_alarm_state(alarm_id, new_state):
        msg = 'Unable to change Alarm state'
        return False, msg

    # There is a bug in the API which allows this to work. Soon that
    # will be fixed and this will fail.  The transition back to ALARM is
    # only awaited when a command-line argument is present.
    if len(sys.argv) > 1:
        final_state = 'ALARM'
        states.append(final_state)

        state = wait_for_alarm_state_change(alarm_id, new_state)
        if state is None:
            msg = 'Alarm is in an unknown state'
            return False, msg

        if state != final_state:
            msg = ('Wrong final state, expected {} but was {}'.format(
                   final_state, state))
            return False, msg

    # If the alarm changes state too fast, then there isn't time for the new
    # metric to arrive. Unlikely, but it has been seen
    ensure_at_least(time.time() - start_time, 35)
    change_time = time.time() - start_time

    final_num_metrics = count_metrics(metric_name, metric_dimensions,
                                      hour_ago_str)
    # A None count means the query returned nothing; treat it as "no new
    # metrics" instead of comparing None with an int.
    if final_num_metrics is None or final_num_metrics <= initial_num_metrics:
        msg = ('No new metrics received for {}{} in {} seconds'.format(
               metric_name, metric_dimensions, change_time))
        return False, msg
    print('Received {} metrics in {} seconds'.format(
          (final_num_metrics - initial_num_metrics), change_time))

    if not utils.check_alarm_history(alarm_id, states):
        msg = 'Invalid alarm history'
        return False, msg

    # Notifications are only sent out for the changes, so omit the first state
    if not check_notifications(alarm_id, states[1:]):
        msg = 'Could not find correct notifications for alarm {}'.format(
              alarm_id)
        return False, msg

    # Check that monasca statsd is sending metrics
    # Metrics may take some time to arrive, so poll once a second for up to
    # 30 attempts.
    print('Waiting for statsd metrics')
    for x in range(0, 30):
        final_statsd_num_metrics = count_metrics(statsd_metric_name,
                                                 statsd_metric_dimensions,
                                                 hour_ago_str)
        # Same None convention as for the initial statsd count above.
        if final_statsd_num_metrics is None:
            final_statsd_num_metrics = 0
        if final_statsd_num_metrics > initial_statsd_num_metrics:
            break
        if x >= 29:
            msg = ('No metrics received for statsd metric {}{} in {} '
                   'seconds'.format(statsd_metric_name,
                                    statsd_metric_dimensions,
                                    time.time() - start_time))
            return False, msg
        time.sleep(1)
    print('Received {0} metrics for {1}{2} in {3} seconds'.format(
          final_statsd_num_metrics - initial_statsd_num_metrics,
          statsd_metric_name,
          statsd_metric_dimensions,
          time.time() - start_time))

    msg = ''
    return True, msg
def smoke_test():
    """Run the end-to-end smoke test described by the module-level config.

    The test creates a notification and an alarm definition through the
    CLI wrapper, waits for an alarm to be created and to reach ALARM,
    sets it to OK by hand, waits for it to return to ALARM, and then checks
    the alarm history, the notifications that were sent, and that new
    measurements arrived for both the configured metric and the configured
    statsd metric.

    Returns:
        A ``(success, message)`` tuple.  ``(True, '')`` when every check
        passed, otherwise ``(False, reason)`` describing the first failure.
    """
    notification_name = config['notification']['name']
    notification_addr = config['notification']['addr']
    notification_type = config['notification']['type']
    alarm_definition_name = config['alarm']['name']
    metric_name = config['metric']['name']
    metric_dimensions = config['metric']['dimensions']
    statsd_metric_name = config['statsd_metric']['name']
    statsd_metric_dimensions = config['statsd_metric']['dimensions']

    cleanup(notification_name, alarm_definition_name)

    # Query how many metrics there are for the Alarm, looking back one hour
    # from the current UTC time.
    hour_ago = datetime.datetime.utcnow() - datetime.timedelta(hours=1)
    hour_ago_str = hour_ago.strftime('%Y-%m-%dT%H:%M:%S') + 'Z'
    print('Getting metrics for {}{} '.format(metric_name, metric_dimensions))
    initial_num_metrics = count_metrics(metric_name, metric_dimensions,
                                        hour_ago_str)

    if initial_num_metrics is None or initial_num_metrics == 0:
        msg = ('No metric {} with dimensions {} received in last hour'.format(
               metric_name, metric_dimensions))
        return False, msg

    print('Getting metrics for {}{} '.format(statsd_metric_name,
                                             statsd_metric_dimensions))
    initial_statsd_num_metrics = count_metrics(statsd_metric_name,
                                               statsd_metric_dimensions,
                                               hour_ago_str)

    # statsd metrics may not have been sent yet, which will return None
    # from the CLI wrapper
    if initial_statsd_num_metrics is None:
        initial_statsd_num_metrics = 0

    start_time = time.time()

    # Create Notification through CLI
    notif_id = cli_wrapper.create_notification(notification_name,
                                               notification_addr,
                                               notification_type)

    # Create Alarm through CLI; the same notification is attached to every
    # state transition.
    expression = config['alarm']['expression']
    description = config['alarm']['description']
    alarm_def_id = cli_wrapper.create_alarm_definition(
        alarm_definition_name,
        expression,
        description=description,
        ok_notif_id=notif_id,
        alarm_notif_id=notif_id,
        undetermined_notif_id=notif_id)

    # Wait for an alarm to be created
    alarm_id = wait_for_alarm_creation(alarm_def_id)
    if alarm_id is None:
        # Report whether any measurements arrived while waiting, to help
        # diagnose why no alarm was created.  A None count is reported as
        # "nothing received" instead of crashing on the subtraction below.
        received_num_metrics = count_metrics(metric_name, metric_dimensions,
                                             hour_ago_str)
        if (received_num_metrics is None or
                received_num_metrics == initial_num_metrics):
            print('Did not receive any {}{} metrics while waiting'.format(
                  metric_name, metric_dimensions))
        else:
            delta = received_num_metrics - initial_num_metrics
            print('Received {} {} metrics while waiting'.format(
                  delta, metric_name))
        return False, 'Alarm creation error'

    # Ensure it is created in the right state
    initial_state = 'UNDETERMINED'
    if not utils.check_alarm_state(alarm_id, initial_state):
        msg = 'Alarm is in an invalid initial state'
        return False, msg

    # `states` records the expected state sequence for the history check.
    states = [initial_state]

    state = wait_for_alarm_state_change(alarm_id, initial_state)
    if state is None:
        msg = 'Alarm is in an invalid state'
        return False, msg

    if state != 'ALARM':
        print('Wrong final state, expected ALARM but was {}'.format(state),
              file=sys.stderr)
        msg = 'Alarm is in an invalid final state'
        return False, msg
    states.append(state)

    new_state = 'OK'
    states.append(new_state)
    if not cli_wrapper.change_alarm_state(alarm_id, new_state):
        msg = 'Unable to change Alarm state'
        return False, msg

    # After the manual change to OK the alarm is expected to return to
    # ALARM.
    final_state = 'ALARM'
    states.append(final_state)

    state = wait_for_alarm_state_change(alarm_id, new_state)
    if state is None:
        msg = 'Alarm is in an unknown state'
        return False, msg

    if state != final_state:
        msg = ('Wrong final state, expected {} but was {}'.format(
               final_state, state))
        return False, msg

    # If the alarm changes state too fast, then there isn't time for the new
    # metric to arrive. Unlikely, but it has been seen
    ensure_at_least(time.time() - start_time, 35)
    change_time = time.time() - start_time

    final_num_metrics = count_metrics(metric_name, metric_dimensions,
                                      hour_ago_str)
    # A None count means the query returned nothing; treat it as "no new
    # metrics" instead of comparing None with an int.
    if final_num_metrics is None or final_num_metrics <= initial_num_metrics:
        msg = ('No new metrics received for {}{} in {} seconds'.format(
               metric_name, metric_dimensions, change_time))
        return False, msg
    print('Received {} metrics in {} seconds'.format(
          (final_num_metrics - initial_num_metrics), change_time))

    if not utils.check_alarm_history(alarm_id, states):
        msg = 'Invalid alarm history'
        return False, msg

    # Notifications are only sent out for the changes, so omit the first state
    if not check_notifications(alarm_id, states[1:]):
        msg = 'Could not find correct notifications for alarm {}'.format(
              alarm_id)
        return False, msg

    # Check that monasca statsd is sending metrics
    # Metrics may take some time to arrive, so poll once a second for up to
    # 30 attempts.
    print('Waiting for statsd metrics')
    for x in range(0, 30):
        final_statsd_num_metrics = count_metrics(statsd_metric_name,
                                                 statsd_metric_dimensions,
                                                 hour_ago_str)
        # Same None convention as for the initial statsd count above.
        if final_statsd_num_metrics is None:
            final_statsd_num_metrics = 0
        if final_statsd_num_metrics > initial_statsd_num_metrics:
            break
        if x >= 29:
            msg = ('No metrics received for statsd metric {}{} in {} '
                   'seconds'.format(statsd_metric_name,
                                    statsd_metric_dimensions,
                                    time.time() - start_time))
            return False, msg
        time.sleep(1)
    print('Received {0} metrics for {1}{2} in {3} seconds'.format(
          final_statsd_num_metrics - initial_statsd_num_metrics,
          statsd_metric_name,
          statsd_metric_dimensions,
          time.time() - start_time))

    msg = ''
    return True, msg
def main():
    """Run the smoke test against the local host.

    Creates an e-mail notification and an alarm on CPU and load metrics for
    this host, waits for the alarm to reach ALARM, sets it to OK by hand,
    and then checks that new measurements arrived, that the alarm history
    matches the expected state sequence and that notifications were sent.

    Returns 0 on success and 1 as soon as any check fails.
    """
    if not utils.ensure_has_notification_engine():
        return 1

    mail_host = 'localhost'
    # universal_newlines=True makes check_output return text rather than
    # bytes, so the hostname can be concatenated into the expression below
    # on Python 3 as well as Python 2.
    metric_host = subprocess.check_output(['hostname', '-f'],
                                          universal_newlines=True).strip()

    utils.setup_cli()

    notification_name = 'Jahmon Smoke Test'
    notification_email_addr = 'root@' + mail_host
    alarm_name = 'high cpu and load'
    metric_name = 'load_avg_1_min'
    metric_dimensions = {'hostname': metric_host}
    cleanup(notification_name, alarm_name)

    # Query how many metrics there are for the Alarm
    initial_num_metrics = count_metrics(metric_name, metric_dimensions)
    if initial_num_metrics is None:
        return 1

    start_time = time.time()

    # Create Notification through CLI
    notification_id = cli_wrapper.create_notification(notification_name,
                                                      notification_email_addr)
    # Create Alarm through CLI; the same notification is attached to every
    # state transition.
    expression = 'max(cpu_system_perc) > 0 and ' + \
                 'max(load_avg_1_min{hostname=' + metric_host + '}) > 0'
    description = 'System CPU Utilization exceeds 1% and ' + \
                  'Load exceeds 3 per measurement period'
    alarm_id = cli_wrapper.create_alarm(alarm_name, expression,
                                        description=description,
                                        ok_notif_id=notification_id,
                                        alarm_notif_id=notification_id,
                                        undetermined_notif_id=notification_id)

    # Ensure it is created in the right state
    initial_state = 'UNDETERMINED'
    if not utils.check_alarm_state(alarm_id, initial_state):
        return 1

    # `states` records the expected state sequence for the history check.
    states = [initial_state]

    state = wait_for_alarm_state_change(alarm_id, initial_state)
    if state is None:
        return 1

    if state != 'ALARM':
        print('Wrong final state, expected ALARM but was %s' % state,
              file=sys.stderr)
        return 1
    states.append(state)

    new_state = 'OK'
    states.append(new_state)
    cli_wrapper.change_alarm_state(alarm_id, new_state)

    # There is a bug in the API which allows this to work. Soon that
    # will be fixed and this will fail.  The transition back to ALARM is
    # only awaited when a command-line argument is present.
    if len(sys.argv) > 1:
        final_state = 'ALARM'
        states.append(final_state)

        state = wait_for_alarm_state_change(alarm_id, new_state)
        if state is None:
            return 1

        if state != final_state:
            print('Wrong final state, expected %s but was %s' %
                  (final_state, state), file=sys.stderr)
            return 1

    # If the alarm changes state too fast, then there isn't time for the new
    # metric to arrive. Unlikely, but it has been seen
    ensure_at_least(time.time() - start_time, 35)
    change_time = time.time() - start_time

    final_num_metrics = count_metrics(metric_name, metric_dimensions)
    # count_metrics may return None (see the initial query above); treat
    # that as "no new metrics" instead of comparing None with an int.
    if final_num_metrics is None or final_num_metrics <= initial_num_metrics:
        print('No new metrics received in %d seconds' % change_time,
              file=sys.stderr)
        return 1
    print('Received %d metrics in %d seconds' %
          ((final_num_metrics - initial_num_metrics), change_time))

    if not utils.check_alarm_history(alarm_id, states):
        return 1

    # Notifications are only sent out for the changes, so omit the first state
    if not check_notifications(alarm_id, states[1:]):
        return 1

    return 0