def probe(self):
    """Yield snapshot-age and repository-size metrics from the backup CLI.

    Runs ``<executable> --json snapshots --last`` (narrowed by the
    configured host, paths and tags) and yields one metric per returned
    snapshot whose value is the snapshot age in hours.  Afterwards runs
    ``<executable> --json stats`` and yields the total size and the
    total file count.

    :return: generator that emits ``Metric`` objects
    :raises subprocess.CalledProcessError: if one of the commands exits
        with a non-zero status
    """
    cmd = [self.executable, "--json", "snapshots", "--last"]
    if self.host:
        cmd += ["--host", self.host]
    for path in self.paths:
        cmd += ["--path", path]
    for tag in self.tags:
        cmd += ["--tag", tag]

    output = subprocess.check_output(cmd)
    for snapshot in json.loads(output):
        when = pendulum.parse(snapshot["time"])
        hours_old = (pendulum.now() - when).in_hours()
        # An untagged snapshot may have an empty list, null, or no "tags"
        # key at all; .get() covers all three instead of raising KeyError.
        tags = snapshot.get("tags")
        label = tags[0] if tags else snapshot["short_id"]
        yield Metric(label, hours_old, min=0, context="snapshot")

    output = subprocess.check_output([self.executable, "--json", "stats"])
    stats = json.loads(output)
    yield Metric("total_size", stats["total_size"], uom="B", context="size")
    yield Metric("total_files", stats["total_file_count"], uom="c",
                 context="files")
def probe(self):
    """Authenticate, HEAD the container and return timing/size metrics.

    First requests the identity endpoint with the configured user name
    and API key and reads the ``X-Auth-Token`` and ``X-Storage-Url``
    response headers.  Then issues a HEAD request for the configured
    container below the storage URL and reads its object-count and
    bytes-used headers.

    :return: tuple of five ``Metric`` objects (total time, item count,
        bytes used, identity request time, container request time)
    """
    logging.info('Getting API key')
    began = auth_began = time()
    auth_request = urllib2.Request(
        'https://identity.api.rackspacecloud.com/v1.0',
        headers={'X-Auth-Key': self.api_key,
                 'X-Auth-User': self.username})
    auth_headers = urllib2.urlopen(
        auth_request, timeout=self.timeout).info()
    token = auth_headers.getheader('X-Auth-Token')
    endpoint = auth_headers.getheader('X-Storage-Url')
    auth_elapsed = time() - auth_began

    logging.info('Getting Container Size')
    head_began = time()
    head_request = urllib2.Request(
        endpoint + '/' + self.container,
        headers={'X-Auth-Token': token})
    # urllib2 has no HEAD helper; override the method on the request.
    head_request.get_method = lambda: 'HEAD'
    container_headers = urllib2.urlopen(
        head_request, timeout=self.timeout).info()
    object_count = container_headers.getheader('X-Container-Object-Count')
    byte_count = container_headers.getheader('X-Container-Bytes-Used')
    finished = time()

    return (
        Metric('total_time', finished - began, context='time'),
        Metric('item_count', object_count, context='count'),
        Metric('bytes_used', byte_count, context='count'),
        Metric('ident_time', auth_elapsed, context='time'),
        Metric('container_time', finished - head_began, context='time'),
    )
def probe(self):
    """
    Query InfluxDB; yield the count and mean of the measurements.

    Also yields the raw list of values under the "null" context.  The
    mean is skipped when the values cannot be summed.
    """
    def first_non_time(row):
        # First value whose key is not "time"; None if there is none.
        for name, item in row.iteritems():
            if name != "time":
                return item

    values = []
    for row in self.get_results():
        values.append(first_non_time(row))

    count = len(values)
    yield Metric(COUNT, count, context=COUNT)

    try:
        total = float(sum(values))
        if count == 0:
            mean = 0
        else:
            mean = total / count
        yield Metric(MEAN, mean, context=MEAN)
    except TypeError:
        # non numeric queries won't have a mean
        pass

    # null context; values are not validated individually
    yield Metric(VALUES, values, context="null")
def probe(self):
    """Query system state and return metrics.

    Runs ``systemctl list-units --all --no-legend``, groups the units
    that are not in ``self.excludes`` by their ACTIVE column and yields:

    * one ``unit`` metric per failed unit,
    * one ``performance_data`` metric per active state with the number
      of units in that state,
    * a ``count_units`` ``performance_data`` metric,
    * a single ``all`` metric when no unit failed.

    :return: generator that emits
      :class:`~nagiosplugin.metric.Metric` objects
    :raises nagiosplugin.CheckError: if ``systemctl`` cannot be started
      or writes to stderr
    """
    # We don't use `systemctl --failed --no-legend`, because we want to
    # collect performance data of all units.
    try:
        p = subprocess.Popen(
            ['systemctl', 'list-units', '--all', '--no-legend'],
            stderr=subprocess.PIPE,
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE)
        stdout, stderr = p.communicate()
    except OSError as e:
        raise nagiosplugin.CheckError(e)

    if stderr:
        raise nagiosplugin.CheckError(stderr)

    # Dictionary to store all units according to their active state.
    # The four common states are pre-seeded so they are always reported.
    units = {
        'failed': [],
        'active': [],
        'activating': [],
        'inactive': [],
    }

    if stdout:
        # Output of `systemctl list-units --all --no-legend`:
        # UNIT           LOAD   ACTIVE SUB     JOB DESCRIPTION
        # foobar.service loaded active waiting Description text
        # NOTE(review): some systemd versions appear to prefix rows with
        # a marker character (e.g. a bullet) - confirm whether the column
        # positions need to be shifted for such rows.
        count_units = 0
        for line in io.StringIO(stdout.decode('utf-8')):
            split_line = line.split()
            # Skip blank or truncated lines instead of raising IndexError.
            if len(split_line) < 3:
                continue
            unit = split_line[0]
            active = split_line[2]
            # Only count units that are not excluded.
            if unit not in self.excludes:
                # Other states (e.g. "deactivating") must not raise
                # KeyError; they get their own bucket on first sight.
                units.setdefault(active, []).append(unit)
                count_units += 1

        # Excluded units never reach the buckets, no second check needed.
        for unit in units['failed']:
            yield Metric(name=unit, value='failed', context='unit')

        for active, unit_names in units.items():
            yield Metric(name='units_{}'.format(active),
                         value=len(unit_names),
                         context='performance_data')

        yield Metric(name='count_units', value=count_units,
                     context='performance_data')

    if not units['failed']:
        yield Metric(name='all', value=None, context='unit')
def probe(self):
    """Fetch three load values via SNMP and return them as metrics.

    Each raw SNMP value is converted to float and divided by 100.

    :return: list of three ``Metric`` objects (``load1`` uses the
        ``load`` context, the other two the ``default`` context)
    """
    readings = (
        ('load1', '1.3.6.1.4.1.2021.10.1.5.1', 'load'),
        ('load5', '1.3.6.1.4.1.2021.10.1.5.2', 'default'),
        ('load15', '1.3.6.1.4.1.2021.10.1.5.3', 'default'),
    )
    # Query all OIDs first, then build the metrics.
    values = [float(self.snmpget(oid)) / 100 for _, oid, _ in readings]
    return [
        Metric(name, value, min=0, context=context)
        for (name, _, context), value in zip(readings, values)
    ]
def evaluate(self, metric: Metric, resource) -> nagiosplugin.Result:
    """Compare the metric value with the warning and critical thresholds.

    Both thresholds are derived from ``self.warn`` / ``self.crit`` and
    ``metric.max`` via ``self.parse_threshold`` and stored on the metric
    as ``warn_thresh`` / ``crit_thresh``.

    :param metric: metric to evaluate
    :param resource: not used by this implementation
    :return: a ``self.result_cls`` result with state ``Critical`` when
        the value reaches the critical threshold, ``Warn`` when it
        reaches only the warning threshold, otherwise ``Ok``
    """
    metric.warn_thresh = self.parse_threshold(self.warn, metric.max)
    metric.crit_thresh = self.parse_threshold(self.crit, metric.max)

    # The more severe state must be tested first: a value above the
    # critical threshold usually also exceeds the warning threshold, and
    # checking warning first would hide the critical state.
    if metric.crit_thresh <= metric.value:
        return self.result_cls(Critical, metric=metric)
    if metric.warn_thresh <= metric.value:
        return self.result_cls(Warn, metric=metric)
    return self.result_cls(Ok, metric=metric)
def probe(self):
    """Fetch memory figures via SNMP and return them as metrics.

    Reads three OIDs: the total, the unused amount, and a third value
    that is added to the unused amount to form the "available" figure.
    The percentage is the available figure relative to the total.

    :return: list of four ``Metric`` objects (percent, available,
        unused, total)
    """
    total = float(self.snmpget('1.3.6.1.4.1.2021.4.5.0'))
    unused = float(self.snmpget('1.3.6.1.4.1.2021.4.6.0'))
    extra = float(self.snmpget('1.3.6.1.4.1.2021.4.15.0'))
    available = unused + extra
    percent = 100 / total * available

    metrics = [
        Metric('memory_percent', percent, uom='%', min=0, max=100,
               context='memory'),
    ]
    for name, amount in (('memory_available', available),
                         ('memory_unused', unused)):
        metrics.append(Metric(name, amount, uom='MB', min=0, max=total,
                              context='default'))
    metrics.append(Metric('memory_total', total, context='null'))
    return metrics
def probe(self) -> typing.Generator[Metric, None, None]:
    """Yield per-state unit counts and the overall unit count.

    Asks ``unit_cache`` for the number of units in each of four active
    states (honouring ``opts.exclude``) and emits one
    ``performance_data`` metric per state, followed by ``count_units``.

    :return: generator that emits ``Metric`` objects
    """
    wanted_states = (
        'active_state:failed',
        'active_state:active',
        'active_state:activating',
        'active_state:inactive',
    )
    tallies = unit_cache.count_by_states(wanted_states,
                                         exclude=opts.exclude)
    for spec, amount in tallies.items():
        # "active_state:failed" -> "failed"
        state = spec.split(':')[1]
        yield Metric(name='units_{}'.format(state), value=amount,
                     context='performance_data')

    yield Metric(name='count_units', value=unit_cache.count,
                 context='performance_data')
def probe(self) -> typing.Generator[Metric, None, None]:
    """Query the startup time and return it as a metric.

    Runs ``systemd-analyze`` through ``execute_cli``.  Nothing is
    yielded when the command raises ``nagiosplugin.CheckError``, returns
    no output, or the output contains no recognisable time span.

    :return: generator that emits at most one
      :class:`~nagiosplugin.metric.Metric` object (``startup_time``)
    """
    try:
        output = execute_cli(['systemd-analyze'])
    except nagiosplugin.CheckError:
        return

    if not output:
        return

    # First line:
    # Startup finished in 1.672s (kernel) + 21.378s (userspace) =
    # 23.050s
    # On raspian no second line
    # Second line:
    # graphical.target reached after 1min 2.154s in userspace
    found = (re.search(r'reached after (.+) in userspace', output)
             or re.search(r' = (.+)\n', output))

    # Output when boot process is not finished:
    # Bootup is not yet finished. Please try again later.
    if found is None:
        return

    yield Metric(name='startup_time',
                 value=format_timespan_to_seconds(found.group(1)),
                 context='startup_time')
def test_with_another_hit_rate(mocker):
    """
    Check the hit rate computed from 90 keyspace hits and 10 misses
    """

    class FakeStrictRedis(mocker.MagicMock):
        """
        StrictRedis magicmock class with info() method
        """

        def info(self, section=''):
            """
            Return fake redis info data

            :returns: fake info data
            :rtype: dict
            """
            self.foo(section)
            # Work on a copy: writing through an alias would modify the
            # shared REDIS_INFO_MOCK and leak into other tests.
            hit_rate_test = dict(REDIS_INFO_MOCK)
            hit_rate_test['keyspace_hits'] = 90
            hit_rate_test['keyspace_misses'] = 10
            return hit_rate_test

    with mocker.patch(
            'temelio_monitoring.resource.database.'
            'redis.scalar_info_value.StrictRedis',
            new_callable=FakeStrictRedis):

        expected_metric = Metric('db0_hit_rate', 0.9, context='db0_hit_rate')
        resource = ScalarInfoValue(metric_name='hit_rate', section_name='foo')
        metric = next(resource.probe())

        assert metric.name == expected_metric.name
        assert metric.value == expected_metric.value
        assert metric.context == expected_metric.context
def probe(self):
    """Query the active state of ``self.unit`` and return it as metric.

    Executes ``systemctl is-active <unit>``; each output line (stripped)
    becomes the value of a ``unit`` metric named after the unit.

    :return: generator that emits
      :class:`~nagiosplugin.metric.Metric` objects
    :raises nagiosplugin.CheckError: if ``systemctl`` cannot be started
      or writes to stderr
    """
    # Possible outputs of `systemctl is-active <service>`:
    # - active
    # - inactive (by unknown unit file)
    # - failed
    command = ['systemctl', 'is-active', self.unit]
    try:
        process = subprocess.Popen(command,
                                   stdin=subprocess.PIPE,
                                   stdout=subprocess.PIPE,
                                   stderr=subprocess.PIPE)
        out, err = process.communicate()
    except OSError as error:
        raise nagiosplugin.CheckError(error)

    if err:
        raise nagiosplugin.CheckError(err)

    if not out:
        return

    for raw_line in io.StringIO(out.decode('utf-8')):
        yield Metric(name=self.unit, value=raw_line.strip(), context='unit')
def probe(self):
    """Query the startup time and return it as a metric.

    Runs ``systemd-analyze`` and extracts a time span from its output.
    Nothing is yielded when there is no output or when no time span can
    be found in it.

    :return: generator that emits at most one
      :class:`~nagiosplugin.metric.Metric` object (``startup_time``)
    :raises nagiosplugin.CheckError: if ``systemd-analyze`` cannot be
      started or writes to stderr
    """
    try:
        p = subprocess.Popen(['systemd-analyze'],
                             stderr=subprocess.PIPE,
                             stdin=subprocess.PIPE,
                             stdout=subprocess.PIPE)
        stdout, stderr = p.communicate()
    except OSError as e:
        raise nagiosplugin.CheckError(e)

    if stderr:
        raise nagiosplugin.CheckError(stderr)

    if stdout:
        stdout = stdout.decode('utf-8')
        # First line:
        # Startup finished in 1.672s (kernel) + 21.378s (userspace) =
        # 23.050s
        # On raspian no second line
        # Second line:
        # graphical.target reached after 1min 2.154s in userspace
        match = re.search(r'reached after (.+) in userspace', stdout)

        if not match:
            match = re.search(r' = (.+)\n', stdout)

        # Neither pattern may match (unexpected output format); calling
        # .group() on None would raise AttributeError, so emit nothing.
        if match:
            yield Metric(name='startup_time',
                         value=format_timespan_to_seconds(match.group(1)),
                         context='startup_time')
def probe(self):
    """Walk the disk table via SNMP and yield status and temperature.

    For every walked entry the disk name (spaces removed) is recorded in
    ``self.disk_names``; the entry's last OID component is used as index
    to fetch the status number and the temperature.

    :return: generator that emits two ``Metric`` objects per disk
        (``status<name>`` and ``temperature<name>``)
    """
    self.disk_names = []
    for entry in self.snmpwalk('1.3.6.1.4.1.6574.2.1.1.2'):
        # Last OID component identifies the row in the sibling columns.
        index = entry.oid.rpartition('.')[2]
        name = entry.value.replace(" ", "")
        self.disk_names.append(name)

        raw_status = self.snmpget('1.3.6.1.4.1.6574.2.1.1.5.' + str(index))
        status_number = int(raw_status)
        raw_temp = self.snmpget('1.3.6.1.4.1.6574.2.1.1.6.' + str(index))
        temperature = float(raw_temp)

        yield Metric('status%s' % name, status_number,
                     context='disk_status')
        yield Metric('temperature%s' % name, temperature, context='temp')
def probe(self):
    """Detect dead timers from ``systemctl list-timers --all``.

    For every table row whose UNIT is not skipped by ``self.re_match``
    a ``dead_timers`` metric is yielded whose value is a nagiosplugin
    state:

    * ``Ok`` when the timer has a NEXT date,
    * ``Critical`` when NEXT and PASSED are both ``n/a`` or the passed
      time span reaches ``self.critical``,
    * ``Warn`` when the passed time span reaches ``self.warning``.

    :return: generator that emits
      :class:`~nagiosplugin.metric.Metric` objects
    :raises nagiosplugin.CheckError: if ``systemctl`` cannot be started
      or writes to stderr
    """
    try:
        p = subprocess.Popen(['systemctl', 'list-timers', '--all'],
                             stderr=subprocess.PIPE,
                             stdin=subprocess.PIPE,
                             stdout=subprocess.PIPE)
        stdout, stderr = p.communicate()
    except OSError as e:
        raise nagiosplugin.CheckError(e)

    if stderr:
        raise nagiosplugin.CheckError(stderr)

    # NEXT                          LEFT
    # Sat 2020-05-16 15:11:15 CEST  34min left
    # LAST                          PASSED
    # Sat 2020-05-16 14:31:56 CEST  4min 20s ago
    # UNIT             ACTIVATES
    # apt-daily.timer  apt-daily.service
    if stdout:
        lines = stdout.decode('utf-8').splitlines()
        table_heading = lines[0]
        self.column_boundaries = self.detect_column_boundaries(
            table_heading)
        # Remove the first line because it is the header.
        # Remove the two last lines: empty line + "XX timers listed."
        table_body = lines[1:-2]

        for row in table_body:
            unit = self.get_column_text(row, 'UNIT')
            # presumably re_match() implements the exclude patterns -
            # units for which it returns a truthy value are skipped
            if self.re_match(unit):
                continue

            # Start every timer from Ok; a state carried over from a
            # previous row would mark healthy timers as dead.
            state = nagiosplugin.Ok

            next_date_time = self.get_column_text(row, 'NEXT')
            if next_date_time == 'n/a':
                passed_text = self.get_column_text(row, 'PASSED')
                if passed_text == 'n/a':
                    state = nagiosplugin.Critical
                else:
                    passed = format_timespan_to_seconds(passed_text)
                    if passed >= self.critical:
                        state = nagiosplugin.Critical
                    elif passed >= self.warning:
                        state = nagiosplugin.Warn

            yield Metric(name=unit, value=state, context='dead_timers')
def test_with_args(context_name, expected_output):
    """
    Check the description produced by a ConnectionCount context
    """

    sample_metric = Metric('my_metric', 5)
    connection_context = ConnectionCount(context_name, 2, 3)

    assert isinstance(connection_context, ScalarContext) is True
    assert connection_context.describe(sample_metric) == expected_output
def probe(self) -> typing.Generator[Metric, None, None]:
    """Yield one ``units`` metric per unit selected by the options.

    The units come from ``unit_cache.list`` filtered by ``opts.include``
    and ``opts.exclude``; the unit object itself is the metric value.

    :return: generator that emits ``Metric`` objects
    :raises ValueError: if the filters select no unit at all
    """
    found_any = False
    selected = unit_cache.list(include=opts.include, exclude=opts.exclude)
    for unit in selected:
        yield Metric(name=unit.name, value=unit, context='units')
        found_any = True

    if not found_any:
        raise ValueError('Please verify your --include-* and --exclude-* '
                         'options. No units have been added for '
                         'testing.')
def probe(self):
    """
    Refresh the performance data and yield one metric per entry.

    Calls ``self.update_performance_data`` with the module-level
    ``INDEXES`` and ``PERFDATA`` and emits every item of the returned
    ``data`` mapping as a float metric whose context equals its label.

    :return: generator that emits Metric objects
    :raises CheckError: wrapping any exception raised while iterating
        or converting the data
    """
    _, refreshed = self.update_performance_data(
        indexes=INDEXES, perfdata=PERFDATA)

    # Getting Monitoring Data
    try:
        for name, entry in refreshed['data'].items():
            number = float(entry['value'])
            yield Metric(name, number, None, context=name)
    except Exception as e:
        raise CheckError(e)
def test_desc_without_value():
    """
    Check the description when the JSON path request returns no result
    """

    string_context = StringValueFromJSON('json_output', 'foo')
    empty_metric = Metric('my_metric', [])

    description = string_context.describe(empty_metric)

    assert isinstance(string_context, Context) is True
    assert isinstance(string_context, StringValueFromJSON) is True
    assert 'My metric: None' in description
def test_with_args(expected_string, result, expected_output):
    """
    Check the description produced by a StringValueFromJSON context
    """

    string_context = StringValueFromJSON(
        'json_output', expected_string=expected_string)
    sample_metric = Metric('my_metric', [DatumInContext(result)])

    assert isinstance(string_context, Context) is True
    assert isinstance(string_context, StringValueFromJSON) is True
    assert string_context.describe(sample_metric) == expected_output
def probe(self):
    """
    Fetch the redis ``commandstats`` section and yield per-command deltas

    For each command the ``calls`` and ``usec`` counters are compared
    with the values stored in the probe state file.  The difference is
    emitted, or the raw new value when the stored ``calls`` counter is
    higher than the new one.  The state file is updated with the new
    values.

    :return: a generator with Metric objects
    :rtype: generator
    :raises CheckError: if the redis connection or query fails
    """

    try:
        # Connect to the redis server and read the statistics
        client = StrictRedis(db=self.database_id,
                             host=self.host,
                             password=self.password,
                             port=self.port)
        self.redis_infos = client.info(section='commandstats')
    except RedisError as error:
        raise CheckError('Error with Redis server connection: {}'.format(
            str(error)))

    # Previous values live in the probe cookie file
    with Cookie(self.probe_state_file) as state_file:

        for stat_name, current in self.redis_infos.items():

            # Read the previous values before they are overwritten
            previous = state_file.get(stat_name)
            if previous is None:
                previous = {'calls': 0, 'usec': 0}
            state_file[stat_name] = current

            # Counters went backwards: reset or restart
            counter_reset = previous.get('calls', 0) > current['calls']

            for field in ['calls', 'usec']:
                if counter_reset:
                    amount = current[field]
                else:
                    amount = current[field] - previous[field]

                label = 'db{}_{}.{}'.format(self.database_id,
                                            stat_name,
                                            field)
                yield Metric(label, amount, context='default')
def test_eval_without_value(warning, critical, exp_state):
    """
    Check evaluate with an empty result list; the resource argument is
    not used, so None is passed
    """

    count_context = CountValuesFromJSON(
        'foobar', warning=warning, critical=critical)
    empty_metric = Metric('foobar', [], context='foobar')

    outcome = count_context.evaluate(empty_metric, None)

    assert isinstance(count_context, Context) is True
    assert isinstance(count_context, CountValuesFromJSON) is True
    assert outcome.state == exp_state
    assert count_context.describe(outcome.metric) == 'Foobar: 0 ([])'
    assert count_context.performance(outcome.metric, None).label == 'foobar'
    assert count_context.performance(outcome.metric, None).value == 0
def test_eval_without_value():
    """
    Check that evaluate raises RuntimeError when the JSON path request
    returned no result
    """

    context = StringValueFromJSON('json_output', 'foo')
    metric = Metric('my_metric', [])

    with pytest.raises(RuntimeError) as err:
        context.evaluate(metric, None)

    assert isinstance(context, Context) is True
    assert isinstance(context, StringValueFromJSON) is True
    # ``err`` is the ExceptionInfo wrapper; the raised exception (and so
    # its message) is available as ``err.value``.
    assert 'No value returned by probe' in str(err.value)
def test_eval_with_value(
        metric_name, warning, critical, result, eval_result):
    """
    Check the state returned by evaluate for a single scalar result; the
    resource argument is not used, so None is passed
    """

    scalar_context = ScalarValueFromJSON(
        metric_name, warning=warning, critical=critical)
    sample_metric = Metric(
        metric_name, [DatumInContext(result)], context=metric_name)

    assert isinstance(scalar_context, Context) is True
    assert isinstance(scalar_context, ScalarValueFromJSON) is True
    assert scalar_context.evaluate(sample_metric, None).state == eval_result
def test_eval_with_multiple_values(warning, critical, exp_state):
    """
    Check evaluate with two results; the resource argument is not used,
    so None is passed
    """

    count_context = CountValuesFromJSON(
        'foobar', warning=warning, critical=critical)
    data = [DatumInContext('foo'), DatumInContext('bar')]
    two_value_metric = Metric('foobar', data, context='foobar')

    outcome = count_context.evaluate(two_value_metric, None)

    assert isinstance(count_context, Context) is True
    assert isinstance(count_context, CountValuesFromJSON) is True
    assert outcome.state == exp_state
    assert (count_context.describe(outcome.metric)
            == "Foobar: 2 (['foo', 'bar'])")
    assert count_context.performance(outcome.metric, None).label == 'foobar'
    assert count_context.performance(outcome.metric, None).value == 2
def test_eval_with_value(
        metric_name, expected_string, result, operator, eval_result):
    """
    Check the state returned by evaluate for a single string result; the
    resource argument is not used, so None is passed
    """

    string_context = StringValueFromJSON(
        metric_name, expected_string=expected_string, operator=operator)
    sample_metric = Metric(
        metric_name, [DatumInContext(result)], context=metric_name)

    assert isinstance(string_context, Context) is True
    assert isinstance(string_context, StringValueFromJSON) is True
    assert string_context.evaluate(sample_metric, None).state == eval_result
def replace_metric_value(metric, new_value):
    """
    Build a new metric that carries ``new_value``

    Only the name, context, context object and resource of the given
    metric are copied over.

    :param metric: Metric to copy from
    :param new_value: Value of the new metric
    :type metric: nagiosplugin.Metric
    :returns: New metric holding the given value
    :rtype: nagiosplugin.Metric
    """

    return Metric(
        metric.name,
        new_value,
        context=metric.context,
        contextobj=metric.contextobj,
        resource=metric.resource,
    )
def probe(self):
    """Fetch the CPU idle value and yield the resulting CPU percentage.

    When the first configured OID returns a truthy value, ``100 - idle``
    is stored in ``PERFDATA``; the stored value is then emitted as the
    ``alert_cpu_percent`` metric.

    :return: generator that emits one Metric object
    :raises CheckError: if storing the value or building the metric fails
    """
    # Getting CPU Idle
    idle_reading = self.request.fetch_oid(PERFDATA['oids'][0])

    try:
        if idle_reading:
            busy = 100 - int(idle_reading)
            PERFDATA['data']['alert_cpu_percent']['value'] = busy
    except Exception as e:
        raise CheckError('CPU not available. Exception: %s' % e)

    try:
        yield Metric(
            'alert_cpu_percent',
            PERFDATA['data']['alert_cpu_percent']['value'],
            None,
            context='alert_cpu_percent')
    except Exception as e:
        raise CheckError(e)
def probe(self):
    """Fetch the CPU table and yield the average of its values.

    ``self.request.fetch_table`` fills a list with rows for the
    configured OIDs.  When rows were returned, the mean of their
    ``value`` attributes is stored in ``PERFDATA``; the stored value is
    then emitted as the ``alert_cpu_percent`` metric.

    :return: generator that emits one Metric object
    :raises CheckError: on ZeroDivisionError while averaging, or if
        building the metric fails
    """
    rows = []
    self.request.fetch_table(rows, PERFDATA['oids'])

    try:
        if len(rows):
            total = sum(float(row.value) for row in rows)
            PERFDATA['data']['alert_cpu_percent']['value'] = (
                total / len(rows))
    except ZeroDivisionError as e:
        raise CheckError('CPU not available. Exception: %s' % e)

    try:
        yield Metric(
            'alert_cpu_percent',
            PERFDATA['data']['alert_cpu_percent']['value'],
            None,
            context='alert_cpu_percent')
    except Exception as e:
        raise CheckError(e)
def probe(self):
    """Walk the storage table via SNMP and yield usage per volume.

    Only entries whose name starts with ``/volume`` and is not followed
    by a ``/@docker`` path component are reported.  Size and usage are
    the entry's size/used counts multiplied by its allocation unit and
    divided by 10**9 (whole units, rounded down).

    :return: generator that emits one ``storage`` Metric per matching
        entry, with the used amount as value and the size as ``max``
    """
    for item in self.snmpwalk('1.3.6.1.2.1.25.2.3.1.3'):
        storage_name = item.value
        if not re.match("/volume(?!.+/@docker.*)", storage_name):
            continue

        # Last OID component identifies the row in the sibling columns.
        i = item.oid.split('.')[-1]
        allocation_units = int(self.snmpget(
            '1.3.6.1.2.1.25.2.3.1.4.' + str(i)))
        size = int(self.snmpget(
            '1.3.6.1.2.1.25.2.3.1.5.' + str(i)))
        used = int(self.snmpget(
            '1.3.6.1.2.1.25.2.3.1.6.' + str(i)))

        # Integer floor division keeps the arithmetic exact; going
        # through float division loses precision for very large values.
        storage_size = (allocation_units * size) // 1000000000
        storage_used = (used * allocation_units) // 1000000000

        yield Metric(storage_name,
                     storage_used,
                     min=0,
                     max=storage_size,
                     context='storage')
def probe(self):
    """Fetch model, serial, temperature and status values via SNMP.

    Model and serial are passed through unchanged under the ``null``
    context; the temperature is converted to float and the four status
    values to int.

    :return: list of seven ``Metric`` objects
    """
    model = self.snmpget('1.3.6.1.4.1.6574.1.5.1.0')
    serial = self.snmpget('1.3.6.1.4.1.6574.1.5.2.0')
    temperature = float(self.snmpget('1.3.6.1.4.1.6574.1.2.0'))

    # Status values, queried in this order.
    status_oids = (
        ('system', '1.3.6.1.4.1.6574.1.1.0'),
        ('system_fan', '1.3.6.1.4.1.6574.1.4.1.0'),
        ('cpu_fan', '1.3.6.1.4.1.6574.1.4.2.0'),
        ('power', '1.3.6.1.4.1.6574.1.3.0'),
    )
    statuses = [(name, int(self.snmpget(oid))) for name, oid in status_oids]

    metrics = [
        Metric('model', model, context='null'),
        Metric('serial', serial, context='null'),
        Metric('temperature', temperature, context='temp'),
    ]
    metrics.extend(
        Metric(name, number, context='status') for name, number in statuses)
    return metrics