#!/usr/bin/python
# Nagios check: free-memory percentage reported by a collectd daemon.
#
# Asks collectd (via its unixsock interface) to evaluate
# (free + cached + buffered) / (free + cached + buffered + used) * 100
# for the requested host and reports it through NagAconda's Plugin wrapper.
import collectd
import string
import sys

from NagAconda import Plugin

MyPlugin = Plugin("Plugin to check memory usage from collectd", "1.0")
MyPlugin.add_option('H', 'host', 'host to check.', required=True)
MyPlugin.add_option('S', 'socket',
                    'Socket to connect to. (default=/var/run/collectd-unixsock)',
                    required=False, default='/var/run/collectd-unixsock')
MyPlugin.enable_status('warning')
MyPlugin.enable_status('critical')
MyPlugin.start()

# BUG FIX: the denominator's memory-cached term was hard-coded to the
# developer's own machine ("t430s-fpg") instead of the host under test,
# skewing the computed percentage for every other host.  Every term now
# uses the host supplied on the command line.
formula = (
    "(#{h}/memory/memory-free 0# + #{h}/memory/memory-cached 0# + "
    "#{h}/memory/memory-buffered 0# ) / ( #{h}/memory/memory-free 0# + "
    "#{h}/memory/memory-cached 0# + #{h}/memory/memory-buffered 0# + "
    "#{h}/memory/memory-used 0#)*100".format(h=MyPlugin.options.host)
)

c = collectd.Collect(MyPlugin.options.socket)
val = c.calculate(formula)
MyPlugin.set_value('mem_free', val, scale='%')
MyPlugin.finish()
try: content = json.loads(response.content) except Exception as e: check_api.unknown_error("%s health check response was malformed: %s" % (check_api.options.action, e)) else: response = requests.get("%s:%d/api/tasks/?limit=%d" % (check_api.options.host, int(check_api.options.port), check_api.options.limit)) try: response.raise_for_status() except Exception as e: print "Status Critical, task list for node %s cannot be retrieved" % check_api.options.node sys.exit(2) try: content = json.loads(response.content) except Exception as e: check_api.unknown_error("%s health check response was malformed: %s" % (check_api.options.action, e)) failed = [] for task in content: if task[1]["failed"]: failed.append(task[0]) if failed: print "Status Warning, the last %d tasks for node %s contain failures: %s" % (check_api.options.limit, check_api.options.node, failed) sys.exit(1) check_api.set_status_message("Celery health check successful") check_api.finish()
response = requests.get( "%s:%d/api/tasks?limit=%d" % (check_api.options.host, int( check_api.options.port), check_api.options.limit)) try: response.raise_for_status() except Exception as e: print "Status Critical, task list for node %s cannot be retrieved" % check_api.options.node sys.exit(2) try: content = json.loads(response.content) except Exception as e: check_api.unknown_error("%s health check response was malformed: %s" % (check_api.options.action, e)) failed = [] for task in content.items(): if task[1]['failed']: failed.append(task[0]) if failed: print "Status Warning, the last %d tasks for node %s contain failures: %s" % ( check_api.options.limit, check_api.options.node, failed) sys.exit(1) check_api.set_status_message("Celery health check successful") check_api.finish()
# NOTE(review): this chunk starts mid-expression -- the GangliaMetrics(...)
# constructor call, its enclosing "try:", and the "value =" assignment target
# all open before the first visible line.
# Remaining keyword arguments of the (cut-off) constructor, forwarded
# straight from the command-line options.
    gmetad_port=plugin.options.gmetad_port,
    gmetad_timeout=plugin.options.gmetad_timeout,
    cache_path=plugin.options.cache_path,
    cache_ttl=plugin.options.cache_ttl,
    cache_ttl_splay=plugin.options.cache_ttl_splay,
    cache_grace=plugin.options.cache_grace,
    metrics_max_age=plugin.options.metrics_max_age,
    debug_level=plugin.options.verbose).get_value(
        metric_host=plugin.options.metric_host,
        metric_name=plugin.options.metric_name)

    # Human-readable status line: "<title> = <value> <units>".
    plugin.set_status_message(
        '%s = %s %s' % (value['title'], value['value'], value['units']))
    # Only pass a scale when the unit is one the plugin understands
    # (byte sizes, time units, or percent); otherwise emit the bare value.
    if value['units'].upper() in ('B', 'KB', 'MB', 'GB', 'TB') or \
       value['units'].lower() in ('s', 'ms', 'us', 'ns', '%'):
        plugin.set_value(plugin.options.metric_name, value['value'],
                         scale=value['units'])
    else:
        plugin.set_value(plugin.options.metric_name, value['value'])
# Missing metric -> Nagios UNKNOWN; any other error -> CRITICAL (exit 2).
# (Legacy Python 2 "except (X), e" syntax.)
except (GangliaMetrics.MetricNotFoundError), e:
    plugin.unknown_error(str(e))
except (Exception), e:
    print(e)
    sys.exit(2)

# Print results
plugin.finish()
# NOTE(review): near-duplicate of the previous Ganglia chunk; it too starts
# mid-expression -- the GangliaMetrics(...) constructor call, its enclosing
# "try:", and the "value =" assignment target open before the first visible
# line.
# Remaining keyword arguments of the (cut-off) constructor, forwarded
# straight from the command-line options.
    gmetad_port=plugin.options.gmetad_port,
    gmetad_timeout=plugin.options.gmetad_timeout,
    cache_path=plugin.options.cache_path,
    cache_ttl=plugin.options.cache_ttl,
    cache_ttl_splay=plugin.options.cache_ttl_splay,
    cache_grace=plugin.options.cache_grace,
    metrics_max_age=plugin.options.metrics_max_age,
    debug_level=plugin.options.verbose).get_value(
        metric_host=plugin.options.metric_host,
        metric_name=plugin.options.metric_name)

    # Human-readable status line: "<title> = <value> <units>".
    plugin.set_status_message('%s = %s %s' % (value['title'],
                                              value['value'],
                                              value['units']))
    # Only pass a scale when the unit is one the plugin understands
    # (byte sizes, time units, or percent); otherwise emit the bare value.
    if value['units'].upper() in ('B', 'KB', 'MB', 'GB', 'TB') or \
       value['units'].lower() in ('s', 'ms', 'us', 'ns', '%'):
        plugin.set_value(plugin.options.metric_name, value['value'],
                         scale=value['units'])
    else:
        plugin.set_value(plugin.options.metric_name, value['value'])
# Missing metric -> Nagios UNKNOWN; any other error -> CRITICAL (exit 2).
# (Legacy Python 2 "except (X), e" syntax.)
except (GangliaMetrics.MetricNotFoundError), e:
    plugin.unknown_error(str(e))
except (Exception), e:
    print(e)
    sys.exit(2)

# Print results
plugin.finish()
# Chunk of a btrfs filesystem Nagios check.  calc_bnumber(), btrfs_check,
# btrfs_output_parsed, data_usage_percentage_float and data_usage_ratio_float
# are all defined before this view begins.

# NOTE(review): these three conversions are never read within the visible
# lines -- presumably consumed further down, or dead code; confirm before
# removing.
system2_bytes_size_float = calc_bnumber(btrfs_output_parsed.system2.bytesize_total)
metadata_bytes_used_float = calc_bnumber(btrfs_output_parsed.metadata.bytesize_used)
metadata_bytes_size_float = calc_bnumber(btrfs_output_parsed.metadata.bytesize_total)

# DEBUG:
# print btrfs_output_parsed.dump()

# set nagios output
# Threshold slot 2 is given effectively-unreachable bounds so the raw
# byte-size perfdata values below never trip warning/critical on their own;
# only the "data_ratio" value (threshold 1) drives the check status.
btrfs_check.set_range('warning', 100000000000000000000000, range_num=2)
btrfs_check.set_range('critical', 200000000000000000000000, range_num=2)
btrfs_check.set_value("data_used", btrfs_output_parsed.data.bytesize_used.num,
                      scale=btrfs_output_parsed.data.bytesize_used.unit, threshold=2)
btrfs_check.set_value("data_total", btrfs_output_parsed.data.bytesize_total.num,
                      scale=btrfs_output_parsed.data.bytesize_total.unit, threshold=2)
# NOTE(review): the perfdata ratio uses data_usage_percentage_float while the
# status message below formats data_usage_ratio_float with "{4:.2%}" (which
# multiplies by 100) -- both are defined outside this view and presumably
# differ by a factor of 100; confirm the two agree.
btrfs_check.set_value("data_ratio", data_usage_percentage_float, scale="%", threshold=1)
btrfs_check.set_value("system1_used", btrfs_output_parsed.system1.bytesize_used.num,
                      scale=btrfs_output_parsed.system1.bytesize_used.unit, threshold=2)
btrfs_check.set_value("system1_total", btrfs_output_parsed.system1.bytesize_total.num,
                      scale=btrfs_output_parsed.system1.bytesize_total.unit, threshold=2)
btrfs_check.set_value("system2_used", btrfs_output_parsed.system2.bytesize_used.num,
                      scale=btrfs_output_parsed.system2.bytesize_used.unit, threshold=2)
btrfs_check.set_value("system2_total", btrfs_output_parsed.system2.bytesize_total.num,
                      scale=btrfs_output_parsed.system2.bytesize_total.unit, threshold=2)
btrfs_check.set_value("metadata_used", btrfs_output_parsed.metadata.bytesize_used.num,
                      scale=btrfs_output_parsed.metadata.bytesize_used.unit, threshold=2)
btrfs_check.set_value("metadata_total", btrfs_output_parsed.metadata.bytesize_total.num,
                      scale=btrfs_output_parsed.metadata.bytesize_total.unit, threshold=2)
# Summary line, e.g. "12.3GiB of 50.0GiB used (24.60%)".
btrfs_check.set_status_message("{0}{1} of {2}{3} used ({4:.2%})".format(
    btrfs_output_parsed.data.bytesize_used.num,
    btrfs_output_parsed.data.bytesize_used.unit,
    btrfs_output_parsed.data.bytesize_total.num,
    btrfs_output_parsed.data.bytesize_total.unit,
    data_usage_ratio_float))
btrfs_check.finish()
g.start() # Bounds checking on crit and warn if g.options.raw_critical < g.options.raw_warning: g.unknown_error("ERROR: Critical level (%s) is set LOWER than Warning level (%s)" % ( g.options.raw_critical, g.options.raw_warning, )) # Build url # FIXME: pickle seems efficient, but maybe harder to debug? url = 'http://%s/render?from=%s&target=%s&format=pickle' % ( g.options.host, g.options.window, g.options.target, ) if debug: print 'DEBUG URL:',url value=get_value(url) if debug: print 'DEBUG VALUE:', value # Error parsing if value == None: g.unknown_error("ERROR: Could not parse data from URL - %s" % url) # Set it and forget it g.set_value(g.options.target, float(value)) g.set_status_message("%s = %f" % (g.options.target, value)) g.finish()
url = graphite.options.url if not url: url = ''.join([ 'http://localhost/render?format=raw', '&from=-', graphite.options.minute, 'minutes', '&target=servers.', graphite.options.hostMafia.replace('.', '_'), '_9400.', graphite.options.keyMafia, ]) try: usock = urllib2.urlopen(url) data = usock.read() usock.close() assert data pieces = data.split("|") counter = pieces[0].split(",")[0] values = pieces[1].split(",")[:-1] values = map(lambda x: 0.0 if x == 'None' else float(x), values) assert not any(map(math.isnan, values)) avg = sum(values)/len(values); graphite.set_value(counter, avg) graphite.set_status_message("Avg value of %s was %f" % (counter, avg)) graphite.finish() except Exception, e: graphite.unknown_error("Error: %s" % e)