def report(pods, rs_statuses, nag_statuses):
    """Print a replica-set summary plus one line per pod; return a status.

    Args:
        pods: pod identifiers, parallel to the two status sequences.
        rs_statuses: replica-set state of each pod (looked up in
            ``REPLSET_STATUS.reverse_mapping`` for display).
        nag_statuses: nagios status code of each pod.

    Returns:
        ``nagios.UNKNOWN`` when ``pods`` is empty, ``nagios.CRIT`` when the
        number of primaries is not exactly one, ``nagios.WARN`` when the
        member count is even, otherwise the largest per-pod nagios status.
    """
    to_label = nagios.status_code_to_label

    if not pods:
        print("%s: Unable to locate any pods running mongodb" % (
            to_label(nagios.UNKNOWN),))
        return nagios.UNKNOWN

    rs_counts = Counter(state for state in rs_statuses)
    # Default result: the largest of the per-pod nagios codes.
    ret = max(nag_statuses)
    primaries = rs_counts[REPLSET_STATUS.PRIMARY]

    if primaries != 1:
        ret = nagios.CRIT
        headline = "%s: There are %s nodes claiming to be primary members of the replica set" % (
            to_label(ret), primaries)
    elif len(rs_statuses) % 2 == 0:
        # NOTE(review): this replaces whatever the per-pod maximum was, even
        # if that was worse than WARN - confirm that is intended.
        ret = nagios.WARN
        headline = "%s: There are an even number of voting members (%s) partipating in the replica set" % (
            to_label(ret), len(rs_statuses))
    else:
        headline = "%s: There are %s primary and %s secondary members in the replica set" % (
            to_label(ret), primaries, rs_counts[REPLSET_STATUS.SECONDARY])
    print(headline)

    for pod, rs_state, nag_code in zip(pods, rs_statuses, nag_statuses):
        state_name = REPLSET_STATUS.reverse_mapping[rs_state]
        print("%s: %s - %s" % (to_label(nag_code), pod, state_name))

    return ret
def report(results, errors):
    """Print a cpu/memory-limit summary and return the overall status.

    Args:
        results: sequence of ``(pod, container, cpu_limit, mem_limit,
            status)`` tuples.
        errors: sequence of ``(pod_name, container_name, ex)`` tuples; any
            entry forces the return value to ``nagios.UNKNOWN``.

    Returns:
        ``nagios.UNKNOWN`` when ``results`` is empty or ``errors`` is
        non-empty, otherwise the largest status found in ``results``.
    """
    if not results:
        return nagios.UNKNOWN

    to_label = nagios.status_code_to_label
    tally = Counter(
        status for _pod, _container, _cpu, _mem, status in results)
    ret = max(tally)

    # A headline is printed only for OK, UNKNOWN and WARN; any other overall
    # status goes straight to the per-container lines.
    headline = None
    if ret == nagios.OK:
        headline = "%s: All %s containers have memory and cpu limits set" % (
            to_label(ret), len(results))
    elif ret == nagios.UNKNOWN:
        headline = "%s: Unable to determine cpu and memory limits on %s containers" % (
            to_label(ret), tally[nagios.UNKNOWN])
    elif ret == nagios.WARN:
        headline = "%s: There are %s containers that do not have both a cpu and memory limit set" % (
            to_label(ret), tally[nagios.WARN])
    if headline is not None:
        print(headline)

    for pod, container, cpu_limit, mem_limit, status in results:
        detail = (to_label(status), pod, container, mem_limit, cpu_limit)
        print("%s: %s:%s: - memory limit: %s - cpu limit: %s" % detail)

    if errors:
        ret = nagios.UNKNOWN
        for pod_name, container_name, ex in errors:
            # NOTE(review): a label string, not a status code, is passed to
            # status_code_to_label here - confirm the helper accepts it.
            prefix = to_label("WARNING")
            print("%s: %s:%s %s" % (prefix, pod_name, container_name, ex))

    return ret
def report(results, errors, minimum):
    """Print disk/inode usage for busy volumes and return the overall status.

    Args:
        results: sequence of ``(pod, container, mount, disk_usage,
            inode_usage, status)`` tuples.
        errors: sequence of ``(pod_name, container_name, ex)`` tuples; any
            entry forces the return value to ``nagios.UNKNOWN``.
        minimum: a volume is listed only when the larger of its two usage
            figures is strictly greater than this value.

    Returns:
        ``nagios.UNKNOWN`` when ``results`` is empty or ``errors`` is
        non-empty, otherwise the largest status found in ``results``.
    """
    if not results:
        return nagios.UNKNOWN

    to_label = nagios.status_code_to_label
    tally = Counter(
        status for _pod, _container, _mount, _space, _inodes, status in results)
    ret = max(tally)

    print("Checked %s volumes (%s critical, %s warning)" % (
        len(results), tally[nagios.CRIT], tally[nagios.WARN]))

    for pod, container, mount, disk_usage, inode_usage, status in results:
        worst_usage = max(disk_usage, inode_usage)
        if worst_usage > minimum:
            print("%s: %s:%s:%s - bytes used: %s%%, inodes used: %s%%" % (
                to_label(status), pod, container, mount,
                disk_usage, inode_usage))

    if errors:
        ret = nagios.UNKNOWN
        for pod_name, container_name, ex in errors:
            # NOTE(review): a label string, not a status code, is passed to
            # status_code_to_label here - confirm the helper accepts it.
            prefix = to_label("WARNING")
            print("%s: %s:%s %s" % (prefix, pod_name, container_name, ex))

    return ret
def report(results, errors, minimum):
    """Summarise volume usage checks, listing volumes above ``minimum``.

    Args:
        results: sequence of ``(pod, container, mount, disk_usage,
            inode_usage, status)`` tuples.
        errors: sequence of ``(pod_name, container_name, ex)`` tuples; when
            non-empty each is printed and the result becomes
            ``nagios.UNKNOWN``.
        minimum: usage value that ``max(disk_usage, inode_usage)`` must
            exceed for a volume to get its own output line.

    Returns:
        ``nagios.UNKNOWN`` for empty ``results`` or non-empty ``errors``;
        otherwise the maximum status present in ``results``.
    """
    if not results:
        return nagios.UNKNOWN

    status_counts = Counter(
        state for _p, _c, _m, _space, _inode, state in results)
    ret = max(status_counts)

    critical_total = status_counts[nagios.CRIT]
    warning_total = status_counts[nagios.WARN]
    print("Checked %s volumes (%s critical, %s warning)" % (
        len(results), critical_total, warning_total))

    for pod, container, mount, disk_usage, inode_usage, status in results:
        if not max(disk_usage, inode_usage) > minimum:
            # Quiet volume: counted in the header only.
            continue
        label = nagios.status_code_to_label(status)
        print("%s: %s:%s:%s - bytes used: %s%%, inodes used: %s%%" % (
            label, pod, container, mount, disk_usage, inode_usage))

    if not errors:
        return ret

    for pod_name, container_name, ex in errors:
        # NOTE(review): status_code_to_label is given the string "WARNING"
        # rather than a code - verify the helper handles that.
        print("%s: %s:%s %s" % (
            nagios.status_code_to_label("WARNING"),
            pod_name, container_name, ex))
    return nagios.UNKNOWN
def report(results):
    """Print a volume-usage summary with per-volume lines; return a status.

    Args:
        results: sequence of ``(pod, mount, disk_usage, inode_usage,
            status)`` tuples.

    Returns:
        ``nagios.UNKNOWN`` when ``results`` is empty, otherwise the largest
        status found in ``results``.
    """
    if not results:
        return nagios.UNKNOWN

    to_label = nagios.status_code_to_label
    tally = Counter(status for _pod, _mount, _space, _inodes, status in results)
    ret = max(tally)
    ret_label = to_label(ret)

    if ret == nagios.OK:
        headline = "%s: All %s volumes are under the warning threshold" % (
            ret_label, len(results))
    elif ret == nagios.UNKNOWN:
        headline = "%s: Unable to determine usage on %s volumes" % (
            ret_label, tally[nagios.UNKNOWN])
    elif ret == nagios.WARN:
        headline = "%s: There are %s volumes over the warning threshold" % (
            ret_label, tally[nagios.WARN])
    else:
        # Any remaining status is reported as critical.
        headline = "%s: There are %s volumes over the critical threshold and %s volumes over the warning threshold" % (
            ret_label, tally[nagios.CRIT], tally[nagios.WARN])
    print(headline)

    for pod, mount, disk_usage, inode_usage, status in results:
        print("%s: %s:%s - bytes used: %s%%, inodes used: %s%%" % (
            to_label(status), pod, mount, disk_usage, inode_usage))

    return ret
def report(results, errors):
    """Report which containers have cpu and memory limits configured.

    Args:
        results: sequence of ``(pod, container, cpu_limit, mem_limit,
            status)`` tuples.
        errors: sequence of ``(pod_name, container_name, ex)`` tuples; when
            non-empty each is printed and the result becomes
            ``nagios.UNKNOWN``.

    Returns:
        ``nagios.UNKNOWN`` for empty ``results`` or non-empty ``errors``;
        otherwise the maximum status present in ``results``.
    """
    if not results:
        return nagios.UNKNOWN

    status_counts = Counter(
        state for _p, _c, _cpu, _mem, state in results)
    ret = max(status_counts)
    ret_label = nagios.status_code_to_label

    # Only OK, UNKNOWN and WARN produce a summary line.
    if ret == nagios.OK:
        print("%s: All %s containers have memory and cpu limits set" % (
            ret_label(ret), len(results)))
    elif ret == nagios.UNKNOWN:
        print("%s: Unable to determine cpu and memory limits on %s containers" % (
            ret_label(ret), status_counts[nagios.UNKNOWN]))
    elif ret == nagios.WARN:
        print("%s: There are %s containers that do not have both a cpu and memory limit set" % (
            ret_label(ret), status_counts[nagios.WARN]))

    for pod, container, cpu_limit, mem_limit, status in results:
        line = "%s: %s:%s: - memory limit: %s - cpu limit: %s" % (
            nagios.status_code_to_label(status),
            pod, container, mem_limit, cpu_limit)
        print(line)

    if not errors:
        return ret

    for pod_name, container_name, ex in errors:
        # NOTE(review): status_code_to_label is given the string "WARNING"
        # rather than a code - verify the helper handles that.
        print("%s: %s:%s %s" % (
            nagios.status_code_to_label("WARNING"),
            pod_name, container_name, ex))
    return nagios.UNKNOWN
def report(pods, rs_statuses, nag_statuses):
    """Report on the health of the mongodb replica set.

    Prints one summary line followed by one line per pod.

    Args:
        pods: pod identifiers, parallel to the two status sequences.
        rs_statuses: replica-set state of each pod (a key of
            ``REPLSET_STATUS.reverse_mapping``).
        nag_statuses: nagios status code of each pod.

    Returns:
        ``nagios.UNKNOWN`` if no pods were given; ``nagios.CRIT`` unless
        exactly one member reports ``REPLSET_STATUS.PRIMARY``;
        ``nagios.WARN`` for an even number of members; otherwise the
        maximum of ``nag_statuses``.
    """
    if not pods:
        unknown_label = nagios.status_code_to_label(nagios.UNKNOWN)
        print("%s: Unable to locate any pods running mongodb" % (
            unknown_label,))
        return nagios.UNKNOWN

    state_counts = Counter(member_state for member_state in rs_statuses)
    code_counts = Counter(code for code in nag_statuses)
    ret = max(code_counts)
    primary_total = state_counts[REPLSET_STATUS.PRIMARY]

    if primary_total != 1:
        ret = nagios.CRIT
        print("%s: There are %s nodes claiming to be primary members of the replica set" % (
            nagios.status_code_to_label(ret), primary_total))
    elif len(rs_statuses) % 2 == 0:
        # NOTE(review): overrides the per-pod maximum computed above, even
        # when that maximum is worse than WARN - confirm intent.
        ret = nagios.WARN
        print("%s: There are an even number of voting members (%s) partipating in the replica set" % (
            nagios.status_code_to_label(ret), len(rs_statuses)))
    else:
        secondary_total = state_counts[REPLSET_STATUS.SECONDARY]
        print("%s: There are %s primary and %s secondary members in the replica set" % (
            nagios.status_code_to_label(ret), primary_total, secondary_total))

    for pod, member_state, code in zip(pods, rs_statuses, nag_statuses):
        print("%s: %s - %s" % (
            nagios.status_code_to_label(code),
            pod,
            REPLSET_STATUS.reverse_mapping[member_state]))

    return ret
def report(results, errors):
    """Print whether the androidsdk could be contacted; return a status.

    Args:
        results: iterable of nagios status codes.
        errors: collection of errors; if non-empty nothing is printed and
            ``nagios.UNKNOWN`` is returned.

    Returns:
        The first result equal to ``nagios.CRIT`` or ``nagios.OK``, or
        ``nagios.UNKNOWN`` when there are errors or no such result.
    """
    if errors:
        return nagios.UNKNOWN

    # Checked in this order for every result; the first match ends the scan.
    messages = (
        (nagios.CRIT, 'Unable to contact the androidsdk - Status %s'),
        (nagios.OK, 'Able to contact the androidsdk - Status %s'),
    )
    for result in results:
        for code, template in messages:
            if result == code:
                print(template % (nagios.status_code_to_label(result)))
                return result

    return nagios.UNKNOWN
def report(results, errors):
    """Print the AndroidSDK PersistentVolumeClaim state; return a status.

    Args:
        results: iterable of nagios status codes.
        errors: sequence of ``(status, ex)`` pairs; when non-empty every
            ``ex`` is printed and ``nagios.UNKNOWN`` is returned.

    Returns:
        The first result equal to ``nagios.CRIT`` or ``nagios.OK``.
        ``nagios.UNKNOWN`` when there are errors, when ``results`` is empty,
        or when no result is CRIT or OK.
    """
    if errors:
        for _status, ex in errors:
            # NOTE(review): a label string, not a status code, is passed to
            # status_code_to_label here - confirm the helper accepts it.
            print("%s , %s" % (nagios.status_code_to_label("WARNING"), ex))
        return nagios.UNKNOWN

    for result in results:
        if result == nagios.CRIT:
            print('AndroidSDK PersistentVolumeClaim is not Bound - Status %s' % (
                nagios.status_code_to_label(result)))
            return result
        elif result == nagios.OK:
            print('AndroidSDK PersistentVolumeClaim is Bound - Status %s' % (
                nagios.status_code_to_label(result)))
            return result
        elif result == nagios.UNKNOWN:
            # Keep scanning: a later result may still be conclusive.
            print('Unable to determine the status of the AndroidSDK PersistentVolumeClaim')

    # Previously the function fell off the end here and returned None, which
    # a caller passing the value to sys.exit() would turn into exit code 0.
    # Report UNKNOWN explicitly instead.
    return nagios.UNKNOWN
def report(results, errors):
    """Print a container memory-usage summary; return the overall status.

    Args:
        results: sequence of ``(pod, container, memory_total, memory_used,
            usage, status)`` tuples; ``usage`` is formatted with ``%.1f``.
        errors: sequence of ``(pod_name, container_name, ex)`` tuples; any
            entry forces the return value to ``nagios.UNKNOWN``.

    Returns:
        ``nagios.UNKNOWN`` when ``results`` is empty or ``errors`` is
        non-empty, otherwise the largest status found in ``results``.
    """
    if not results:
        return nagios.UNKNOWN

    to_label = nagios.status_code_to_label
    tally = Counter(
        status for _pod, _container, _total, _used, _usage, status in results)
    ret = max(tally)
    ret_label = to_label(ret)

    if ret == nagios.OK:
        headline = "%s: All %s containers are under the warning threshold" % (
            ret_label, len(results))
    elif ret == nagios.UNKNOWN:
        headline = "%s: Unable to determine usage on %s containers" % (
            ret_label, tally[nagios.UNKNOWN])
    elif ret == nagios.WARN:
        headline = "%s: There are %s containers over the warning threshold" % (
            ret_label, tally[nagios.WARN])
    else:
        # Any remaining status is reported as critical.
        headline = "%s: There are %s containers over the critical threshold and %s containers over the warning threshold" % (
            ret_label, tally[nagios.CRIT], tally[nagios.WARN])
    print(headline)

    for pod, container, _memory_total, _memory_used, usage, status in results:
        print("%s: %s:%s: - usage: %.1f%%" % (
            to_label(status), pod, container, usage))

    if errors:
        ret = nagios.UNKNOWN
        for pod_name, container_name, ex in errors:
            # NOTE(review): a label string, not a status code, is passed to
            # status_code_to_label here - confirm the helper accepts it.
            prefix = to_label("WARNING")
            print("%s: %s:%s %s" % (prefix, pod_name, container_name, ex))

    return ret
def report(results, errors):
    """Print the Jenkins availability of each ios slave; return a status.

    One line is printed per machine, followed by a summary line.

    Args:
        results: sequence of records where index 0 is the machine name and
            index 1 is its nagios status code.
        errors: collection of errors; if non-empty the return value is
            ``nagios.UNKNOWN``.

    Returns:
        ``nagios.UNKNOWN`` when ``results`` is empty or ``errors`` is
        non-empty, otherwise the largest status found in ``results``.
    """
    if not results:
        return nagios.UNKNOWN

    to_label = nagios.status_code_to_label
    unique_statuses = Counter(computer[1] for computer in results)
    ret = max(unique_statuses)

    for computer in results:
        status = computer[1]
        if status == nagios.CRIT:
            print('Machine: %s is unavailable to Jenkins - Status is %s ' % (
                computer[0], to_label(status)))
        elif status == nagios.OK:
            print('Machine: %s is available to Jenkins - Status is %s ' % (
                computer[0], to_label(status)))
        elif status == nagios.UNKNOWN:
            print('Machine: %s status is %s ' % (
                computer[0], to_label(status)))
        else:
            print('Unable to determine the status of the mac machine')

    # The UNKNOWN and CRIT summaries used to report len(results), i.e. every
    # machine, even when only some were in that state. Report the number of
    # machines that actually have the summarised status.
    if ret == nagios.OK:
        print("%s: %s ios slave machine(s) is/are available" % (
            to_label(ret), len(results)))
    elif ret == nagios.UNKNOWN:
        print("%s: Unable to determine status of %s ios slave machine(s)" % (
            to_label(ret), unique_statuses[nagios.UNKNOWN]))
    elif ret == nagios.CRIT:
        print("%s: Unable to contact %s ios slave machine(s)" % (
            to_label(ret), unique_statuses[nagios.CRIT]))

    if errors:
        ret = nagios.UNKNOWN

    return ret
def report(pods, project, result):
    """Check that today's reporting collections exist; return a status.

    Args:
        pods: pods running mongodb; when empty an UNKNOWN line is printed
            and ``nagios.UNKNOWN`` is returned.
        project: project name; ``"rhmap-core"`` selects the core rules, any
            other value the mbaas rules.
        result: dict mapping a topic name to a truthy value when the
            ``<topic>_<yyyymmdd>`` collection exists.

    Returns:
        ``nagios.UNKNOWN`` without pods. Otherwise ``nagios.OK`` when
        nothing relevant is missing, ``nagios.WARN`` when something is
        missing, escalated to ``nagios.CRIT`` when more than one collection
        is missing on the core or exactly one is missing elsewhere.
    """
    if not pods:
        print("%s: Unable to locate any pods running mongodb" % (
            nagios.status_code_to_label(nagios.UNKNOWN),))
        return nagios.UNKNOWN

    is_core = project == "rhmap-core"
    code = nagios.OK
    today = time.strftime('%Y%m%d')
    missing = 0

    for topic, exist in result.iteritems():
        # The core database holds every topic except fhact_yyyymmdd, while
        # the mbaas fh-reporting database holds only fhact_yyyymmdd, so a
        # topic is skipped when "is core" and "is fhact" agree.
        if is_core == (topic == "fhact"):
            continue
        if exist:
            continue
        print("Collection %s_%s does not exist" % (topic, today))
        code = nagios.WARN
        missing += 1

    if is_core:
        # More than one missing collection on the core is potentially an
        # issue with millicore.
        if missing > 1:
            code = nagios.CRIT
    elif missing == 1:
        # A missing fhact collection in the mbaas is potentially an issue
        # with millicore.
        code = nagios.CRIT

    if code == nagios.OK:
        print("OK. Collections exist")
    return code
def report(results, errors):
    """Report which ios slave machines are available to Jenkins.

    Prints a line for every machine and then a summary line.

    Args:
        results: sequence of records where index 0 is the machine name and
            index 1 is its nagios status code.
        errors: collection of errors; when non-empty the result becomes
            ``nagios.UNKNOWN``.

    Returns:
        ``nagios.UNKNOWN`` for empty ``results`` or non-empty ``errors``;
        otherwise the maximum status present in ``results``.
    """
    if not results:
        return nagios.UNKNOWN

    unique_statuses = Counter(computer[1] for computer in results)
    ret = max(unique_statuses)

    # Per-machine templates, tried in this order.
    machine_templates = (
        (nagios.CRIT, 'Machine: %s is unavailable to Jenkins - Status is %s '),
        (nagios.OK, 'Machine: %s is available to Jenkins - Status is %s '),
        (nagios.UNKNOWN, 'Machine: %s status is %s '),
    )
    for computer in results:
        for code, template in machine_templates:
            if computer[1] == code:
                print(template % (
                    computer[0], nagios.status_code_to_label(computer[1])))
                break
        else:
            print('Unable to determine the status of the mac machine')

    # The UNKNOWN and CRIT summaries previously printed len(results), which
    # overstated the problem whenever only some machines were affected. Use
    # the number of machines in the summarised state instead.
    if ret == nagios.OK:
        print("%s: %s ios slave machine(s) is/are available" % (
            nagios.status_code_to_label(ret), len(results)))
    elif ret == nagios.UNKNOWN:
        print("%s: Unable to determine status of %s ios slave machine(s)" % (
            nagios.status_code_to_label(ret), unique_statuses[nagios.UNKNOWN]))
    elif ret == nagios.CRIT:
        print("%s: Unable to contact %s ios slave machine(s)" % (
            nagios.status_code_to_label(ret), unique_statuses[nagios.CRIT]))

    return nagios.UNKNOWN if errors else ret