Пример #1
0
def report(pods, rs_statuses, nag_statuses):
    """Print a Nagios report for a mongodb replica set and return its status.

    Args:
        pods: sequence of pod identifiers; printed one per detail line.
        rs_statuses: sequence of replica-set states, one per pod, compared
            against ``REPLSET_STATUS.PRIMARY`` / ``REPLSET_STATUS.SECONDARY``.
        nag_statuses: sequence of nagios status codes, one per pod.

    Returns:
        ``nagios.UNKNOWN`` when ``pods`` is empty, ``nagios.CRIT`` when the
        number of primaries is not exactly one, otherwise the highest per-pod
        status (raised to at least ``nagios.WARN`` when the member count is
        even).
    """
    if not pods:
        print("%s: Unable to locate any pods running mongodb" % (
            nagios.status_code_to_label(nagios.UNKNOWN),))
        return nagios.UNKNOWN

    unique_rs_statuses = Counter(rs_statuses)
    primaries = unique_rs_statuses[REPLSET_STATUS.PRIMARY]

    # Start from the highest per-pod status code.
    ret = max(nag_statuses)

    if primaries != 1:
        ret = nagios.CRIT
        print("%s: There are %s nodes claiming to be primary members of the replica set" % (
            nagios.status_code_to_label(ret), primaries))
    elif len(rs_statuses) % 2 == 0:
        # Raise the result to WARN, but never lower a higher per-pod status
        # that was already found (previously a CRIT pod was reported as WARN).
        # Assumes higher codes are more severe, as the max() above implies.
        ret = max(ret, nagios.WARN)
        print("%s: There are an even number of voting members (%s) partipating in the replica set" % (
            nagios.status_code_to_label(nagios.WARN), len(rs_statuses)))
    else:
        print("%s: There are %s primary and %s secondary members in the replica set" % (
            nagios.status_code_to_label(ret),
            primaries,
            unique_rs_statuses[REPLSET_STATUS.SECONDARY]))

    # One detail line per pod: its own nagios label and replica-set state name.
    for pod, rs_status, nag_status in zip(pods, rs_statuses, nag_statuses):
        print("%s: %s - %s" % (
            nagios.status_code_to_label(nag_status),
            pod,
            REPLSET_STATUS.reverse_mapping[rs_status]))

    return ret
Пример #2
0
def report(results, errors):
    """Print a Nagios report on container cpu/memory limits; return the status.

    Args:
        results: sequence of ``(pod, container, cpu_limit, mem_limit, status)``
            tuples, where ``status`` is a nagios status code.
        errors: sequence of ``(pod_name, container_name, ex)`` tuples for
            containers that could not be checked.

    Returns:
        ``nagios.UNKNOWN`` when there are no results or there are any errors;
        otherwise the highest status code found in ``results``.
    """
    ret = nagios.UNKNOWN

    if results:
        unique_statuses = Counter(
            status for _pod, _container, _cpu, _mem, status in results)

        # The highest status code present selects the summary line.
        ret = max(unique_statuses)

        if ret == nagios.OK:
            print("%s: All %s containers have memory and cpu limits set" % (
                nagios.status_code_to_label(ret), len(results)))
        elif ret == nagios.UNKNOWN:
            print("%s: Unable to determine cpu and memory limits on %s containers" % (
                nagios.status_code_to_label(ret), unique_statuses[nagios.UNKNOWN]))
        elif ret == nagios.WARN:
            print("%s: There are %s containers that do not have both a cpu and memory limit set" % (
                nagios.status_code_to_label(ret), unique_statuses[nagios.WARN]))

        for pod, container, cpu_limit, mem_limit, status in results:
            print("%s: %s:%s: - memory limit: %s - cpu limit: %s" % (
                nagios.status_code_to_label(status), pod, container, mem_limit,
                cpu_limit))

    # Errors are reported even when no results were gathered, so an UNKNOWN
    # outcome is never returned without explaining what failed.
    if errors:
        ret = nagios.UNKNOWN
        for pod_name, container_name, ex in errors:
            # NOTE(review): a label string, not a status code, is passed here;
            # confirm status_code_to_label accepts it.
            print("%s: %s:%s %s" % (nagios.status_code_to_label("WARNING"),
                                    pod_name, container_name, ex))

    return ret
Пример #3
0
def report(results, errors, minimum):
    """Print a Nagios report on volume usage and return the overall status.

    Args:
        results: sequence of ``(pod, container, mount, disk_usage,
            inode_usage, status)`` tuples, where ``status`` is a nagios
            status code.
        errors: sequence of ``(pod_name, container_name, ex)`` tuples for
            containers that could not be checked.
        minimum: a volume gets a detail line only when the larger of its
            disk and inode usage exceeds this value.

    Returns:
        ``nagios.UNKNOWN`` when there are no results or there are any errors;
        otherwise the highest status code found in ``results``.
    """
    ret = nagios.UNKNOWN

    if results:
        unique_statuses = Counter(
            status
            for _pod, _container, _mount, _space, _inodes, status in results)

        ret = max(unique_statuses)

        print("Checked %s volumes (%s critical, %s warning)" % (
            len(results), unique_statuses[nagios.CRIT],
            unique_statuses[nagios.WARN]))

        for pod, container, mount, disk_usage, inode_usage, status in results:
            if max(disk_usage, inode_usage) > minimum:
                print("%s: %s:%s:%s - bytes used: %s%%, inodes used: %s%%" % (
                    nagios.status_code_to_label(status), pod, container, mount,
                    disk_usage, inode_usage))

    # Errors are reported even when no results were gathered, so an UNKNOWN
    # outcome is never returned without explaining what failed.
    if errors:
        ret = nagios.UNKNOWN
        for pod_name, container_name, ex in errors:
            # NOTE(review): a label string, not a status code, is passed here;
            # confirm status_code_to_label accepts it.
            print("%s: %s:%s %s" % (nagios.status_code_to_label("WARNING"),
                                    pod_name, container_name, ex))

    return ret
Пример #4
0
def report(results, errors, minimum):
    """Print a Nagios report on volume usage and return the overall status.

    Args:
        results: sequence of ``(pod, container, mount, disk_usage,
            inode_usage, status)`` tuples, where ``status`` is a nagios
            status code.
        errors: sequence of ``(pod_name, container_name, ex)`` tuples for
            containers that could not be checked.
        minimum: a volume gets a detail line only when the larger of its
            disk and inode usage exceeds this value.

    Returns:
        ``nagios.UNKNOWN`` when there are no results or there are any errors;
        otherwise the highest status code found in ``results``.
    """
    ret = nagios.UNKNOWN

    if results:
        unique_statuses = Counter(
            disk_status
            for _pod, _container, _mount, _space, _inodes, disk_status in results
        )

        ret = max(unique_statuses)

        print("Checked %s volumes (%s critical, %s warning)" % (
            len(results), unique_statuses[nagios.CRIT], unique_statuses[nagios.WARN]))

        for pod, container, mount, disk_usage, inode_usage, status in results:
            if max(disk_usage, inode_usage) > minimum:
                print("%s: %s:%s:%s - bytes used: %s%%, inodes used: %s%%" % (
                    nagios.status_code_to_label(status), pod, container, mount, disk_usage, inode_usage))

    # Errors are reported even when no results were gathered, so an UNKNOWN
    # outcome is never returned without explaining what failed.
    if errors:
        ret = nagios.UNKNOWN
        for pod_name, container_name, ex in errors:
            # NOTE(review): a label string, not a status code, is passed here;
            # confirm status_code_to_label accepts it.
            print("%s: %s:%s %s" % (
                nagios.status_code_to_label("WARNING"), pod_name, container_name, ex))

    return ret
Пример #5
0
def report(results):
    """Print a Nagios report on volume usage and return the overall status.

    Args:
        results: sequence of ``(pod, mount, disk_usage, inode_usage, status)``
            tuples, where ``status`` is a nagios status code.

    Returns:
        ``nagios.UNKNOWN`` when ``results`` is empty, otherwise the highest
        status code found in ``results``.
    """
    if not results:
        return nagios.UNKNOWN

    tally = Counter(
        state for _pod, _mount, _space, _inodes, state in results)
    worst = max(tally)
    label = nagios.status_code_to_label(worst)

    # Build the summary line that matches the highest status code seen.
    if worst == nagios.OK:
        headline = "%s: All %s volumes are under the warning threshold" % (
            label, len(results))
    elif worst == nagios.UNKNOWN:
        headline = "%s: Unable to determine usage on %s volumes" % (
            label, tally[nagios.UNKNOWN])
    elif worst == nagios.WARN:
        headline = "%s: There are %s volumes over the warning threshold" % (
            label, tally[nagios.WARN])
    else:
        headline = "%s: There are %s volumes over the critical threshold and %s volumes over the warning threshold" % (
            label, tally[nagios.CRIT], tally[nagios.WARN])
    print(headline)

    # One detail line per volume.
    for pod, mount, disk_usage, inode_usage, state in results:
        detail = "%s: %s:%s - bytes used: %s%%, inodes used: %s%%" % (
            nagios.status_code_to_label(state), pod, mount, disk_usage,
            inode_usage)
        print(detail)

    return worst
Пример #6
0
def report(results, errors):
    """Print a Nagios report on container cpu/memory limits; return the status.

    Args:
        results: sequence of ``(pod, container, cpu_limit, mem_limit, status)``
            tuples, where ``status`` is a nagios status code.
        errors: sequence of ``(pod_name, container_name, ex)`` tuples for
            containers that could not be checked.

    Returns:
        ``nagios.UNKNOWN`` when there are no results or there are any errors;
        otherwise the highest status code found in ``results``.
    """
    ret = nagios.UNKNOWN

    if results:
        unique_statuses = Counter(
            status
            for _pod, _container, _cpu_limit, _mem_limit, status in results
        )

        # The highest status code present selects the summary line.
        ret = max(unique_statuses)

        if ret == nagios.OK:
            print("%s: All %s containers have memory and cpu limits set" % (
                nagios.status_code_to_label(ret), len(results)))
        elif ret == nagios.UNKNOWN:
            print("%s: Unable to determine cpu and memory limits on %s containers" % (
                nagios.status_code_to_label(ret), unique_statuses[nagios.UNKNOWN]))
        elif ret == nagios.WARN:
            print("%s: There are %s containers that do not have both a cpu and memory limit set" % (
                nagios.status_code_to_label(ret), unique_statuses[nagios.WARN]))

        for pod, container, cpu_limit, mem_limit, status in results:
            print("%s: %s:%s: - memory limit: %s - cpu limit: %s" % (
                nagios.status_code_to_label(status), pod, container, mem_limit, cpu_limit))

    # Errors are reported even when no results were gathered, so an UNKNOWN
    # outcome is never returned without explaining what failed.
    if errors:
        ret = nagios.UNKNOWN
        for pod_name, container_name, ex in errors:
            # NOTE(review): a label string, not a status code, is passed here;
            # confirm status_code_to_label accepts it.
            print("%s: %s:%s %s" % (
                nagios.status_code_to_label("WARNING"), pod_name, container_name, ex))

    return ret
Пример #7
0
def report(pods, rs_statuses, nag_statuses):
    """Print a Nagios report for a mongodb replica set and return its status.

    Args:
        pods: sequence of pod identifiers; printed one per detail line.
        rs_statuses: sequence of replica-set states, one per pod, compared
            against ``REPLSET_STATUS.PRIMARY`` / ``REPLSET_STATUS.SECONDARY``.
        nag_statuses: sequence of nagios status codes, one per pod.

    Returns:
        ``nagios.UNKNOWN`` when ``pods`` is empty, ``nagios.CRIT`` when the
        number of primaries is not exactly one, otherwise the highest per-pod
        status (raised to at least ``nagios.WARN`` when the member count is
        even).
    """
    if not pods:
        print("%s: Unable to locate any pods running mongodb" % (
            nagios.status_code_to_label(nagios.UNKNOWN), ))
        return nagios.UNKNOWN

    unique_rs_statuses = Counter(rs_statuses)
    primary_count = unique_rs_statuses[REPLSET_STATUS.PRIMARY]

    # Start from the highest per-pod status code.
    ret = max(nag_statuses)

    if primary_count != 1:
        ret = nagios.CRIT
        print("%s: There are %s nodes claiming to be primary members of the replica set" % (
            nagios.status_code_to_label(ret),
            primary_count))
    elif len(rs_statuses) % 2 == 0:
        # Raise the result to WARN, but never lower a higher per-pod status
        # that was already found (previously a CRIT pod was reported as WARN).
        # Assumes higher codes are more severe, as the max() above implies.
        ret = max(ret, nagios.WARN)
        print("%s: There are an even number of voting members (%s) partipating in the replica set" % (
            nagios.status_code_to_label(nagios.WARN), len(rs_statuses)))
    else:
        print("%s: There are %s primary and %s secondary members in the replica set" % (
            nagios.status_code_to_label(ret),
            primary_count,
            unique_rs_statuses[REPLSET_STATUS.SECONDARY]))

    # One detail line per pod: its own nagios label and replica-set state name.
    for pod, rs_status, nag_status in zip(pods, rs_statuses, nag_statuses):
        print("%s: %s - %s" % (nagios.status_code_to_label(nag_status), pod,
                               REPLSET_STATUS.reverse_mapping[rs_status]))

    return ret
def report(results, errors):
    """Print whether the androidsdk could be contacted and return the status.

    Args:
        results: iterable of nagios status codes.
        errors: any truthy value short-circuits to ``nagios.UNKNOWN``.

    Returns:
        The first result equal to ``nagios.CRIT`` or ``nagios.OK``;
        ``nagios.UNKNOWN`` when there are errors or no such result.
    """
    if errors:
        return nagios.UNKNOWN

    # Checked in this order for every result: CRIT first, then OK.
    templates = (
        (nagios.CRIT, 'Unable to contact the androidsdk - Status %s'),
        (nagios.OK, 'Able to contact the androidsdk - Status %s'),
    )

    for outcome in results:
        for code, template in templates:
            if outcome == code:
                print(template % (nagios.status_code_to_label(outcome)))
                return outcome

    return nagios.UNKNOWN
Пример #9
0
def report(results, errors):
    """Print the AndroidSDK PersistentVolumeClaim state and return the status.

    Args:
        results: iterable of nagios status codes.
        errors: sequence of ``(status, ex)`` pairs; when non-empty each ``ex``
            is printed and ``nagios.UNKNOWN`` is returned.

    Returns:
        The first result equal to ``nagios.CRIT`` or ``nagios.OK``;
        ``nagios.UNKNOWN`` when there are errors, when ``results`` is empty,
        or when no result is CRIT or OK.
    """
    if errors:
        for _status, ex in errors:
            # NOTE(review): a label string, not a status code, is passed here;
            # confirm status_code_to_label accepts it.
            print("%s , %s" % (nagios.status_code_to_label("WARNING"), ex))
        return nagios.UNKNOWN

    for result in results:
        if result == nagios.CRIT:
            print('AndroidSDK PersistentVolumeClaim is not Bound - Status %s' % (
                nagios.status_code_to_label(result)))
            return result
        elif result == nagios.OK:
            print('AndroidSDK PersistentVolumeClaim is Bound - Status %s' % (
                nagios.status_code_to_label(result)))
            return result
        elif result == nagios.UNKNOWN:
            print('Unable to determine the status of the AndroidSDK PersistentVolumeClaim')

    # Previously the function fell off the end here and returned None, which
    # a caller passing the value to sys.exit() would treat as success.
    return nagios.UNKNOWN
Пример #10
0
def report(results, errors):
    """Print a Nagios report on container memory usage; return the status.

    Args:
        results: sequence of ``(pod, container, memory_total, memory_used,
            usage, status)`` tuples, where ``usage`` is formatted as a
            percentage and ``status`` is a nagios status code.
        errors: sequence of ``(pod_name, container_name, ex)`` tuples for
            containers that could not be checked.

    Returns:
        ``nagios.UNKNOWN`` when there are no results or there are any errors;
        otherwise the highest status code found in ``results``.
    """
    ret = nagios.UNKNOWN

    if results:
        unique_statuses = Counter(
            status
            for _pod, _container, _total, _used, _usage, status in results)

        # The highest status code present selects the summary line.
        ret = max(unique_statuses)

        if ret == nagios.OK:
            print("%s: All %s containers are under the warning threshold" % (
                nagios.status_code_to_label(ret), len(results)))
        elif ret == nagios.UNKNOWN:
            print("%s: Unable to determine usage on %s containers" % (
                nagios.status_code_to_label(ret), unique_statuses[nagios.UNKNOWN]))
        elif ret == nagios.WARN:
            print("%s: There are %s containers over the warning threshold" % (
                nagios.status_code_to_label(ret), unique_statuses[nagios.WARN]))
        else:
            print("%s: There are %s containers over the critical threshold and %s containers over the warning threshold" % (
                nagios.status_code_to_label(ret), unique_statuses[nagios.CRIT],
                unique_statuses[nagios.WARN]))

        for pod, container, _memory_total, _memory_used, usage, status in results:
            print("%s: %s:%s: - usage: %.1f%%" % (
                nagios.status_code_to_label(status), pod, container, usage))

    # Errors are reported even when no results were gathered, so an UNKNOWN
    # outcome is never returned without explaining what failed.
    if errors:
        ret = nagios.UNKNOWN
        for pod_name, container_name, ex in errors:
            # NOTE(review): a label string, not a status code, is passed here;
            # confirm status_code_to_label accepts it.
            print("%s: %s:%s %s" % (nagios.status_code_to_label("WARNING"),
                                    pod_name, container_name, ex))

    return ret
Пример #11
0
def report(results, errors):
    """Print a Nagios report on Jenkins ios slave machines; return the status.

    Args:
        results: sequence of records where index 0 is the machine name and
            index 1 is its nagios status code.
        errors: any truthy value forces the returned status to
            ``nagios.UNKNOWN``.

    Returns:
        ``nagios.UNKNOWN`` when there are no results or there are any errors;
        otherwise the highest status code found in ``results``.
    """
    if not results:
        return nagios.UNKNOWN

    unique_statuses = Counter(computer[1] for computer in results)

    ret = max(unique_statuses)

    # One detail line per machine.
    for computer in results:
        name, status = computer[0], computer[1]
        if status == nagios.CRIT:
            print('Machine: %s is unavailable to Jenkins - Status is %s ' % (
                name, nagios.status_code_to_label(status)))
        elif status == nagios.OK:
            print('Machine: %s is available to Jenkins - Status is %s ' % (
                name, nagios.status_code_to_label(status)))
        elif status == nagios.UNKNOWN:
            print('Machine: %s status is %s ' % (
                name, nagios.status_code_to_label(status)))
        else:
            print('Unable to determine the status of the mac machine')

    # The summary counts only the machines in the reported state; the total
    # would overstate the problem whenever the statuses are mixed.
    if ret == nagios.OK:
        print("%s: %s ios slave machine(s) is/are available" % (
            nagios.status_code_to_label(ret), len(results)))
    elif ret == nagios.UNKNOWN:
        print("%s: Unable to determine status of %s ios slave machine(s)" % (
            nagios.status_code_to_label(ret), unique_statuses[nagios.UNKNOWN]))
    elif ret == nagios.CRIT:
        print("%s: Unable to contact %s ios slave machine(s)" % (
            nagios.status_code_to_label(ret), unique_statuses[nagios.CRIT]))

    if errors:
        ret = nagios.UNKNOWN

    return ret
def report(pods, project, result):
    """Print which of today's collections are missing and return the status.

    Args:
        pods: when empty, an UNKNOWN line is printed and ``nagios.UNKNOWN``
            is returned.
        project: project name; ``"rhmap-core"`` selects the core rules.
        result: mapping of topic name to a truthy/falsy "exists" flag.

    Returns:
        ``nagios.OK`` when nothing relevant is missing. For ``"rhmap-core"``,
        ``nagios.CRIT`` when more than one collection is missing, else
        ``nagios.WARN``. For other projects, ``nagios.CRIT`` when exactly one
        is missing, else ``nagios.WARN``.
    """
    if not pods:
        print("%s: Unable to locate any pods running mongodb" % (
            nagios.status_code_to_label(nagios.UNKNOWN), ))
        return nagios.UNKNOWN

    is_core = project == "rhmap-core"
    today = time.strftime('%Y%m%d')
    missing = 0

    for topic, exist in result.iteritems():
        # fh-reporting fhact_yyyymmdd collections are not stored in the core
        # database, and the fh-reporting database on the mbaas contains only
        # the fhact_yyyymmdd collections; skip whatever does not apply.
        if is_core == (topic == "fhact"):
            continue
        if exist:
            continue

        print("Collection %s_%s does not exist" % (topic, today))
        missing += 1

    if not missing:
        print("OK. Collections exist")
        return nagios.OK

    if is_core:
        # More than one missing collection on the core is potentially an
        # issue with millicore.
        return nagios.CRIT if missing > 1 else nagios.WARN

    # A missing fhact collection in the mbaas is potentially an issue with
    # millicore.
    return nagios.CRIT if missing == 1 else nagios.WARN
def report(results, errors):
    """Print a Nagios report on Jenkins ios slave machines; return the status.

    Args:
        results: sequence of records where index 0 is the machine name and
            index 1 is its nagios status code.
        errors: any truthy value forces the returned status to
            ``nagios.UNKNOWN``.

    Returns:
        ``nagios.UNKNOWN`` when there are no results or there are any errors;
        otherwise the highest status code found in ``results``.
    """
    if not results:
        return nagios.UNKNOWN

    unique_statuses = Counter(
        computer[1]
        for computer in results
    )

    ret = max(unique_statuses)

    # One detail line per machine.
    for computer in results:
        machine, state = computer[0], computer[1]
        if state == nagios.CRIT:
            print('Machine: %s is unavailable to Jenkins - Status is %s ' % (
                machine, nagios.status_code_to_label(state)))
        elif state == nagios.OK:
            print('Machine: %s is available to Jenkins - Status is %s ' % (
                machine, nagios.status_code_to_label(state)))
        elif state == nagios.UNKNOWN:
            print('Machine: %s status is %s ' % (
                machine, nagios.status_code_to_label(state)))
        else:
            print('Unable to determine the status of the mac machine')

    # The summary counts only the machines in the reported state; the total
    # would overstate the problem whenever the statuses are mixed.
    if ret == nagios.OK:
        print("%s: %s ios slave machine(s) is/are available" % (
            nagios.status_code_to_label(ret), len(results)))
    elif ret == nagios.UNKNOWN:
        print("%s: Unable to determine status of %s ios slave machine(s)" % (
            nagios.status_code_to_label(ret), unique_statuses[nagios.UNKNOWN]))
    elif ret == nagios.CRIT:
        print("%s: Unable to contact %s ios slave machine(s)" % (
            nagios.status_code_to_label(ret), unique_statuses[nagios.CRIT]))

    if errors:
        ret = nagios.UNKNOWN

    return ret