class OpenshiftPodChecker(object):
    """ Checks for Openshift Pods """

    def __init__(self):
        self.args = None
        self.ora = None
        self.zagg_sender = None

    def run(self):
        """  Main function to run the check """

        self.parse_args()
        self.ora = OpenshiftRestApi()
        self.zagg_sender = ZaggSender(verbose=self.args.verbose, debug=self.args.debug)

        try:
            self.get_pods()

        except Exception as ex:
            print "Problem retreiving pod data: %s " % ex.message

        self.zagg_sender.send_metrics()

    def get_pods(self):
        """ Gets pod data """

        print "\nPerforming pod check ...\n"

        api_url = "/api/v1/pods"
        if self.args.namespace is not None and self.args.namespace != "all":
            api_url = "/api/v1/namespaces/{}/pods".format(self.args.namespace)

        api_yaml = self.ora.get(api_url, rtype="text")
        pods = yaml.safe_load(api_yaml)

        pod_count = 0
        for pod in pods["items"]:
            if self.args.pod and self.args.pod in pod["metadata"]["name"]:
                print "status of {} is {}".format(pod["metadata"]["name"], pod["status"]["phase"])
                if pod["status"]["phase"] == "Running":
                    pod_count += 1
            else:
                pass

        self.zagg_sender.add_zabbix_keys({"service.pod.{}.count".format(self.args.pod): pod_count})

    def parse_args(self):
        """ parse the args from the cli """

        parser = argparse.ArgumentParser(description="Openshift pod sender")
        parser.add_argument("-p", "--pod", default=None, help="Check for pod with this specific name")
        parser.add_argument("-n", "--namespace", default=None, help='Check for pods in this namespace - "all" for all')
        parser.add_argument("-v", "--verbose", action="store_true", default=None, help="Verbose?")
        parser.add_argument("--debug", action="store_true", default=None, help="Debug?")

        self.args = parser.parse_args()
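
A minimal entry point for this checker, following the pattern these scripts usually end with, might look like this (a sketch, assuming the class above lives in the executed script):

if __name__ == '__main__':
    # Hypothetical runner: build the checker and run the check once
    OPC = OpenshiftPodChecker()
    OPC.run()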
def main():
    ''' Do the application creation
    '''
    print '################################################################################'
    print '  Starting App Create'
    print '################################################################################'
    namespace = 'ops-monitor-' + os.environ['ZAGG_CLIENT_HOSTNAME']
    oocmd = OpenShiftOC(namespace, 'hello-openshift', verbose=False)
    app = 'openshift/hello-openshift:v1.0.6'

    start_time = time.time()
    if namespace in oocmd.get_projects():
        oocmd.delete_project()

    oocmd.new_project()

    oocmd.new_app(app)

    create_app = 1
    pod = None
    # Now we wait until the pod comes up
    for _ in range(24):
        time.sleep(5)
        pod = oocmd.get_pod()
        if pod and pod['status']:
            print 'Polling Pod status: %s' % pod['status']['phase']
        if pod and pod['status']['phase'] == 'Running' and 'podIP' in pod['status']:
            #c_results = curl(pod['status']['podIP'], '8080')
            #if c_results == 'Hello OpenShift!\n':
            print 'Finished.'
            print 'State: Success'
            print 'Time: %s' % str(time.time() - start_time)
            create_app = 0
            break

    else:
        print 'Finished.'
        print 'State: Fail'
        print 'Time: %s' % str(time.time() - start_time)
        print 'Fetching Events:'
        oocmd.verbose = True
        print oocmd.get_events()
        print 'Fetching Logs:'
        print oocmd.get_logs()
        print 'Fetching Pod:'
        print pod

    if namespace in oocmd.get_projects():
        oocmd.delete_project()

    zgs = ZaggSender()
    zgs.add_zabbix_keys({'openshift.master.app.create': create_app})
    zgs.send_metrics()
    def config_zagg_sender(self):
        """ configure the zagg_sender """

        zagg_url = self.args.zagg_url if self.args.zagg_url else self.config['zagg']['url']
        zagg_user = self.args.zagg_user if self.args.zagg_user else self.config['zagg']['user']
        zagg_password = self.args.zagg_pass if self.args.zagg_pass else self.config['zagg']['pass']

        zagg_ssl_verify = self.config['zagg'].get('ssl_verify', False)
        zagg_verbose = self.config['zagg'].get('verbose', False)

        if isinstance(zagg_verbose, str):
            zagg_verbose = (zagg_verbose == 'True')

        if self.args.verbose:
            zagg_verbose = self.args.verbose

        if isinstance(zagg_ssl_verify, str):
            zagg_ssl_verify = (zagg_ssl_verify == 'True')

        if self.args.zagg_ssl_verify:
            zagg_ssl_verify = self.args.zagg_ssl_verify

        zagg_conn = ZaggConnection(url=zagg_url,
                                   user=zagg_user,
                                   password=zagg_password,
                                   ssl_verify=zagg_ssl_verify,
                                   verbose=zagg_verbose,
                                  )

        host = self.args.host if self.args.host else self.config['host']['name']
        self.zagg_sender = ZaggSender(host, zagg_conn)
Example #5
    def config_zagg_sender(self):
        """ configure the zagg_sender """

        zagg_url = self.args.zagg_url if self.args.zagg_url else self.config['zagg']['url']
        zagg_user = self.args.zagg_user if self.args.zagg_user else self.config['zagg']['user']
        zagg_password = self.args.zagg_pass if self.args.zagg_pass else self.config['zagg']['pass']
        zagg_verbose = self.args.verbose if self.args.verbose else self.config['zagg']['verbose']
        zagg_debug = self.args.debug if self.args.debug else self.config['zagg']['debug']
        zagg_ssl_verify = self.args.zagg_ssl_verify if self.args.zagg_ssl_verify else self.config['zagg']['ssl_verify']
        host = self.args.host if self.args.host else self.config['host']['name']

        if isinstance(zagg_verbose, str):
            zagg_verbose = (zagg_verbose == 'True')

        if isinstance(zagg_debug, str):
            zagg_debug = (zagg_debug == 'True')

        if isinstance(zagg_ssl_verify, str):
            zagg_ssl_verify = (zagg_ssl_verify == 'True')

        zagg_conn = ZaggConnection(url=zagg_url,
                                   user=zagg_user,
                                   password=zagg_password,
                                   ssl_verify=zagg_ssl_verify,
                                   debug=zagg_debug,
                                  )

        self.zagg_sender = ZaggSender(host, zagg_conn, zagg_verbose, zagg_debug)
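
The three isinstance checks above repeat the same string-to-boolean coercion. A hypothetical helper (not part of the original code) could factor the pattern out:

def str_to_bool(value):
    ''' Coerce config values that may arrive as the strings 'True'/'False' into booleans '''
    if isinstance(value, str):
        return value == 'True'
    return bool(value)

# usage sketch: zagg_verbose = str_to_bool(zagg_verbose)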
def main():
    ''' Do the application creation
    '''
    proj_name = 'ops-monitor-appbuild' + os.environ['ZAGG_CLIENT_HOSTNAME']
    app = 'nodejs-example'
    verbose = True

    start_time = time.time()
    if proj_name in OpenShiftOC.get_projects(verbose):
        OpenShiftOC.delete_project(proj_name, verbose)

    OpenShiftOC.new_project(proj_name, verbose)

    OpenShiftOC.new_app(app, proj_name, verbose)
    # create_app = 1 indicates failure
    create_app = 1
    BuildTime = 0
    CreateTime = 0
    # Now we wait until the pod comes up
    for _ in range(24):
        time.sleep(10)
        #checking the building pod
        buildPod = OpenShiftOC.get_build_pod(app, proj_name, verbose)
        if buildPod and buildPod['status']['phase'] == 'Failed':
            BuildTime = time.time() - start_time
            print 'fail'
            break
        if buildPod and buildPod['status']['phase'] == 'Succeeded':
            BuildTime = time.time() - start_time
            for _ in range(24):
                time.sleep(5)
                create_app = check_route(app, proj_name, verbose)
                if create_app == 0:
                    CreateTime = time.time() - start_time
                    print 'success'
                    print 'Time: %s' % CreateTime
                    print 'BuildTime: %s' % BuildTime
                    break
            if create_app == 0:
                break
    else:
        BuildTime = time.time() - start_time
        print 'BuildTime: %s' % BuildTime
        print 'fail'
    if proj_name in OpenShiftOC.get_projects(verbose):
        OpenShiftOC.delete_project(proj_name, verbose)

    zgs = ZaggSender()
    zgs.add_zabbix_keys({'openshift.master.app.build.create': create_app})
    zgs.add_zabbix_keys({'openshift.master.app.create.time': CreateTime})
    zgs.add_zabbix_keys({'openshift.master.app.build.time': BuildTime})
    zgs.send_metrics()
def main():
    ''' Do the application creation
    '''
    proj_name = 'ops-monitor-' + os.environ['ZAGG_CLIENT_HOSTNAME']
    app = 'openshift/hello-openshift:v1.0.6'
    verbose = False

    if len(sys.argv) > 1 and sys.argv[1] == '-v':
        verbose = True

    start_time = time.time()
    if proj_name in OpenShiftOC.get_projects(verbose):
        OpenShiftOC.delete_project(proj_name, verbose)

    OpenShiftOC.new_project(proj_name, verbose)

    OpenShiftOC.new_app(app, proj_name, verbose)

    create_app = 1
    # Now we wait until the pod comes up
    for _ in range(24):
        time.sleep(5)
        pod = OpenShiftOC.get_pod('hello-openshift', proj_name, verbose)
        if pod and pod['status']:
            if verbose:
                print pod['status']['phase']
        if pod and pod['status']['phase'] == 'Running' and 'podIP' in pod['status']:
            #c_results = curl(pod['status']['podIP'], '8080')
            #if c_results == 'Hello OpenShift!\n':
            if verbose:
                print 'success'
                print 'Time: %s' % str(time.time() - start_time)
            create_app = 0
            break

    else:
        if verbose:
            print 'Time: %s' % str(time.time() - start_time)
            print 'fail'

    if proj_name in OpenShiftOC.get_projects(verbose):
        OpenShiftOC.delete_project(proj_name, verbose)

    zgs = ZaggSender()
    zgs.add_zabbix_keys({'openshift.master.app.create': create_app})
    zgs.send_metrics()
    def run(self):
        """  Main function to run the check """

        self.parse_args()
        self.zagg_sender = ZaggSender(verbose=self.args.verbose, debug=self.args.debug)

        if self.check_dns_port_alive():
            self.get_openshift_services()
            self.do_dns_check()

        self.zagg_sender.send_metrics()
def main():
    """ Get data from oadm and send to zabbix
    """

    ## set oadm config
    oadm_command = "KUBECONFIG=/etc/openshift/master/admin.kubeconfig /usr/bin/oadm"

    ## get list of running pods
    podlist_cmd = oadm_command + " manage-node --list-pods --selector=''"

    # get the output of oadm
    output = subprocess.check_output(podlist_cmd, shell=True)

    # pare down to only lines that contain "Running"
    running_pods_list = [p for p in output.split("\n") if "Running" in p]

    # we now have all the data we want.  Let's send it to Zagg
    zs = ZaggSender()
    zs.add_zabbix_keys({"running_pods_count": len(running_pods_list)})

    # Finally, send them to zabbix
    zs.send_metrics()
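
For reference, the filter above counts every line of the oadm pod listing whose status column reads Running; a matching line would resemble the following (hypothetical pod and values):

#   default   router-1-abc12   1/1   Running   0   3d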
def main():
    """  Main function to run the check """

    args = parse_args()
    zagg_sender = ZaggSender(verbose=args.verbose, debug=args.debug)

    discovery_key_disk = 'disc.disk'
    interval = 3
    pcp_disk_dev_metrics = ['disk.dev.total', 'disk.dev.avactive']
    item_prototype_macro_disk = '#OSO_DISK'
    item_prototype_key_tps = 'disc.disk.tps'
    item_prototype_key_putil = 'disc.disk.putil'

    disk_metrics = pminfo.get_sampled_data(pcp_disk_dev_metrics, interval, 2)

    pcp_metrics_divided = {}
    for metric in pcp_disk_dev_metrics:
        pcp_metrics_divided[metric] = {k: v for k, v in disk_metrics.items() if metric in k}

    # do TPS checks; use disk.dev.total
    filtered_disk_totals = clean_up_metric_dict(pcp_metrics_divided[pcp_disk_dev_metrics[0]],
                                                pcp_disk_dev_metrics[0] + '.')

    # Add dynamic items
    zagg_sender.add_zabbix_dynamic_item(discovery_key_disk, item_prototype_macro_disk, filtered_disk_totals.keys())

    # calculate the TPS and add them to the ZaggSender
    for disk, totals in filtered_disk_totals.iteritems():
        disk_tps = (totals[1] - totals[0]) / interval
        zagg_sender.add_zabbix_keys({'%s[%s]' % (item_prototype_key_tps, disk): disk_tps})

    # do % Util checks; use disk.dev.avactive
    filtered_disk_totals = clean_up_metric_dict(pcp_metrics_divided[pcp_disk_dev_metrics[1]],
                                                pcp_disk_dev_metrics[1] + '.')

    # calculate the % Util and add them to the ZaggSender
    for disk, totals in filtered_disk_totals.iteritems():
        total_active = float(totals[1] - totals[0]) / 1000.0
        putil = 100 * total_active / interval

        zagg_sender.add_zabbix_keys({'%s[%s]' % (item_prototype_key_putil, disk): putil})

    zagg_sender.send_metrics()
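
As a worked example of the arithmetic above, with interval = 3 and hypothetical sample pairs for one disk:

# disk.dev.total samples:    totals = [1200, 1236] -> disk_tps = (1236 - 1200) / 3 = 12
# disk.dev.avactive samples: totals = [5000, 6500] -> total_active = 1500 / 1000.0 = 1.5
#                                                     putil = 100 * 1.5 / 3 = 50.0 percent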
    def run(self):
        """  Main function to run the check """

        self.parse_args()
        self.ora = OpenshiftRestApi()
        self.zagg_sender = ZaggSender(verbose=self.args.verbose, debug=self.args.debug)

        try:
            self.get_pods()

        except Exception as ex:
            print "Problem retreiving pod data: %s " % ex.message

        self.zagg_sender.send_metrics()
    def config_zagg_sender(self):
        """ configure the zagg_sender """

        zagg_server = self.args.zagg_server if self.args.zagg_server else self.config['zagg']['host']
        zagg_user = self.args.zagg_user if self.args.zagg_user else self.config['zagg']['user']
        zagg_password = self.args.zagg_pass if self.args.zagg_pass else self.config['zagg']['pass']
        host = self.args.host if self.args.host else self.config['host']['name']

        zagg_conn = ZaggConnection(host=zagg_server,
                                   user=zagg_user,
                                   password=zagg_password,
                                  )

        self.zagg_sender = ZaggSender(host, zagg_conn)
class OpenshiftSkyDNSZaggClient(object):
    """ Checks for the Openshift Master SkyDNS """

    def __init__(self):
        self.args = None
        self.zagg_sender = None
        self.ora = OpenshiftRestApi()
        self.dns_host = '127.0.0.1'
        self.dns_port = 53
        self.openshift_services = []

    def run(self):
        """  Main function to run the check """

        self.parse_args()
        self.zagg_sender = ZaggSender(verbose=self.args.verbose, debug=self.args.debug)

        if self.check_dns_port_alive():
            self.get_openshift_services()
            self.do_dns_check()

        self.zagg_sender.send_metrics()

    def parse_args(self):
        """ parse the args from the cli """

        parser = argparse.ArgumentParser(description='Network metric sender')
        parser.add_argument('-v', '--verbose', action='store_true', default=None, help='Verbose?')
        parser.add_argument('--debug', action='store_true', default=None, help='Debug?')

        self.args = parser.parse_args()

    def check_dns_port_alive(self):
        """ Verify that the DNS port (TCP 53) is alive """

        print "\nPerforming Openshift DNS port check..."

        try:
            s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
            s.settimeout(1)
            s.connect((self.dns_host, self.dns_port))
            s.close()

            print "\nOpenshift SkyDNS host: %s, port: %s is OPEN" % (self.dns_host, self.dns_port)
            print "================================================\n"
            self.zagg_sender.add_zabbix_keys({'openshift.master.skydns.port.open' : 1})

            return True

        except socket.error as e:
            print "\nOpenshift SkyDNS host: %s, port: %s is CLOSED" % (self.dns_host, self.dns_port)
            print "Python Error: %s" % e
            print "================================================\n"
            self.zagg_sender.add_zabbix_keys({'openshift.master.skydns.port.open' : 0})

            return False
def main():
    """ Gather and send details on all visible S3 buckets """

    discovery_key = "disc.aws"
    discovery_macro = "#S3_BUCKET"
    prototype_s3_size = "disc.aws.size"
    prototype_s3_count = "disc.aws.objects"

    args = parse_args()

    ocutil = OCUtil()
    oc_yaml = ocutil.get_secrets("dockerregistry")

    aws_access, aws_secret = get_aws_creds(oc_yaml)
    awsutil = AWSUtil(aws_access, aws_secret, args.debug)

    bucket_list = awsutil.get_bucket_list(args.debug)

    bucket_stats = {}

    for bucket in bucket_list:
        s3_size, s3_objects = awsutil.get_bucket_info(bucket, args.debug)
        bucket_stats[bucket] = {"size": s3_size, "objects": s3_objects}

    if args.debug:
        print "Bucket stats: " + str(bucket_stats)

    if args.test:
        print "Test-only. Received results: " + str(bucket_stats)
    else:
        zgs = ZaggSender(verbose=args.debug)
        zgs.add_zabbix_dynamic_item(discovery_key, discovery_macro, bucket_list)
        for bucket in bucket_stats.keys():
            zab_key = "{}[{}]".format(prototype_s3_size, bucket)
            zgs.add_zabbix_keys({zab_key: int(round(bucket_stats[bucket]["size"]))})

            zab_key = "{}[{}]".format(prototype_s3_count, bucket)
            zgs.add_zabbix_keys({zab_key: bucket_stats[bucket]["objects"]})
        zgs.send_metrics()
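
The queued keys follow Zabbix's prototype[macro-value] convention; for a hypothetical bucket named 'registry-bucket' the script would queue:

#   disc.aws.size[registry-bucket]    -> rounded size value from get_bucket_info
#   disc.aws.objects[registry-bucket] -> object count from get_bucket_info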
def main():
    """  Main function to run the check """

    args = parse_args()
    zagg_sender = ZaggSender(verbose=args.verbose, debug=args.debug)

    discovery_key_network = 'disc.network'
    pcp_network_dev_metrics = ['network.interface.in.bytes', 'network.interface.out.bytes']
    item_proto_macro_network = '#OSO_NET_INTERFACE'
    item_proto_key_in_bytes = 'disc.network.in.bytes'
    item_proto_key_out_bytes = 'disc.network.out.bytes'

    network_metrics = pminfo.get_metrics(pcp_network_dev_metrics)

    pcp_metrics_divided = {}
    for metric in pcp_network_dev_metrics:
        pcp_metrics_divided[metric] = {k: v for k, v in network_metrics.items() if metric in k}

    # do Network In; use network.interface.in.bytes
    filtered_network_totals = clean_up_metric_dict(pcp_metrics_divided[pcp_network_dev_metrics[0]],
                                                   pcp_network_dev_metrics[0] + '.')

    # Add dynamic items
    zagg_sender.add_zabbix_dynamic_item(discovery_key_network, item_proto_macro_network, filtered_network_totals.keys())

    # Report Network IN bytes; add them to the ZaggSender
    for interface, total in filtered_network_totals.iteritems():
        zagg_sender.add_zabbix_keys({'%s[%s]' % (item_proto_key_in_bytes, interface): total})

    # Report Network OUT Bytes;  use network.interface.out.bytes
    filtered_network_totals = clean_up_metric_dict(pcp_metrics_divided[pcp_network_dev_metrics[1]],
                                                   pcp_network_dev_metrics[1] + '.')

    # Report Network OUT bytes; add them to the ZaggSender
    for interface, total in filtered_network_totals.iteritems():

        zagg_sender.add_zabbix_keys({'%s[%s]' % (item_proto_key_out_bytes, interface): total})

    zagg_sender.send_metrics()
    def run(self):
        """  Main function to run the check """

        self.parse_args()
        self.ora = OpenshiftRestApi()
        self.zagg_sender = ZaggSender(verbose=self.args.verbose, debug=self.args.debug)

        status = None

        try:
            self.get_service()
            status = self.check_service()

        except Exception as ex:
            print "Problem retreiving data: %s " % ex.message

        self.zagg_sender.add_zabbix_keys({
            "openshift.webservice.{}.status".format(self.args.pod) : status})

        self.zagg_sender.send_metrics()
Example #17
    def report_to_zabbix(self, disc_key, disc_macro, item_proto_key, value):
        """ Sends the commands exit code to zabbix. """
        zs = ZaggSender()


        # Add the dynamic item
        self.verbose_print("Adding the dynamic item to Zabbix - %s, %s, [%s]" % \
                           (disc_key, disc_macro, self.args.name))
        zs.add_zabbix_dynamic_item(disc_key, disc_macro, [self.args.name])

        # Send the value for the dynamic item
        self.verbose_print("Sending metric to Zabbix - %s[%s]: %s" % \
                           (item_proto_key, self.args.name, value))
        zs.add_zabbix_keys({'%s[%s]' % (item_proto_key, self.args.name): value})

        # Actually send them
        zs.send_metrics()
def main():
    """ Get data and send to zabbix
    """

    vswitch_ports_count = get_vswitch_ports()
    vswitch_pids_count = get_vswitch_pids()

    print "Found %s OVS ports" % vswitch_ports_count
    print "Found %s OVS pids" % vswitch_pids_count

    # we now have all the data we want.  Let's send it to Zagg
    zs = ZaggSender()
    zs.add_zabbix_keys({"openshift.node.ovs.ports.count": vswitch_ports_count})
    zs.add_zabbix_keys({"openshift.node.ovs.pids.count": vswitch_pids_count})

    # Finally, send them to zabbix
    zs.send_metrics()
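
get_vswitch_ports() and get_vswitch_pids() are defined elsewhere; a minimal sketch of the ports helper, assuming a bridge named br0 and the stock ovs-vsctl CLI, could look like:

import subprocess

def get_vswitch_ports():
    ''' Hypothetical implementation: count the ports on OVS bridge br0 '''
    output = subprocess.check_output('ovs-vsctl list-ports br0', shell=True)
    return len([port for port in output.split('\n') if port])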
    def run(self):
        """  Main function to run the check """

        self.parse_args()
        self.get_kubeconfig()
        ocutil = OCUtil(config_file=self.kubeconfig, verbose=self.args.verbose)
        self.zagg_sender = ZaggSender(verbose=self.args.verbose, debug=self.args.debug)

        try:
            oc_yaml = ocutil.get_service('docker-registry')
            self.get_registry_service(oc_yaml)
            oc_yaml = ocutil.get_endpoint('docker-registry')
            self.get_registry_endpoints(oc_yaml)
        except Exception as ex:
            print "Problem retreiving registry IPs: %s " % ex.message

        self.registry_service_check()
        self.registry_health_check()

        self.zagg_sender.send_metrics()
    def run(self):
        """  Main function to run the check """

        self.parse_args()
        self.zagg_sender = ZaggSender(verbose=self.args.verbose, debug=self.args.debug)

        try:
            if self.args.healthz or self.args.all_checks:
                self.healthz_check()
        except Exception as ex:
            print "Problem performing healthz check: %s " % ex.message
            self.zagg_sender.add_zabbix_keys({'openshift.master.api.healthz' : 'false'})

        try:
            if self.args.api_ping or self.args.all_checks:
                self.api_ping()

            if self.args.project_count or self.args.all_checks:
                self.project_count()

            if self.args.pod_count or self.args.all_checks:
                self.pod_count()

            if self.args.user_count or self.args.all_checks:
                self.user_count()
        except Exception as ex:
            print "Problem Openshift API checks: %s " % ex.message
            self.zagg_sender.add_zabbix_keys({'openshift.master.api.ping' : 0}) # Openshift API is down

        try:
            if self.args.metrics or self.args.all_checks:
                self.metric_check()
        except Exception as ex:
            print "Problem getting Openshift metrics at /metrics: %s " % ex.message
            self.zagg_sender.add_zabbix_keys({'openshift.master.metric.ping' : 0}) # Openshift Metrics are down

        self.zagg_sender.send_metrics()
def main():
    '''Run pminfo against a list of metrics.
       Sample metrics passed in for an amount of time and report data to zabbix
    '''

    args, parser = parse_args()

    if not args.metrics:
        print
        print 'Please specify metrics with -m.'
        print
        parser.print_help()
        sys.exit(1)

    metrics = args.metrics
    interval = int(args.interval)
    count = int(args.count)

    # Gather sampled data
    data = pminfo.get_sampled_data(metrics, interval, count)

    zab_results = collections.defaultdict(list)
    for metric_name, val in data.items():
        if 'kernel' in metric_name:
            for sample in range(len(val)):
                if sample + 1 == len(val):
                    break
                zab_results[metric_name].append(pminfo.calculate_percent_cpu(val[sample], val[sample+1], interval))
        else:
            print 'NOT SUPPORTED: [%s]' % metric_name

        if zab_results.get(metric_name, None) is not None and (args.verbose or args.debug):
            print '%s: %.2f' % (metric_name, zab_results[metric_name][-1])

    zab_results = get_averages(zab_results)

    # Send the data to zabbix
    if not args.test:
        zgs = ZaggSender(verbose=args.debug)
        zgs.add_zabbix_keys(zab_results)
        zgs.send_metrics()
def main():
    ''' Get data from oadm and send to zabbix
    '''

    ## set oadm config
    oadm_command = "KUBECONFIG=/etc/openshift/master/admin.kubeconfig /usr/bin/oadm"

    ## get list of running pods
    podlist_cmd = oadm_command + " manage-node --list-pods --selector=''"

    # get the output of oadm
    output = subprocess.check_output(podlist_cmd, shell=True)

    # pare down to only lines that contain "Running"
    running_pods_list = [p for p in output.split('\n') if "Running" in p]

    # we now have all the data we want.  Let's send it to Zagg
    zs = ZaggSender()
    zs.add_zabbix_keys({'running_pods_count': len(running_pods_list)})

    # Finally, send them to zabbix
    zs.send_metrics()
Example #23
    def main(self):
        """ Main function. """

        zag = ZaggSender()
        yaml_config = {}
        config_path = '/etc/openshift_tools/rkhunter_config.yaml'

        if os.path.isfile(config_path):
            with open(config_path, 'r') as rkhunter_config:
                yaml_config = yaml.safe_load(rkhunter_config)

        logfile = yaml_config["logfile"]

        checks = {
            "rkhunter.found.warning": r"\[ warning \]",
            "rkhunter.found.infection": r"INFECTED$"
        }

        for zabbix_key, search_term in checks.iteritems():
            scan_status = self.check_rkhunter(search_term, logfile)

            zag.add_zabbix_keys({zabbix_key: scan_status})

        zag.send_metrics()
def main():
    """  Main function to run the check """
    argz = parse_args()
    conn_count = 0

    for proc in psutil.process_iter():
        try:
            if proc.name() == argz.proc_to_check:
                if argz.debug:
                    print proc.connections()
                for conn in proc.connections():
                    if conn.status == argz.conn_status and conn.laddr[1] == argz.port:
                        conn_count += 1
        except psutil.NoSuchProcess:
            pass

    if argz.debug:
        print 'Process ({0}) on port {1} has {2} connections in {3} status'.format(
            argz.proc_to_check, argz.port, conn_count, argz.conn_status)

    zgs = ZaggSender(debug=argz.debug)
    zgs.add_zabbix_keys({'{0}'.format(argz.zabbix_key): conn_count})
    zgs.send_metrics()
Example #25
import os
import time

from docker import AutoVersionClient
from docker.errors import APIError
from openshift_tools.monitoring.zagg_sender import ZaggSender

ZBX_KEY = "docker.container.dns.resolution"

if __name__ == "__main__":
    cli = AutoVersionClient(base_url='unix://var/run/docker.sock')

    container_id = os.environ['container_uuid']

    container = cli.create_container(image=cli.inspect_container(container_id)['Image'],
                                     command='getent hosts redhat.com')

    cli.start(container=container.get('Id'))
    exit_code = cli.wait(container)

    for i in range(0, 3):
        try:
            cli.remove_container(container.get('Id'))
            break
        except APIError:
            print "Error while cleaning up container."
            time.sleep(5)

    zs = ZaggSender()
    zs.add_zabbix_keys({ZBX_KEY: exit_code})

    print "Sending these metrics:"
    print ZBX_KEY + ": " + str(exit_code)
    zs.send_metrics()
    print "\nDone.\n"
class OpenshiftRouterChecks(object):
    """Checks for the Openshift Router"""
    def __init__(self):
        self.args = None
        self.zgs = None  # zagg sender
        self.kubeconfig = None
        self.parse_args()
        self.get_kubeconfig()
        self.ocutil = None

    def get_kubeconfig(self):
        """Find kubeconfig to use for OCUtil"""
        # Default master kubeconfig
        kubeconfig = '/tmp/admin.kubeconfig'
        non_master_kube_dir = '/etc/origin/node'

        if os.path.exists(kubeconfig):
            # If /tmp/admin.kubeconfig exists, use it!
            pass
        elif os.path.isdir(non_master_kube_dir):
            for my_file in os.listdir(non_master_kube_dir):
                if my_file.endswith(".kubeconfig"):
                    kubeconfig = os.path.join(non_master_kube_dir, my_file)

        if self.args.debug:
            print "Using kubeconfig: {}".format(kubeconfig)

        self.kubeconfig = kubeconfig

    def check_all_router_health(self):
        """ Perform defined router health check on all routers """

        discovery_key = "disc.openshift.cluster.router"
        discovery_macro = "#OS_ROUTER"
        router_health_item = "disc.openshift.cluster.router.health"

        router_pods = self.find_router_pods()
        health_report = {}
        for router_name, pod_details in router_pods.iteritems():
            health = self.router_pod_healthy(pod_details)
            if self.args.verbose:
                print "{} healthy: {}\n".format(router_name, health)
            health_report[router_name] = health

        # make dynamic items, and queue up the associated data
        router_names = health_report.keys()
        self.zgs.add_zabbix_dynamic_item(discovery_key,
                                         discovery_macro,
                                         router_names,
                                         synthetic=True)

        for router_name, health_status in health_report.iteritems():
            zbx_key = "{}[{}]".format(router_health_item, router_name)
            self.zgs.add_zabbix_keys({zbx_key: int(health_status)},
                                     synthetic=True)

    def running_pod_count_check(self):
        """ return hash of deployment configs containing whether the number
            of running pods matches the definition in the deployment config """

        router_pods = self.find_router_pods()

        # get actual running pod count (per DC)
        dc_pod_count = {}
        for _, details in router_pods.iteritems():
            dc_name = details['metadata']['labels']['deploymentconfig']
            dc_pod_count[dc_name] = dc_pod_count.get(dc_name, 0) + 1

        if self.args.debug:
            print "Running pod count: {}".format(dc_pod_count)

        # get expected pod count as defined in each router DC
        expected_pod_count = {}
        for dc_name in dc_pod_count.keys():
            expected_pod_count[dc_name] = self.ocutil.get_dc(
                dc_name)['spec']['replicas']

        if self.args.debug:
            print "Expected pod count: {}".format(expected_pod_count)

        results = {}
        for dc_name in dc_pod_count.keys():
            results[dc_name] = bool(
                dc_pod_count[dc_name] == expected_pod_count[dc_name])

        if self.args.verbose or self.args.debug:
            print "DC replica count matching actual counts: {}".format(results)

        return results

    def check_router_replica_count(self):
        """ Check whether the running router replica count is the same
            as what is defined in the deployment config """

        discovery_key = "disc.openshift.cluster.router"
        discovery_macro = "#ROUTER_DC"
        dc_status_item = "disc.openshift.cluster.router.expected_pod_count"

        replica_results = self.running_pod_count_check()

        # make dynamic items, and queue up the associated data
        dc_names = replica_results.keys()
        self.zgs.add_zabbix_dynamic_item(discovery_key,
                                         discovery_macro,
                                         dc_names,
                                         synthetic=True)

        for dc_name, replica_status in replica_results.iteritems():
            zbx_key = "{}[{}]".format(dc_status_item, dc_name)
            self.zgs.add_zabbix_keys({zbx_key: int(replica_status)},
                                     synthetic=True)

    def run(self):
        """Main function to run the check"""

        self.ocutil = OCUtil(config_file=self.kubeconfig,
                             verbose=self.args.verbose)
        self.zgs = ZaggSender(verbose=self.args.verbose, debug=self.args.debug)

        self.check_all_router_health()
        self.check_router_replica_count()

        if self.args.dry_run:
            self.zgs.print_unique_metrics_key_value()
        else:
            self.zgs.send_metrics()

    def parse_args(self):
        """ parse the args from the cli """

        parser = argparse.ArgumentParser(description='Openshift Router sender')
        parser.add_argument('-v',
                            '--verbose',
                            action='store_true',
                            default=None,
                            help='Verbose?')
        parser.add_argument('--debug',
                            action='store_true',
                            default=None,
                            help='Debug?')
        parser.add_argument('--dry-run',
                            action='store_true',
                            default=False,
                            help='Collect stats, but do not report to zabbix')

        self.args = parser.parse_args()

    @staticmethod
    def get_router_health_url(router):
        """ build router healthcheck URL """

        podip = router['status']['podIP']
        port = router['spec']['containers'][0]['livenessProbe']['httpGet'][
            'port']
        path = router['spec']['containers'][0]['livenessProbe']['httpGet'][
            'path']
        url = 'http://{}:{}{}'.format(podip, port, path)

        return url
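
    # For example (hypothetical values): a router pod with podIP 10.1.2.3 and a
    # livenessProbe httpGet on port 1936 at path '/healthz' yields the URL
    # 'http://10.1.2.3:1936/healthz'.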

    @staticmethod
    def router_pod_healthy(router):
        """ ping the health port for router pod health """

        url = OpenshiftRouterChecks.get_router_health_url(router)

        try:
            result = urllib2.urlopen(url).getcode()
            return result == 200
        except (urllib2.HTTPError, urllib2.URLError):
            return False

    def find_router_pods(self):
        """ return dict of PODs running haproxy (the router pods) """

        router_pods = {}
        for pod in self.ocutil.get_pods()['items']:
            try:
                img = pod['status']['containerStatuses'][0]['image']
                if 'ose-haproxy-router' in img:
                    router_pods[pod['metadata']['name']] = pod
            except KeyError:
                pass

        return router_pods
class DockerContainerUsageCli(object):
    ''' This is the class that actually pulls everything together into a cli script.
    '''
    def __init__(self, config_file=None):
        if not config_file:
            self.config_file = '/etc/openshift_tools/container_metrics.yml'
        else:
            self.config_file = config_file

        self.config = None

        self.parse_config()

        self.cli = AutoVersionClient(base_url='unix://var/run/docker.sock', timeout=120)
        self.docker_util = DockerUtil(self.cli)
        self.zagg_sender = ZaggSender(verbose=True)

    def parse_config(self):
        """ parse config file """

        if not self.config:
            if not os.path.exists(self.config_file):
                raise IOError(self.config_file + " does not exist.")

            with open(self.config_file) as config:
                self.config = yaml.safe_load(config)

    def format_ctr_name(self, ctr_name):
        ''' Takes a container name and if there's a name_format_regex specified, it applies it '''
        for item in self.config['usage_checks']:
            name_match_regex = item['name_match_regex']

            if 'name_format_regex' in item and re.match(name_match_regex, ctr_name):
                try:
                    name_format_regex = item['name_format_regex']
                    new_name = re.sub(name_match_regex, name_format_regex, ctr_name)
                    return new_name
                except sre_constants.error as ex:
                    # Just use the full name (we don't want to die because of name formatting)
                    print "\nError: %s: [%s]. Using full name [%s].\n" % (ex.message, name_format_regex, ctr_name)
                    return ctr_name

        return ctr_name
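
    # For example (hypothetical config values): with
    #   name_match_regex  = r'^k8s_([a-z-]+)\..*'
    #   name_format_regex = r'\1'
    # re.sub() above renames 'k8s_router.abc123' to 'router'.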

    def main(self):
        ''' The main entrypoint of the cli '''
        ctr_regexes = [uchk['name_match_regex'] for uchk in self.config['usage_checks']]
        use_cgroups = self.config.get('use_cgroups', False)

        ctrs = self.docker_util.get_ctrs_matching_names(ctr_regexes)


        for ctr_name, ctr in ctrs.iteritems():
            (cpu_stats, mem_stats) = self.docker_util.get_ctr_stats(ctr, use_cgroups=use_cgroups)

            formatted_ctr_name = self.format_ctr_name(ctr_name)

            # Add the container hostnames as macros for the dynamic item.
            self.zagg_sender.add_zabbix_dynamic_item(ZBX_DOCKER_DISC_KEY, ZBX_DOCKER_DISC_MACRO,
                                                     [formatted_ctr_name])
            data = {
                '%s[%s]' % (ZBX_CTR_CPU_USED_PCT_KEY, formatted_ctr_name): cpu_stats.used_pct,
                '%s[%s]' % (ZBX_CTR_MEM_USED_KEY, formatted_ctr_name): mem_stats.used,
                '%s[%s]' % (ZBX_CTR_MEM_LIMIT_KEY, formatted_ctr_name): mem_stats.limit,
                '%s[%s]' % (ZBX_CTR_MEM_LIMIT_USED_PCT_KEY, formatted_ctr_name): mem_stats.limit_used_pct,
                '%s[%s]' % (ZBX_CTR_MEM_FAILCNT_KEY, formatted_ctr_name): mem_stats.failcnt,
            }

            print "%s:" % formatted_ctr_name
            for k, v in data.iteritems():
                print "  %s: %s" % (k, v)
            print

            self.zagg_sender.add_zabbix_keys(data)

        # Actually send the metrics
        self.zagg_sender.send_metrics()
Example #28
def send_zagg_data(build_ran, create_app, http_code, run_time):
    ''' send data to Zagg'''
    zgs_time = time.time()
    zgs = ZaggSender()
    print "Send data to Zagg"
    if build_ran == 1:
        zgs.add_zabbix_keys({'openshift.master.app.build.create': create_app})
        zgs.add_zabbix_keys(
            {'openshift.master.app.build.create.code': http_code})
        zgs.add_zabbix_keys(
            {'openshift.master.app.build.create.time': run_time})
    else:
        zgs.add_zabbix_keys({'openshift.master.app.create': create_app})
        zgs.add_zabbix_keys({'openshift.master.app.create.code': http_code})
        zgs.add_zabbix_keys({'openshift.master.app.create.time': run_time})
    try:
        zgs.send_metrics()
    except:
        print "Error sending to Zagg: %s \n %s" % (sys.exc_info()[0], sys.exc_info()[1])
    print "Data sent in %s seconds" % str(time.time() - zgs_time)
Example #29

#This is not a module, but pylint thinks it is.  This is a command.
#pylint: disable=invalid-name

from openshift_tools.monitoring.zagg_sender import ZaggSender
from openshift_tools.monitoring import pminfo

FILESYSTEM_METRIC = ['filesys.full']
DISCOVERY_KEY_FS = 'disc.filesys'
ITEM_PROTOTYPE_MACRO_FS = '#OSO_FILESYS'
ITEM_PROTOTYPE_KEY_FULL = 'disc.filesys.full'

FILESYS_METRICS = pminfo.get_metrics(FILESYSTEM_METRIC)

FILTERED_FILESYS_METRICS = {k.replace('filesys.full.', ''):v
                            for (k, v) in FILESYS_METRICS.iteritems()
                            if 'docker' not in k}

ZS = ZaggSender()
ZS.add_zabbix_dynamic_item(DISCOVERY_KEY_FS, ITEM_PROTOTYPE_MACRO_FS, FILTERED_FILESYS_METRICS.keys())

for filesys_name, filesys_full in FILTERED_FILESYS_METRICS.iteritems():
    ZS.add_zabbix_keys({'%s[%s]' % (ITEM_PROTOTYPE_KEY_FULL, filesys_name): filesys_full})

ZS.send_metrics()
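
Each queued key pairs the item prototype with a filesystem instance name, e.g. (hypothetical instance) disc.filesys.full[/dev/sda1] carrying that filesystem's filesys.full value from PCP.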
Example #30
class OpsZaggPCPClient(object):
    """ class to send data to zagg """

    def __init__(self):
        self.zagg_sender = None
        self.args = None
        self.config = None
        self.pcp_metrics = []
        self.heartbeat = None

    def run(self):
        """ main function to run the script """

        self.parse_args()
        self.parse_config(self.args.config_file)
        self.config_zagg_sender()

        if self.args.metrics:
            self.add_metrics()

        self.add_metrics_from_config()

        self.zagg_sender.send_metrics()

    def parse_args(self):
        """ parse the args from the cli """
        parser = argparse.ArgumentParser(description='Zagg PCP metric sender')
        parser.add_argument('--send-pcp-metrics', help="send pcp metrics to zagg", action="store_true")
        parser.add_argument('-m', '--metrics', help="send PCP metrics to zagg")
        parser.add_argument('-s', '--host', help='specify host name as registered in Zabbix')
        parser.add_argument('-z', '--zagg-url', help='url of Zagg server')
        parser.add_argument('--zagg-user', help='username of the Zagg server')
        parser.add_argument('--zagg-pass', help='Password of the Zagg server')
        parser.add_argument('--zagg-ssl-verify', default=None, help='Whether to verify ssl certificates.')
        parser.add_argument('-v', '--verbose', action='store_true', default=None, help='Verbose?')
        parser.add_argument('--debug', action='store_true', default=None, help='Debug?')
        parser.add_argument('-c', '--config-file', help='ops-zagg-client config file',
                            default='/etc/openshift_tools/zagg_client.yaml')

        self.args = parser.parse_args()

    def parse_config(self, config_file):
        """ parse config file """
        with open(config_file) as config:
            self.config = yaml.safe_load(config)

    def config_zagg_sender(self):
        """ configure the zagg_sender """

        zagg_url = self.args.zagg_url if self.args.zagg_url else self.config['zagg']['url']
        zagg_user = self.args.zagg_user if self.args.zagg_user else self.config['zagg']['user']
        zagg_password = self.args.zagg_pass if self.args.zagg_pass else self.config['zagg']['pass']
        zagg_verbose = self.args.verbose if self.args.verbose else self.config['zagg']['verbose']
        zagg_debug = self.args.debug if self.args.debug else self.config['zagg']['debug']
        zagg_ssl_verify = self.args.zagg_ssl_verify if self.args.zagg_ssl_verify else self.config['zagg']['ssl_verify']
        host = self.args.host if self.args.host else self.config['host']['name']

        if isinstance(zagg_verbose, str):
            zagg_verbose = (zagg_verbose == 'True')

        if isinstance(zagg_debug, str):
            zagg_debug = (zagg_debug == 'True')

        if isinstance(zagg_ssl_verify, str):
            zagg_ssl_verify = (zagg_ssl_verify == 'True')

        zagg_conn = ZaggConnection(url=zagg_url,
                                   user=zagg_user,
                                   password=zagg_password,
                                   ssl_verify=zagg_ssl_verify,
                                   debug=zagg_debug,
                                  )

        self.zagg_sender = ZaggSender(host, zagg_conn, zagg_verbose, zagg_debug)

    def add_metrics_from_config(self):
        """ collect pcp metrics from a config file. Add to send to ZaggSender """

        self.add_pcp_to_zagg_sender(self.config['pcp']['metrics'])

    def add_metrics(self):
        """ collect pcp metrics to send to ZaggSender """

        metric_list = self.args.metrics.split(',')

        self.add_pcp_to_zagg_sender(metric_list)

    def add_pcp_to_zagg_sender(self, pcp_metrics):
        """ something pcp yada yada """

        pcp_metric_dict = pminfo.get_metrics(metrics=pcp_metrics, derived_metrics=None)

        self.zagg_sender.add_zabbix_keys(pcp_metric_dict)
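
    # A zagg_client.yaml matching the config lookups above might look like
    # this (all values hypothetical):
    #
    #   host:
    #     name: example-node-1
    #   zagg:
    #     url: https://zagg.example.com
    #     user: zagg-user
    #     pass: secret
    #     ssl_verify: 'True'
    #     verbose: 'False'
    #     debug: 'False'
    #   pcp:
    #     metrics:
    #       - kernel.all.load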
    def report_to_zabbix(self, total_snapshottable_vols,
                         total_snapshots_created,
                         total_snapshot_creation_errors):
        """ Sends the commands exit code to zabbix. """
        zs = ZaggSender(verbose=True)

        # Populate EBS_SNAPSHOTTER_DISC_SCHEDULE_MACRO with the schedule
        zs.add_zabbix_dynamic_item(EBS_SNAPSHOTTER_DISC_KEY, EBS_SNAPSHOTTER_DISC_SCHEDULE_MACRO, \
                                   [self.args.with_schedule])

        # Send total_snapshottable_vols prototype item key and value
        zs.add_zabbix_keys({'%s[%s]' % (EBS_SNAPSHOTTER_SNAPSHOTTABLE_VOLUMES_KEY, self.args.with_schedule): \
                           total_snapshottable_vols})

        # Send total_snapshots_created prototype item key and value
        zs.add_zabbix_keys({'%s[%s]' % (EBS_SNAPSHOTTER_SNAPSHOTS_CREATED_KEY, self.args.with_schedule): \
                           total_snapshots_created})

        # Send total_snapshot_creation_errors prototype item key and value
        zs.add_zabbix_keys({'%s[%s]' % (EBS_SNAPSHOTTER_SNAPSHOT_CREATION_ERRORS_KEY, self.args.with_schedule): \
                           total_snapshot_creation_errors})

        # Actually send them
        zs.send_metrics()

#This is not a module, but pylint thinks it is.  This is a command.
#pylint: disable=invalid-name

from openshift_tools.monitoring.zagg_sender import ZaggSender
from openshift_tools.monitoring import pminfo

ZS = ZaggSender(verbose=True)
FILESYS_FULL_METRIC = ['filesys.full']
FILESYS_INODE_DERIVED_METRICS = {'filesys.inodes.pused' :
                                 'filesys.usedfiles / (filesys.usedfiles + filesys.freefiles) * 100'
                                }

DISCOVERY_KEY_FS = 'disc.filesys'
ITEM_PROTOTYPE_MACRO_FS = '#OSO_FILESYS'

ITEM_PROTOTYPE_KEY_FULL = 'disc.filesys.full'
ITEM_PROTOTYPE_KEY_INODE = 'disc.filesys.inodes.pused'

def filter_out_docker_filesystems(metric_dict, filesystem_filter):
    """ Simple filter to elimate unnecessary characters in the key name """
    filtered_dict = {k.replace(filesystem_filter, ''):v
                     for (k, v) in metric_dict.iteritems()
        # Work around because loopback lies about its actual total space
        if not dds.is_loopback:
            dds.data_space_total = dds.data_space_used + dds.data_space_available
            dds.metadata_space_total = dds.metadata_space_used + dds.metadata_space_available


        dds.data_space_percent_available = (dds.data_space_available / dds.data_space_total) * 100
        dds.metadata_space_percent_available = (dds.metadata_space_available / dds.metadata_space_total) * 100

        return dds

if __name__ == "__main__":
    keys = None
    exit_code = 0
    zs = ZaggSender()
    try:
        cli = AutoVersionClient(base_url='unix://var/run/docker.sock')
        dw = DockerWatcher(cli)
        dw_dds = dw.get_disk_usage()

        keys = {
            'docker.storage.data.space.used': dw_dds.data_space_used,
            'docker.storage.data.space.available': dw_dds.data_space_available,
            'docker.storage.data.space.percent_available': dw_dds.data_space_percent_available,
            'docker.storage.data.space.total': dw_dds.data_space_total,

            'docker.storage.metadata.space.used': dw_dds.metadata_space_used,
            'docker.storage.metadata.space.available': dw_dds.metadata_space_available,
            'docker.storage.metadata.space.percent_available': dw_dds.metadata_space_percent_available,
            'docker.storage.metadata.space.total': dw_dds.metadata_space_total,
def send_zagg_data(build_ran, create_app, http_code, run_time):
    ''' send data to Zagg'''
    zgs = ZaggSender()
    print "Send data to Zagg"
    if build_ran == 1:
        zgs.add_zabbix_keys({'openshift.master.app.build.create': create_app})
        zgs.add_zabbix_keys(
            {'openshift.master.app.build.create.code': http_code})
        zgs.add_zabbix_keys(
            {'openshift.master.app.build.create.time': run_time})
    else:
        zgs.add_zabbix_keys({'openshift.master.app.create': create_app})
        zgs.add_zabbix_keys({'openshift.master.app.create.code': http_code})
        zgs.add_zabbix_keys({'openshift.master.app.create.time': run_time})
    zgs.send_metrics()
Example #35
class OpenshiftMasterZaggClient(object):
    """ Checks for the Openshift Master """
    def __init__(self):
        self.args = None
        self.zagg_sender = None
        self.ora = OpenshiftRestApi()

    def run(self):
        """  Main function to run the check """

        self.parse_args()
        self.zagg_sender = ZaggSender(verbose=self.args.verbose,
                                      debug=self.args.debug)

        try:
            if self.args.healthz or self.args.all_checks:
                self.healthz_check()
        except Exception as ex:
            print "Problem performing healthz check: %s " % ex.message
            self.zagg_sender.add_zabbix_keys(
                {'openshift.master.api.healthz': 'false'})

        try:
            if self.args.api_ping or self.args.all_checks:
                self.api_ping()

            if self.args.project_count or self.args.all_checks:
                self.project_count()

            if self.args.pod_count or self.args.all_checks:
                self.pod_count()

            if self.args.user_count or self.args.all_checks:
                self.user_count()
        except Exception as ex:
            print "Problem Openshift API checks: %s " % ex.message
            self.zagg_sender.add_zabbix_keys({'openshift.master.api.ping':
                                              0})  # Openshift API is down

        try:
            if self.args.metrics or self.args.all_checks:
                self.metric_check()
        except Exception as ex:
            print "Problem getting Openshift metrics at /metrics: %s " % ex.message
            self.zagg_sender.add_zabbix_keys(
                {'openshift.master.metric.ping':
                 0})  # Openshift Metrics are down

        self.zagg_sender.send_metrics()

    def parse_args(self):
        """ parse the args from the cli """

        parser = argparse.ArgumentParser(description='Network metric sender')
        parser.add_argument('-v',
                            '--verbose',
                            action='store_true',
                            default=None,
                            help='Verbose?')
        parser.add_argument('--debug',
                            action='store_true',
                            default=None,
                            help='Debug?')

        master_check_group = parser.add_argument_group(
            'Different Checks to Perform')
        master_check_group.add_argument('--all-checks',
                                        action='store_true',
                                        default=None,
                                        help='Do all of the checks')

        master_check_group.add_argument(
            '--api-ping',
            action='store_true',
            default=None,
            help='Verify the Openshift API is alive')

        master_check_group.add_argument(
            '--healthz',
            action='store_true',
            default=None,
            help='Query the Openshift Master API /healthz')

        master_check_group.add_argument(
            '--metrics',
            action='store_true',
            default=None,
            help='Query the Openshift Master Metrics at /metrics')

        master_check_group.add_argument(
            '--project-count',
            action='store_true',
            default=None,
            help='Query the Openshift Master for Number of Pods')

        master_check_group.add_argument(
            '--pod-count',
            action='store_true',
            default=None,
            help='Query the Openshift Master for Number of Running Pods')

        master_check_group.add_argument(
            '--user-count',
            action='store_true',
            default=None,
            help='Query the Openshift Master for Number of Users')

        self.args = parser.parse_args()

    def api_ping(self):
        """ Verify the Openshift API health is responding correctly """

        print "\nPerforming Openshift API ping check..."

        response = self.ora.get('/api/v1/nodes')
        print "\nOpenshift API ping is alive"
        print "Number of nodes in the Openshift cluster: %s" % len(
            response['items'])

        self.zagg_sender.add_zabbix_keys({
            'openshift.master.api.ping':
            1,
            'openshift.master.node.count':
            len(response['items'])
        })

    def healthz_check(self):
        """ check the /healthz API call """

        print "\nPerforming /healthz check..."

        response = self.ora.get('/healthz', rtype='text')
        print "healthz check returns: %s " % response

        self.zagg_sender.add_zabbix_keys(
            {'openshift.master.api.healthz': str('ok' in response).lower()})

    def metric_check(self):
        """ collect certain metrics from the /metrics API call """

        print "\nPerforming /metrics check..."
        response = self.ora.get('/metrics', rtype='text')

        for metric_type in text_string_to_metric_families(response):

            # Collect the apiserver_request_latencies_summary{resource="pods",verb="LIST",quantiles in /metrics
            # Collect the apiserver_request_latencies_summary{resource="pods",verb="WATCHLIST",quantiles in /metrics
            if metric_type.name == 'apiserver_request_latencies_summary':
                key_str = 'openshift.master.apiserver.latency.summary'
                for sample in metric_type.samples:
                    if (sample[1]['resource'] == 'pods'
                            and 'quantile' in sample[1]
                            and 'LIST' in sample[1]['verb']):
                        curr_key_str = key_str + ".pods.quantile.%s.%s" % (
                            sample[1]['verb'],
                            sample[1]['quantile'].split('.')[1])

                        if math.isnan(sample[2]):
                            value = 0
                        else:
                            value = sample[2]

                        self.zagg_sender.add_zabbix_keys(
                            {curr_key_str.lower(): int(value / 1000)})

            # Collect the scheduler_e2e_scheduling_latency_microseconds{quantiles in /metrics
            if metric_type.name == 'scheduler_e2e_scheduling_latency_microseconds':
                for sample in metric_type.samples:
                    if 'quantile' in sample[1]:
                        key_str = 'openshift.master.scheduler.e2e.scheduling.latency'
                        curr_key_str = key_str + ".quantile.%s" % (
                            sample[1]['quantile'].split('.')[1])

                        if math.isnan(sample[2]):
                            value = 0
                        else:
                            value = sample[2]

                        self.zagg_sender.add_zabbix_keys(
                            {curr_key_str.lower(): int(value / 1000)})

        self.zagg_sender.add_zabbix_keys({'openshift.master.metric.ping': 1})
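
    # For reference, a minimal sketch (assumed input, not captured from a live
    # API) of how text_string_to_metric_families parses the Prometheus text
    # consumed above. In the prometheus_client version this code targets, each
    # sample is a (name, labels, value) tuple, hence sample[1] and sample[2]:
    #
    #   from prometheus_client.parser import text_string_to_metric_families
    #
    #   metrics_text = (
    #       '# TYPE apiserver_request_latencies_summary summary\n'
    #       'apiserver_request_latencies_summary'
    #       '{resource="pods",verb="LIST",quantile="0.99"} 52000\n')
    #
    #   for family in text_string_to_metric_families(metrics_text):
    #       for sample in family.samples:
    #           print sample[0], sample[1], sample[2]
    #   # -> apiserver_request_latencies_summary
    #   #    {'resource': 'pods', 'verb': 'LIST', 'quantile': '0.99'} 52000.0
    #
    # '0.99'.split('.')[1] yields '99' for the key suffix, and the value in
    # microseconds is divided by 1000, i.e. 52000 -> 52ms.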

    def project_count(self):
        """ check the number of projects in Openshift """

        print "\nPerforming project count check..."

        excluded_names = [
            'openshift', 'openshift-infra', 'default', 'ops-monitor'
        ]
        response = self.ora.get('/oapi/v1/projects')

        project_names = [
            project['metadata']['name'] for project in response['items']
        ]
        valid_names = set(project_names) - set(excluded_names)

        print "Project count: %s" % len(valid_names)

        self.zagg_sender.add_zabbix_keys(
            {'openshift.project.count': len(valid_names)})

    def pod_count(self):
        """ check the number of pods in Openshift """

        print "\nPerforming pod count check..."

        response = self.ora.get('/api/v1/pods')

        # Get running pod count
        running_pod_count = 0
        for i in response['items']:
            if 'containerStatuses' in i['status']:
                if 'running' in i['status']['containerStatuses'][0]['state']:
                    running_pod_count += 1

        # Get running pod count on compute only nodes (non-infra)
        running_user_pod_count = 0
        for i in response['items']:
            if 'containerStatuses' in i['status']:
                if 'running' in i['status']['containerStatuses'][0]['state']:
                    if 'nodeSelector' in i['spec']:
                        if i['spec']['nodeSelector']['type'] == 'compute':
                            running_user_pod_count += 1

        print "Total pod count: %s" % len(response['items'])
        print "Running pod count: %s" % running_pod_count
        print "User Running pod count: %s" % running_user_pod_count

        self.zagg_sender.add_zabbix_keys({
            'openshift.master.pod.running.count':
            running_pod_count,
            'openshift.master.pod.user.running.count':
            running_user_pod_count,
            'openshift.master.pod.total.count':
            len(response['items'])
        })
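
    # A trimmed sketch of one entry in response['items'], with hypothetical
    # values, showing only the fields pod_count() consults:
    #
    #   {'spec': {'nodeSelector': {'type': 'compute'}},
    #    'status': {'containerStatuses': [
    #        {'state': {'running': {'startedAt': '2016-01-01T00:00:00Z'}}}]}}
    #
    # Only the first container's state is examined, so a pod whose later
    # containers are not running still counts as running here.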

    def user_count(self):
        """ check the number of users in Openshift """

        print "\nPerforming user count check..."

        response = self.ora.get('/oapi/v1/users')

        print "Total user count: %s" % len(response['items'])
        self.zagg_sender.add_zabbix_keys(
            {'openshift.master.user.count': len(response['items'])})
Example #36
            exit_code = cli.exec_inspect(exec_id)['ExitCode']
        except APIError:
            # could race from getting a container list and the container exiting
            # before we can exec on it, so just ignore exited containers
            continue

        if exit_code == CMD_NOT_FOUND:
            continue

        print results
        print "Exit Code: " + str(exit_code)

        if exit_code != 0:
            bad_dns_count += 1
            ctr_data = cli.inspect_container(ctr['Id'])
            print "Additional info: Namespace: {} Name: {} IP: {}".format(
                ctr['Labels'].get('io.kubernetes.pod.namespace', 'null'),
                ctr['Labels'].get('io.kubernetes.pod.name', 'null'),
                ctr_data['NetworkSettings']['IPAddress'])

        # Extra whitespace between output for each container
        print

    zs = ZaggSender()
    zs.add_zabbix_keys({ZBX_KEY: bad_dns_count})

    print "Sending these metrics:"
    print ZBX_KEY + ": " + str(bad_dns_count)
    zs.send_metrics()
    print "\nDone.\n"
Example #37
class OpenshiftMasterZaggClient(object):
    """ Checks for the Openshift Master """

    def __init__(self):
        self.args = None
        self.zagg_sender = None
        self.ora = None
        self.zabbix_api_key = None
        self.zabbix_healthz_key = None

    def run(self):
        """  Main function to run the check """

        self.parse_args()
        self.zagg_sender = ZaggSender(verbose=self.args.verbose, debug=self.args.debug)

        if self.args.local:
            self.ora = OpenshiftRestApi()
            self.args.api_ping = True
            self.args.healthz = True
            self.zabbix_api_key = 'openshift.master.local.api.ping'
            self.zabbix_healthz_key = 'openshift.master.local.api.healthz'
        else:
            master_cfg_from_yaml = []
            with open('/etc/origin/master/master-config.yaml', 'r') as yml:
                master_cfg_from_yaml = yaml.load(yml)
            self.ora = OpenshiftRestApi(host=master_cfg_from_yaml['oauthConfig']['masterURL'],
                                        verify_ssl=True)

            self.zabbix_api_key = 'openshift.master.api.ping'
            self.zabbix_healthz_key = 'openshift.master.api.healthz'

        try:
            if self.args.healthz or self.args.all_checks:
                self.healthz_check()

        except Exception as ex:
            print "Problem performing healthz check: %s " % ex.message
            self.zagg_sender.add_zabbix_keys({self.zabbix_healthz_key: 'false'})

        try:
            if self.args.api_ping or self.args.all_checks:
                self.api_ping()

            if self.args.project_count or self.args.all_checks:
                self.project_count()

            if self.args.pod_count or self.args.all_checks:
                self.pod_count()

            if self.args.user_count or self.args.all_checks:
                self.user_count()

            if self.args.pv_info or self.args.all_checks:
                self.pv_info()

            if self.args.nodes_not_ready or self.args.all_checks:
                self.nodes_not_ready()

        except Exception as ex:
            print "Problem Openshift API checks: %s " % ex.message
            self.zagg_sender.add_zabbix_keys({self.zabbix_api_key: 0}) # Openshift API is down

        try:
            if self.args.metrics or self.args.all_checks:
                self.metric_check()

        except Exception as ex:
            print "Problem getting Openshift metrics at /metrics: %s " % ex.message
            self.zagg_sender.add_zabbix_keys({'openshift.master.metric.ping' : 0}) # Openshift Metrics are down

        self.zagg_sender.send_metrics()

    def parse_args(self):
        """ parse the args from the cli """

        parser = argparse.ArgumentParser(description='Openshift master checks sender')
        parser.add_argument('-v', '--verbose', action='store_true', default=None, help='Verbose?')
        parser.add_argument('--debug', action='store_true', default=None, help='Debug?')
        parser.add_argument('-l', '--local', action='store_true', default=False,
                            help='Run local checks against the local API (https://127.0.0.1)')

        master_check_group = parser.add_argument_group('Different Checks to Perform')
        master_check_group.add_argument('--all-checks', action='store_true', default=None,
                                        help='Do all of the checks')

        master_check_group.add_argument('--api-ping', action='store_true', default=None,
                                        help='Verify the Openshift API is alive')

        master_check_group.add_argument('--healthz', action='store_true', default=None,
                                        help='Query the Openshift Master API /healthz')

        master_check_group.add_argument('--metrics', action='store_true', default=None,
                                        help='Query the Openshift Master Metrics at /metrics')

        master_check_group.add_argument('--project-count', action='store_true', default=None,
                                        help='Query the Openshift Master for Number of Projects')

        master_check_group.add_argument('--pod-count', action='store_true', default=None,
                                        help='Query the Openshift Master for Number of Running Pods')

        master_check_group.add_argument('--user-count', action='store_true', default=None,
                                        help='Query the Openshift Master for Number of Users')

        master_check_group.add_argument('--pv-info', action='store_true', default=None,
                                        help='Query the Openshift Master for Persistent Volumes Info')

        master_check_group.add_argument('--nodes-not-ready', action='store_true', default=None,
                                        help='Query the Openshift Master for number of nodes not in Ready state')

        self.args = parser.parse_args()

    def api_ping(self):
        """ Verify the Openshift API health is responding correctly """

        print "\nPerforming Openshift API ping check..."

        response = self.ora.get('/api/v1/nodes')
        print "\nOpenshift API ping is alive"
        print "Number of nodes in the Openshift cluster: %s" % len(response['items'])

        self.zagg_sender.add_zabbix_keys({self.zabbix_api_key: 1,
                                          'openshift.master.node.count': len(response['items'])})

    def healthz_check(self):
        """ check the /healthz API call """

        print "\nPerforming /healthz check..."

        response = self.ora.get('/healthz', rtype='text')
        print "healthz check returns: %s " %response

        self.zagg_sender.add_zabbix_keys({self.zabbix_healthz_key: str('ok' in response).lower()})

    def metric_check(self):
        """ collect certain metrics from the /metrics API call """

        print "\nPerforming /metrics check..."
        response = self.ora.get('/metrics', rtype='text')

        for metric_type in text_string_to_metric_families(response):

            # Collect the apiserver_request_latencies_summary{resource="pods",verb="LIST",quantiles in /metrics
            # Collect the apiserver_request_latencies_summary{resource="pods",verb="WATCHLIST",quantiles in /metrics
            if metric_type.name == 'apiserver_request_latencies_summary':
                key_str = 'openshift.master.apiserver.latency.summary'
                for sample in metric_type.samples:
                    if (sample[1]['resource'] == 'pods'
                            and sample[1].has_key('quantile')
                            and 'LIST' in sample[1]['verb']):
                        curr_key_str = key_str + ".pods.quantile.%s.%s" % (sample[1]['verb'],
                                                                           sample[1]['quantile'].split('.')[1])

                        if math.isnan(sample[2]):
                            value = 0
                        else:
                            value = sample[2]

                        self.zagg_sender.add_zabbix_keys({curr_key_str.lower(): int(value/1000)})

            # Collect the scheduler_e2e_scheduling_latency_microseconds{quantiles in /metrics
            if metric_type.name == 'scheduler_e2e_scheduling_latency_microseconds':
                for sample in metric_type.samples:
                    if sample[1].has_key('quantile'):
                        key_str = 'openshift.master.scheduler.e2e.scheduling.latency'
                        curr_key_str = key_str + ".quantile.%s" % (sample[1]['quantile'].split('.')[1])

                        if math.isnan(sample[2]):
                            value = 0
                        else:
                            value = sample[2]

                        self.zagg_sender.add_zabbix_keys({curr_key_str.lower(): int(value/1000)})

        self.zagg_sender.add_zabbix_keys({'openshift.master.metric.ping' : 1})

    def project_count(self):
        """ check the number of projects in Openshift """

        print "\nPerforming project count check..."

        excluded_names = ['openshift', 'openshift-infra', 'default', 'ops-monitor']
        response = self.ora.get('/oapi/v1/projects')

        project_names = [project['metadata']['name'] for project in response['items']]
        valid_names = set(project_names) - set(excluded_names)

        print "Project count: %s" % len(valid_names)

        self.zagg_sender.add_zabbix_keys({'openshift.project.count' : len(valid_names)})

    def pod_count(self):
        """ check the number of pods in Openshift """

        print "\nPerforming pod count check..."

        response = self.ora.get('/api/v1/pods')

        # Get running pod count
        running_pod_count = 0
        for i in response['items']:
            if 'containerStatuses' in i['status']:
                if 'running' in i['status']['containerStatuses'][0]['state']:
                    running_pod_count += 1

        # Get running pod count on compute only nodes (non-infra)
        running_user_pod_count = 0
        for i in response['items']:
            if 'containerStatuses' in i['status']:
                if 'running' in i['status']['containerStatuses'][0]['state']:
                    if 'nodeSelector' in i['spec']:
                        if i['spec']['nodeSelector']['type'] == 'compute':
                            running_user_pod_count += 1


        print "Total pod count: %s" % len(response['items'])
        print "Running pod count: %s" % running_pod_count
        print "User Running pod count: %s" % running_user_pod_count

        self.zagg_sender.add_zabbix_keys({'openshift.master.pod.running.count' : running_pod_count,
                                          'openshift.master.pod.user.running.count' : running_user_pod_count,
                                          'openshift.master.pod.total.count' : len(response['items'])})

    def user_count(self):
        """ check the number of users in Openshift """

        print "\nPerforming user count check..."

        response = self.ora.get('/oapi/v1/users')

        print "Total user count: %s" % len(response['items'])
        self.zagg_sender.add_zabbix_keys({'openshift.master.user.count' : len(response['items'])})

    def pv_info(self):
        """ Gather info about the persistent volumes in Openshift """

        print "\nPerforming user persistent volume count...\n"

        response = self.ora.get('/api/v1/persistentvolumes')

        pv_capacity_total = 0
        pv_capacity_available = 0
        pv_types = {'Available': 0,
                    'Bound': 0,
                    'Released': 0,
                    'Failed': 0}

        # Dynamic items variables
        discovery_key_pv = 'disc.pv'
        item_prototype_macro_pv = '#OSO_PV'
        item_prototype_key_count = 'disc.pv.count'
        item_prototype_key_available = 'disc.pv.available'
        dynamic_pv_count = defaultdict(int)
        dynamic_pv_available = defaultdict(int)

        for item in response['items']:
            # gather dynamic pv counts
            dynamic_pv_count[item['spec']['capacity']['storage']] += 1

            #get count of each pv type available
            pv_types[item['status']['phase']] += 1

            #get info for the capacity and capacity available
            capacity = item['spec']['capacity']['storage']
            if item['status']['phase'] == 'Available':
                # get total available capacity
                pv_capacity_available = pv_capacity_available + int(capacity.replace('Gi', ''))

                # gather dynamic pv available counts
                dynamic_pv_available[item['spec']['capacity']['storage']] += 1

            pv_capacity_total = pv_capacity_total + int(capacity.replace('Gi', ''))

        print "Total Persistent Volume Total count: %s" % len(response['items'])
        print 'Total Persistent Volume Capacity: %s' % pv_capacity_total
        print 'Total Persisten Volume Available Capacity: %s' % pv_capacity_available

        self.zagg_sender.add_zabbix_keys(
            {'openshift.master.pv.total.count' : len(response['items']),
             'openshift.master.pv.space.total': pv_capacity_total,
             'openshift.master.pv.space.available': pv_capacity_available})

        for key, value in pv_types.iteritems():
            print "Total Persistent Volume %s count: %s" % (key, value)
            self.zagg_sender.add_zabbix_keys(
                {'openshift.master.pv.%s.count' %key.lower() : value})

        # Add dynamic items
        self.zagg_sender.add_zabbix_dynamic_item(discovery_key_pv, item_prototype_macro_pv, dynamic_pv_count.keys())

        for size, count in dynamic_pv_count.iteritems():
            print
            print "Total Persistent Volume %s count: %s" % (size, count)
            print "Total Persistent Volume available %s count: %s" % (size, dynamic_pv_available[size])

            self.zagg_sender.add_zabbix_keys({"%s[%s]" %(item_prototype_key_count, size) : count,
                                              "%s[%s]" %(item_prototype_key_available, size) : dynamic_pv_available[size]})

    def nodes_not_ready(self):
        """ check the number of nodes in the cluster that are not ready"""

        print "\nPerforming nodes not ready check..."

        response = self.ora.get('/api/v1/nodes')

        nodes_not_schedulable = []

        for n in response['items']:
            if "unschedulable" in n['spec']:
                nodes_not_schedulable.append(n)

        nodes_not_ready = []

        for n in response['items']:
            has_ready_status = False
            for cond in n['status']['conditions']:
                if cond['reason'] == "KubeletReady":
                    has_ready_status = True
                    if cond['status'].lower() != "true":
                        nodes_not_ready.append(n)
            if not has_ready_status:
                nodes_not_ready.append(n)


        print "Count of nodes not schedulable: %s" % len(nodes_not_schedulable)
        print "Count of nodes not ready: %s" % len(nodes_not_ready)

        self.zagg_sender.add_zabbix_keys(
            {'openshift.master.nodesnotready.count' : len(nodes_not_ready)})

        self.zagg_sender.add_zabbix_keys(
            {'openshift.master.nodesnotschedulable.count' : len(nodes_not_schedulable)})
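
    # A trimmed, hypothetical node condition list as consulted above; a node is
    # counted not ready when its KubeletReady condition is not "True", or when
    # no KubeletReady condition exists at all:
    #
    #   n['status']['conditions'] = [
    #       {'type': 'Ready', 'reason': 'KubeletReady', 'status': 'True',
    #        'message': 'kubelet is posting ready status'}]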
Example #38
    def run(self):
        """  Main function to run the check """

        self.parse_args()
        self.zagg_sender = ZaggSender(verbose=self.args.verbose, debug=self.args.debug)

        if self.args.local:
            self.ora = OpenshiftRestApi()
            self.args.api_ping = True
            self.args.healthz = True
            self.zabbix_api_key = 'openshift.master.local.api.ping'
            self.zabbix_healthz_key = 'openshift.master.local.api.healthz'
        else:
            master_cfg_from_yaml = []
            with open('/etc/origin/master/master-config.yaml', 'r') as yml:
                master_cfg_from_yaml = yaml.load(yml)
            self.ora = OpenshiftRestApi(host=master_cfg_from_yaml['oauthConfig']['masterURL'],
                                        verify_ssl=True)

            self.zabbix_api_key = 'openshift.master.api.ping'
            self.zabbix_healthz_key = 'openshift.master.api.healthz'

        try:
            if self.args.healthz or self.args.all_checks:
                self.healthz_check()

        except Exception as ex:
            print "Problem performing healthz check: %s " % ex.message
            self.zagg_sender.add_zabbix_keys({self.zabbix_healthz_key: 'false'})

        try:
            if self.args.api_ping or self.args.all_checks:
                self.api_ping()

            if self.args.project_count or self.args.all_checks:
                self.project_count()

            if self.args.pod_count or self.args.all_checks:
                self.pod_count()

            if self.args.user_count or self.args.all_checks:
                self.user_count()

            if self.args.pv_info or self.args.all_checks:
                self.pv_info()

            if self.args.nodes_not_ready or self.args.all_checks:
                self.nodes_not_ready()

        except Exception as ex:
            print "Problem Openshift API checks: %s " % ex.message
            self.zagg_sender.add_zabbix_keys({self.zabbix_api_key: 0}) # Openshift API is down

        try:
            if self.args.metrics or self.args.all_checks:
                self.metric_check()

        except Exception as ex:
            print "Problem getting Openshift metrics at /metrics: %s " % ex.message
            self.zagg_sender.add_zabbix_keys({'openshift.master.metric.ping' : 0}) # Openshift Metrics are down

        self.zagg_sender.send_metrics()
class OpenshiftDockerRegistryChecker(object):
    """ Checks for the Openshift Cluster Docker Registry """
    def __init__(self):
        self.args = None
        self.zagg_sender = None

        self.docker_hosts = []
        self.docker_port = None
        # Assume secure registry
        self.docker_protocol = 'https'
        self.docker_service_ip = None
        self.kubeconfig = None

    def get_kubeconfig(self):
        ''' Find kubeconfig to use for OCUtil '''
        # Default master kubeconfig
        kubeconfig = '/tmp/admin.kubeconfig'
        non_master_kube_dir = '/etc/origin/node'

        if os.path.isdir(non_master_kube_dir):
            for my_file in os.listdir(non_master_kube_dir):
                if my_file.endswith(".kubeconfig"):
                    kubeconfig = os.path.join(non_master_kube_dir, my_file)

        if self.args.debug:
            print "Using kubeconfig: {}".format(kubeconfig)

        self.kubeconfig = kubeconfig

    def run(self):
        """  Main function to run the check """

        self.parse_args()
        self.get_kubeconfig()
        ocutil = OCUtil(config_file=self.kubeconfig, verbose=self.args.verbose)
        self.zagg_sender = ZaggSender(verbose=self.args.verbose,
                                      debug=self.args.debug)

        try:
            oc_yaml = ocutil.get_service('docker-registry')
            self.get_registry_service(oc_yaml)
            oc_yaml = ocutil.get_endpoint('docker-registry')
            self.get_registry_endpoints(oc_yaml)
        except Exception as ex:
            print "Problem retreiving registry IPs: %s " % ex.message

        self.registry_service_check()
        self.registry_health_check()

        self.zagg_sender.send_metrics()

    def parse_args(self):
        """ parse the args from the cli """

        parser = argparse.ArgumentParser(
            description='Openshift Cluster Docker Registry sender')
        parser.add_argument('-v',
                            '--verbose',
                            action='store_true',
                            default=None,
                            help='Verbose?')
        parser.add_argument('--debug',
                            action='store_true',
                            default=None,
                            help='Debug?')

        self.args = parser.parse_args()

    def get_registry_service(self, service_yaml):
        ''' This will get the service IP of the docker registry '''
        print "\nGetting Docker Registry service IP..."

        service = yaml.safe_load(service_yaml)
        self.docker_service_ip = str(service['spec']['clusterIP'])

    def get_registry_endpoints(self, endpoint_yaml):
        """
            This will return the docker registry endpoint IPs that are being served
            inside of kubernetes.
        """

        print "\nFinding the Docker Registry pods via Openshift API calls..."

        endpoints = yaml.safe_load(endpoint_yaml)
        self.docker_port = str(endpoints['subsets'][0]['ports'][0]['port'])

        for address in endpoints['subsets'][0]['addresses']:
            self.docker_hosts.append(address['ip'])

    def healthy_registry(self, ip_addr, port, secure=True):
        ''' Test a specific registry URL
            In v3.0.2.0, http://registry.url/healthz worked. The '/healthz' was
              something added by openshift to the docker registry. This should return an HTTP status
              code of 200 and text of {} (empty json).

            In 3.1.1 and on, '/' should work and return a 200 to
              indicate that the registry is up and running. Please see the following url for
              more info.  Look under load balancer health checks:
            https://github.com/docker/distribution/blob/master/docs/deploying.md#running-a-domain-registry
        '''

        proto = self.docker_protocol
        if not secure:
            proto = 'http'
        url = '{}://{}:{}/'.format(proto, ip_addr, port)

        try:
            print "Performing Docker Registry check on URL: {}".format(url)
            response = urllib2.urlopen(url, timeout=20)

            if response.getcode() == 200:
                return True
        except urllib2.URLError:
            print "Received error accessing URL: {}".format(url)
        except socket.timeout:
            print "Timed out accessing URL: {}".format(url)

        # Try with /healthz
        try:
            url = url + 'healthz'
            print "Performing Docker Registry check on URL: {}".format(url)
            response = urllib2.urlopen(url, timeout=20)

            if response.getcode() == 200:
                return True
        except urllib2.URLError:
            print "Received error access URL: {}".format(url)
        except socket.timeout:
            print "Timed out accessing URL: {}".format(url)

        # We tried regular and 'healthz' URLs. Registry inaccessible.
        return False
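
    # Within one call the probe tries '/' and then '/healthz' on the same
    # protocol; the callers below retry the whole call with secure=False, so a
    # plain-HTTP registry is still detected. Hypothetical usage:
    #
    #   self.healthy_registry('172.30.1.5', '5000')                 # https
    #   self.healthy_registry('172.30.1.5', '5000', secure=False)   # http fallback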

    def registry_service_check(self):
        ''' Test and report on health of Docker Registry service '''

        status = '0'

        # Skip if we failed to fetch a valid service IP
        if self.docker_service_ip is not None:
            if self.healthy_registry(self.docker_service_ip, self.docker_port):
                status = '1'
            elif self.healthy_registry(self.docker_service_ip,
                                       self.docker_port,
                                       secure=False):
                status = '1'

        print "\nDocker Registry service status: {}".format(status)

        self.zagg_sender.add_zabbix_keys(
            {'openshift.node.registry.service.ping': status})

    def registry_health_check(self):
        """
            Check the registry's / URL
        """

        healthy_registries = 0

        for host in self.docker_hosts:
            if self.healthy_registry(host, self.docker_port):
                healthy_registries += 1
            elif self.healthy_registry(host, self.docker_port, secure=False):
                healthy_registries += 1

        healthy_pct = 0

        if len(self.docker_hosts) > 0:
            # multiply first: a bare '/' truncates under Python 2 integer division
            healthy_pct = 100 * healthy_registries / len(self.docker_hosts)

        print "\n%s of %s registry PODs are healthy\n" % (
            healthy_registries, len(self.docker_hosts))

        self.zagg_sender.add_zabbix_keys(
            {'openshift.node.registry-pods.healthy_pct': healthy_pct})
Example #40
class OpsZaggClient(object):
    """ class to send data to zagg """

    def __init__(self):
        self.zagg_sender = None
        self.args = None
        self.config = None
        self.heartbeat = None

    def run(self):
        """ main function to run the script """

        self.parse_args()
        self.parse_config(self.args.config_file)
        self.config_zagg_sender()

        if self.args.send_heartbeat:
            self.add_heartbeat()

        if self.args.key and self.args.value:
            self.add_zabbix_key()

        if self.args.discovery_key and self.args.macro_string and self.args.macro_names:
            self.add_zabbix_dynamic_item()

        self.zagg_sender.send_metrics()

    def parse_args(self):
        """ parse the args from the cli """
        parser = argparse.ArgumentParser(description="Zagg metric sender")
        parser.add_argument("--send-heartbeat", help="send heartbeat metric to zagg", action="store_true")
        parser.add_argument("-s", "--host", help="specify host name as registered in Zabbix")
        parser.add_argument("-z", "--zagg-url", help="url of Zagg server")
        parser.add_argument("--zagg-user", help="username of the Zagg server")
        parser.add_argument("--zagg-pass", help="Password of the Zagg server")
        parser.add_argument("--zagg-ssl-verify", default=None, help="Whether to verify ssl certificates.")
        parser.add_argument("-v", "--verbose", action="store_true", default=None, help="Verbose?")
        parser.add_argument("--debug", action="store_true", default=None, help="Debug?")
        parser.add_argument(
            "-c", "--config-file", help="ops-zagg-client config file", default="/etc/openshift_tools/zagg_client.yaml"
        )

        key_value_group = parser.add_argument_group("Sending a Key-Value Pair")
        key_value_group.add_argument("-k", "--key", help="zabbix key")
        key_value_group.add_argument("-o", "--value", help="zabbix value")

        low_level_discovery_group = parser.add_argument_group("Sending a Low Level Discovery Item")
        low_level_discovery_group.add_argument("--discovery-key", help="discovery key")
        low_level_discovery_group.add_argument("--macro-string", help="macro string")
        low_level_discovery_group.add_argument("--macro-names", help="comma separated list of macro names")

        self.args = parser.parse_args()

    def parse_config(self, config_file):
        """ parse config file """
        with open(config_file) as config:
            self.config = yaml.load(config)

    def config_zagg_sender(self):
        """ configure the zagg_sender """

        zagg_url = self.args.zagg_url if self.args.zagg_url else self.config["zagg"]["url"]
        zagg_user = self.args.zagg_user if self.args.zagg_user else self.config["zagg"]["user"]
        zagg_password = self.args.zagg_pass if self.args.zagg_pass else self.config["zagg"]["pass"]
        zagg_verbose = self.args.verbose if self.args.verbose else self.config["zagg"]["verbose"]
        zagg_debug = self.args.debug if self.args.debug else self.config["zagg"]["debug"]
        zagg_ssl_verify = self.args.zagg_ssl_verify if self.args.zagg_ssl_verify else self.config["zagg"]["ssl_verify"]
        host = self.args.host if self.args.host else self.config["host"]["name"]

        if isinstance(zagg_verbose, str):
            zagg_verbose = zagg_verbose == "True"

        if isinstance(zagg_debug, str):
            zagg_debug = zagg_debug == "True"

        if isinstance(zagg_ssl_verify, str):
            zagg_ssl_verify = zagg_ssl_verify == "True"

        zagg_conn = ZaggConnection(
            url=zagg_url, user=zagg_user, password=zagg_password, ssl_verify=zagg_ssl_verify, debug=zagg_debug
        )

        self.zagg_sender = ZaggSender(host, zagg_conn, zagg_verbose, zagg_debug)

    def add_heartbeat(self):
        """ crate a hearbeat metric """
        heartbeat = ZaggHeartbeat(
            templates=self.config["heartbeat"]["templates"], hostgroups=self.config["heartbeat"]["hostgroups"]
        )
        self.zagg_sender.add_heartbeat(heartbeat)

    def add_zabbix_key(self):
        """ send zabbix key/value pair to zagg """

        self.zagg_sender.add_zabbix_keys({self.args.key: self.args.value})

    def add_zabbix_dynamic_item(self):
        """ send zabbix low level discovery item to zagg """

        self.zagg_sender.add_zabbix_dynamic_item(
            self.args.discovery_key, self.args.macro_string, self.args.macro_names.split(",")
        )
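
# Hypothetical invocations of this client (the installed script name is an
# assumption), matching the three paths handled in run():
#
#   ops-zagg-client --send-heartbeat
#   ops-zagg-client -k 'some.zabbix.key' -o '42'
#   ops-zagg-client --discovery-key disc.pv --macro-string '#OSO_PV' --macro-names 1Gi,5Gi
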
class OpenshiftDockerRegistryChecker(object):
    """ Checks for the Openshift Cluster Docker Registry """

    def __init__(self):
        self.args = None
        self.zagg_sender = None

        self.docker_hosts = []
        self.docker_port = None
        # Assume secure registry
        self.docker_protocol = 'https'
        self.docker_service_ip = None
        self.kubeconfig = None

    def get_kubeconfig(self):
        ''' Find kubeconfig to use for OCUtil '''
        # Default master kubeconfig
        kubeconfig = '/etc/origin/master/admin.kubeconfig'
        non_master_kube_dir = '/etc/origin/node'

        if os.path.isdir(non_master_kube_dir):
            for my_file in os.listdir(non_master_kube_dir):
                if my_file.endswith(".kubeconfig"):
                    kubeconfig = os.path.join(non_master_kube_dir, my_file)

        if self.args.debug:
            print "Using kubeconfig: {}".format(kubeconfig)

        self.kubeconfig = kubeconfig

    def run(self):
        """  Main function to run the check """

        self.parse_args()
        self.get_kubeconfig()
        ocutil = OCUtil(config_file=self.kubeconfig, verbose=self.args.verbose)
        self.zagg_sender = ZaggSender(verbose=self.args.verbose, debug=self.args.debug)

        try:
            oc_yaml = ocutil.get_service('docker-registry')
            self.get_registry_service(oc_yaml)
            oc_yaml = ocutil.get_endpoint('docker-registry')
            self.get_registry_endpoints(oc_yaml)
        except Exception as ex:
            print "Problem retreiving registry IPs: %s " % ex.message

        self.registry_service_check()
        self.registry_health_check()

        self.zagg_sender.send_metrics()

    def parse_args(self):
        """ parse the args from the cli """

        parser = argparse.ArgumentParser(description='Openshift Cluster Docker Registry sender')
        parser.add_argument('-v', '--verbose', action='store_true', default=None, help='Verbose?')
        parser.add_argument('--debug', action='store_true', default=None, help='Debug?')

        self.args = parser.parse_args()

    def get_registry_service(self, service_yaml):
        ''' This will get the service IP of the docker registry '''
        print "\nGetting Docker Registry service IP..."

        service = yaml.safe_load(service_yaml)
        self.docker_service_ip = str(service['spec']['clusterIP'])

    def get_registry_endpoints(self, endpoint_yaml):
        """
            This will return the docker registry endpoint IPs that are being served
            inside of kubernetes.
        """

        print "\nFinding the Docker Registry pods via Openshift API calls..."

        endpoints = yaml.safe_load(endpoint_yaml)
        self.docker_port = str(endpoints['subsets'][0]['ports'][0]['port'])

        for address in endpoints['subsets'][0]['addresses']:
            self.docker_hosts.append(address['ip'])

    def healthy_registry(self, ip_addr, port, secure=True):
        ''' Test a specific registry URL
            In v3.0.2.0, http://registry.url/healthz worked. The '/healthz' was
              something added by openshift to the docker registry. This should return an HTTP status
              code of 200 and text of {} (empty json).

            In 3.1.1 and on, '/' should work and return a 200 to
              indicate that the registry is up and running. Please see the following url for
              more info.  Look under load balancer health checks:
            https://github.com/docker/distribution/blob/master/docs/deploying.md#running-a-domain-registry
        '''

        proto = self.docker_protocol
        if not secure:
            proto = 'http'
        url = '{}://{}:{}/'.format(proto, ip_addr, port)

        try:
            print "Performing Docker Registry check on URL: {}".format(url)
            response = urllib2.urlopen(url, timeout=20)

            if response.getcode() == 200:
                return True
        except urllib2.URLError:
            print "Received error accessing URL: {}".format(url)
        except socket.timeout:
            print "Timed out accessing URL: {}".format(url)

        # Try with /healthz
        try:
            url = url + 'healthz'
            print "Performing Docker Registry check on URL: {}".format(url)
            response = urllib2.urlopen(url, timeout=20)

            if response.getcode() == 200:
                return True
        except urllib2.URLError:
            print "Received error access URL: {}".format(url)
        except socket.timeout:
            print "Timed out accessing URL: {}".format(url)

        # We tried regular and 'healthz' URLs. Registry inaccessible.
        return False

    def registry_service_check(self):
        ''' Test and report on health of Docker Registry service '''

        status = '0'

        # Skip if we failed to fetch a valid service IP
        if self.docker_service_ip is not None:
            if self.healthy_registry(self.docker_service_ip, self.docker_port):
                status = '1'
            elif self.healthy_registry(self.docker_service_ip, self.docker_port,
                                       secure=False):
                status = '1'

        print "\nDocker Registry service status: {}".format(status)

        self.zagg_sender.add_zabbix_keys({'openshift.node.registry.service.ping' : status})

    def registry_health_check(self):
        """
            Check the registry's / URL
        """

        healthy_registries = 0

        for host in self.docker_hosts:
            if self.healthy_registry(host, self.docker_port):
                healthy_registries += 1
            elif self.healthy_registry(host, self.docker_port, secure=False):
                healthy_registries += 1

        healthy_pct = 0

        if len(self.docker_hosts) > 0:
            # multiply first: a bare '/' truncates under Python 2 integer division
            healthy_pct = 100 * healthy_registries / len(self.docker_hosts)

        print "\n%s of %s registry PODs are healthy\n" % (healthy_registries,
                                                          len(self.docker_hosts))

        self.zagg_sender.add_zabbix_keys({'openshift.node.registry-pods.healthy_pct' : healthy_pct})
def main():
    ''' Get data from etcd API
    '''

    SSL_CLIENT_CERT = '/etc/openshift/master/master.etcd-client.crt'
    SSL_CLIENT_KEY = '/etc/openshift/master/master.etcd-client.key'
    OPENSHIFT_MASTER_CONFIG = '/etc/openshift/master/master-config.yaml'

    # find out the etcd port
    with open(OPENSHIFT_MASTER_CONFIG, 'r') as f:
        config = yaml.load(f)

    API_HOST = config["etcdClientInfo"]["urls"][0]

    # define the store API URL
    API_URL = API_HOST + "/v2/stats/store"


    zs = ZaggSender()
    # Fetch the store statics from API
    try:
        request = requests.get(API_URL, cert=(SSL_CLIENT_CERT, SSL_CLIENT_KEY), verify=False)
        content = json.loads(request.content)
        etcd_ping = 1

        # parse the items and add it as metrics
        zs.add_zabbix_keys({'openshift.master.etcd.create.success' : content['createSuccess']})
        zs.add_zabbix_keys({'openshift.master.etcd.create.fail' : content['createFail']})
        zs.add_zabbix_keys({'openshift.master.etcd.delete.success' : content['deleteSuccess']})
        zs.add_zabbix_keys({'openshift.master.etcd.delete.fail' : content['deleteFail']})
        zs.add_zabbix_keys({'openshift.master.etcd.get.success' : content['getsSuccess']})
        zs.add_zabbix_keys({'openshift.master.etcd.get.fail' : content['getsFail']})
        zs.add_zabbix_keys({'openshift.master.etcd.set.success' : content['setsSuccess']})
        zs.add_zabbix_keys({'openshift.master.etcd.set.fail' : content['setsFail']})
        zs.add_zabbix_keys({'openshift.master.etcd.update.success' : content['updateSuccess']})
        zs.add_zabbix_keys({'openshift.master.etcd.update.fail' : content['updateFail']})
        zs.add_zabbix_keys({'openshift.master.etcd.watchers' : content['watchers']})

    except requests.exceptions.ConnectionError as ex:
        print "ERROR talking to etcd API: %s" % ex.message
        etcd_ping = 0

    zs.add_zabbix_keys({'openshift.master.etcd.ping' : etcd_ping})

    # Finally, send them to Zabbix
    zs.send_metrics()
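
# A trimmed, hypothetical /v2/stats/store payload showing the counters read above:
#
#   {"createSuccess": 100, "createFail": 1,
#    "deleteSuccess": 10,  "deleteFail": 0,
#    "getsSuccess": 5000,  "getsFail": 30,
#    "setsSuccess": 800,   "setsFail": 2,
#    "updateSuccess": 40,  "updateFail": 0,
#    "watchers": 12}
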
        cmd = ["ovs-ofctl", "-O", "OpenFlow13", "del-flows", "br0"]
        for ovs_rule in rule_list:
            # The trailing '/-1' is the wildcard match
            rule_to_cookie = "cookie=0x{0}/-1".format(ovs_rule)
            cmd.append(rule_to_cookie)
            subprocess.call(cmd)
            cmd.pop()

        # Since rule list has changed, force it to regenerate next time
        self.rules = None
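
        # For a stray rule with cookie 0xdeadbeef (hypothetical value), the
        # loop above runs:
        #   ovs-ofctl -O OpenFlow13 del-flows br0 cookie=0xdeadbeef/-1
        # then pops the cookie argument so cmd can be reused for the next rule.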

ZBX_KEY = "openshift.node.ovs.stray.rules"

if __name__ == "__main__":
    ovs_fixer = OVS()
    zgs = ZaggSender()

    # Dev says rules before ports since OpenShift will set up ports, then rules
    ovs_fixer.get_rule_list()
    ovs_ports = ovs_fixer.get_port_list()

    ovs_bad_rules = ovs_fixer.find_bad_rules()

    # Report bad/stray rules count before removing
    zgs.add_zabbix_keys({ZBX_KEY: len(ovs_bad_rules)})
    zgs.send_metrics()

    print "Good ports: {0}".format(str(ovs_ports))
    print "Bad rules: {0}".format(str(ovs_bad_rules))

    ovs_fixer.remove_rules(ovs_bad_rules)
class OpenshiftSkyDNSZaggClient(object):
    """ Checks for the Openshift Master SkyDNS """
    def __init__(self):
        self.args = None
        self.zagg_sender = None
        self.ora = OpenshiftRestApi()
        self.dns_host = ''
        self.dns_port = 53
        self.openshift_services = []

    def run(self):
        """  Main function to run the check """

        self.parse_args()
        self.zagg_sender = ZaggSender(verbose=self.args.verbose,
                                      debug=self.args.debug)

        self.get_openshift_services()
        dns_host = [
            i for i in self.openshift_services
            if i['name'] == 'kubernetes' and i['namespace'] == 'default'
        ]

        if len(dns_host) == 1:
            self.dns_host = dns_host[0]['ip']
        else:
            print "\nUnable to find SKY DNS server."
            print "Please run \"oc get services -n default\" to locate kubernetes service"
            sys.exit(1)

        if self.check_dns_port_alive():
            self.do_dns_check()

        self.zagg_sender.send_metrics()

    def parse_args(self):
        """ parse the args from the cli """

        parser = argparse.ArgumentParser(description='Openshift SkyDNS metric sender')
        parser.add_argument('-v',
                            '--verbose',
                            action='store_true',
                            default=None,
                            help='Verbose?')
        parser.add_argument('--debug',
                            action='store_true',
                            default=None,
                            help='Debug?')

        self.args = parser.parse_args()

    def check_dns_port_alive(self):
        """ Verify that the DNS port (TCP 53) is alive """

        print "\nPerforming Openshift DNS port check..."

        try:
            s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
            s.settimeout(1)
            s.connect((self.dns_host, self.dns_port))
            s.close()

            print "\nOpenshift SkyDNS host: %s, port: %s is OPEN" % (
                self.dns_host, self.dns_port)
            print "================================================\n"
            self.zagg_sender.add_zabbix_keys(
                {'openshift.master.skydns.port.open': 1})

            return True

        except socket.error as e:
            print "\nOpenshift SkyDNS host: %s, port: %s is CLOSED" % (
                self.dns_host, self.dns_port)
            print "Python Error: %s" % e
            print "================================================\n"
            self.zagg_sender.add_zabbix_keys(
                {'openshift.master.skydns.port.open': 0})

            return False
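
# do_dns_check() is invoked in run() above but not included in this snippet.
# A minimal sketch of what such a check might look like, using dnspython; the
# real implementation and the Zabbix key name here are assumptions:
#
#   import dns.exception
#   import dns.resolver
#
#   def do_dns_check(self):
#       ''' resolve a known cluster name against the SkyDNS server '''
#       resolver = dns.resolver.Resolver()
#       resolver.nameservers = [self.dns_host]
#       try:
#           resolver.query('kubernetes.default.svc.cluster.local', 'A')
#           self.zagg_sender.add_zabbix_keys({'openshift.master.skydns.query': 1})
#       except dns.exception.DNSException:
#           self.zagg_sender.add_zabbix_keys({'openshift.master.skydns.query': 0})
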
class OpenshiftClusterCapacity(object):
    ''' Checks for cluster capacity '''
    def __init__(self):
        self.args = None
        self.zagg_sender = None
        self.ora = None
        self.sql_conn = None
        self.zbx_key_prefix = "openshift.master.cluster.compute_nodes."

    def run(self):
        '''  Main function to run the check '''

        self.parse_args()
        self.zagg_sender = ZaggSender(verbose=self.args.verbose,
                                      debug=self.args.debug)

        master_cfg = []
        with open(self.args.master_config, 'r') as yml:
            master_cfg = yaml.load(yml)
        self.ora = OpenshiftRestApi(
            host=master_cfg['oauthConfig']['masterURL'], verify_ssl=True)

        self.cluster_capacity()

        if not self.args.dry_run:
            self.zagg_sender.send_metrics()

    def parse_args(self):
        ''' parse the args from the cli '''

        parser = argparse.ArgumentParser(description='Cluster capacity sender')
        parser.add_argument(
            '--master-config',
            default='/etc/origin/master/master-config.yaml',
            help='Location of OpenShift master-config.yml file')
        parser.add_argument('-v',
                            '--verbose',
                            action='store_true',
                            default=None,
                            help='Verbose?')
        parser.add_argument('--debug',
                            action='store_true',
                            default=None,
                            help='Debug?')
        parser.add_argument('--dry-run',
                            action='store_true',
                            default=False,
                            help='Do not send results to Zabbix')

        self.args = parser.parse_args()

    def load_nodes(self):
        ''' load nodes into SQL '''

        self.sql_conn.execute('''CREATE TABLE nodes
                                 (name text, type text, api text,
                                  max_cpu integer, max_memory integer,
                                  max_pods integer)''')
        response = self.ora.get('/api/v1/nodes')

        for new_node in response['items']:
            # Skip nodes not in 'Ready' state
            node_ready = False
            for condition in new_node['status']['conditions']:
                if condition['type'] == 'Ready' and \
                   condition['status'] == 'True':
                    node_ready = True
            if not node_ready:
                continue

            # Skip unschedulable nodes
            if new_node['spec'].get('unschedulable'):
                continue

            node = {}
            node['name'] = new_node['metadata']['name']
            node['type'] = new_node['metadata']['labels'].get(
                'type', 'unknown')
            node['api'] = new_node['metadata']['selfLink']

            if 'allocatable' in new_node['status']:
                cpu = new_node['status']['allocatable']['cpu']
                mem = new_node['status']['allocatable']['memory']
                node['max_pods'] = int(
                    new_node['status']['allocatable']['pods'])
            else:
                cpu = new_node['status']['capacity']['cpu']
                mem = new_node['status']['capacity']['memory']
                node['max_pods'] = int(new_node['status']['capacity']['pods'])

            node['max_cpu'] = to_milicores(cpu)
            node['max_memory'] = to_bytes(mem)

            if self.args.debug:
                print "Adding node: {}".format(str(node))

            self.sql_conn.execute(
                'INSERT INTO nodes VALUES (?,?,?,?,?,?)',
                (node['name'], node['type'], node['api'], node['max_cpu'],
                 node['max_memory'], node['max_pods']))

    @staticmethod
    def load_container_limits(pod, containers):
        ''' process/store container limits data '''

        for container in containers:
            if 'limits' in container['resources']:
                pod['cpu_limits'] = int(pod.get('cpu_limits', 0)) \
                    + int(to_milicores(container['resources']['limits'].get('cpu', '0')))

                pod['memory_limits'] = int(pod.get('memory_limits', 0)) \
                    + int(to_bytes(container['resources']['limits'].get('memory', '0')))

            if 'requests' in container['resources']:
                pod['cpu_requests'] = int(pod.get('cpu_requests', 0)) \
                    + int(to_milicores(container['resources']['requests'].get('cpu', '0')))

                pod['memory_requests'] = int(pod.get('memory_requests', 0)) \
                    + int(to_bytes(container['resources']['requests'].get('memory', '0')))

    def load_pods(self):
        ''' put pod details into db '''

        self.sql_conn.execute('''CREATE TABLE pods
                                 (name text, namespace text, api text,
                                  cpu_limits integer, cpu_requests integer,
                                  memory_limits integer,
                                  memory_requests integer, node text)''')
        response = self.ora.get('/api/v1/pods')

        for new_pod in response['items']:
            if new_pod['status']['phase'] != 'Running':
                continue

            pod = {}
            pod['name'] = new_pod['metadata']['name']
            pod['namespace'] = new_pod['metadata']['namespace']
            pod['api'] = new_pod['metadata']['selfLink']
            pod['node'] = new_pod['spec']['nodeName']
            self.load_container_limits(pod, new_pod['spec']['containers'])

            self.sql_conn.execute(
                'INSERT INTO pods VALUES (?,?,?,?,?,?,?,?)',
                (pod['name'], pod['namespace'], pod['api'],
                 pod.get('cpu_limits'), pod.get('cpu_requests'),
                 pod.get('memory_limits'), pod.get('memory_requests'),
                 pod['node']))

    def get_largest_pod(self):
        ''' return single largest memory request number for all running pods '''

        max_pod = 0
        for row in self.sql_conn.execute('''SELECT MAX(memory_requests)
                                            FROM pods, nodes
                                            WHERE pods.node=nodes.name
                                              AND nodes.type="compute"'''):
            max_pod = row[0]

        return max_pod

    def how_many_schedulable(self, node_size):
        ''' return how many pods with memory request 'node_size' can be scheduled '''

        nodes = {}

        # get max mem for each compute node
        for row in self.sql_conn.execute('''SELECT nodes.name, nodes.max_memory
                                            FROM nodes
                                            WHERE nodes.type="compute"'''):
            nodes[row[0]] = {
                'max_memory': row[1],
                # set memory_scheduled to 0 because the node may have
                # no pods running, and the next SQL query below would
                # leave this field unpopulated
                'memory_scheduled': 0
            }

        # get memory requests for all pods on all compute nodes
        for row in self.sql_conn.execute('''SELECT nodes.name,
                                                   SUM(pods.memory_requests)
                                            FROM pods, nodes
                                            WHERE pods.node=nodes.name
                                              AND nodes.type="compute"
                                            GROUP BY nodes.name'''):
            nodes[row[0]]['memory_scheduled'] = row[1]

        schedulable = 0
        for node in nodes.keys():
            # TODO: Some containers from `oc get pods --all-namespaces -o json`
            # don't have resources scheduled, causing memory_scheduled == 0
            available = nodes[node]['max_memory'] - \
                        nodes[node]['memory_scheduled']
            num = available / node_size
            # ignore negative number (overcommitted nodes)
            if num > 0:
                schedulable += num

        return schedulable
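
        # Worked example with hypothetical numbers: two compute nodes with
        # 16GiB allocatable each, 10GiB and 4GiB already requested, and a
        # largest-pod request (node_size) of 2GiB:
        #   node1: (16GiB - 10GiB) / 2GiB = 3
        #   node2: (16GiB -  4GiB) / 2GiB = 6
        #   schedulable = 9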

    def get_compute_nodes_max_schedulable_cpu(self):
        ''' calculate total schedulable CPU (in milicores) for all compute nodes '''
        max_cpu = 0
        for row in self.sql_conn.execute('''SELECT SUM(nodes.max_cpu)
                                            FROM nodes
                                            WHERE nodes.type="compute" '''):
            max_cpu = row[0]
        return max_cpu

    def get_compute_nodes_max_schedulable_mem(self):
        ''' calculate total schedulable memory for all compute nodes '''
        max_mem = 0
        for row in self.sql_conn.execute('''SELECT SUM(nodes.max_memory)
                                            FROM nodes
                                            WHERE nodes.type="compute" '''):
            max_mem = row[0]
        return max_mem

    def get_compute_nodes_scheduled_cpu(self):
        ''' calculate cpu scheduled to pods
            (total requested and percentage of cluster-wide total) '''
        max_cpu = self.get_compute_nodes_max_schedulable_cpu()
        cpu_requests_for_all_pods = 0
        for row in self.sql_conn.execute('''SELECT SUM(pods.cpu_requests)
                                            FROM pods, nodes
                                            WHERE pods.node = nodes.name
                                              AND nodes.type = "compute" '''):
            cpu_requests_for_all_pods = row[0]

        cpu_scheduled_as_pct = 100.0 * cpu_requests_for_all_pods / max_cpu

        cpu_unscheduled = max_cpu - cpu_requests_for_all_pods
        cpu_unscheduled_as_pct = 100.0 * cpu_unscheduled / max_cpu

        return (cpu_requests_for_all_pods, cpu_scheduled_as_pct,
                cpu_unscheduled, cpu_unscheduled_as_pct)

    def get_compute_nodes_scheduled_mem(self):
        ''' calculate mem allocated to pods
            (total requested and percentage of cluster-wide total) '''
        max_mem = self.get_compute_nodes_max_schedulable_mem()
        mem_requests_for_all_pods = 0
        for row in self.sql_conn.execute('''SELECT SUM(pods.memory_requests)
                                            FROM pods, nodes
                                            WHERE pods.node = nodes.name
                                              AND nodes.type = "compute" '''):
            mem_requests_for_all_pods = row[0]

        mem_scheduled_as_pct = 100.0 * mem_requests_for_all_pods / max_mem

        mem_unscheduled = max_mem - mem_requests_for_all_pods
        mem_unscheduled_as_pct = 100.0 * mem_unscheduled / max_mem

        return (mem_requests_for_all_pods, mem_scheduled_as_pct,
                mem_unscheduled, mem_unscheduled_as_pct)

    def get_oversub_cpu(self):
        ''' return percentage oversubscribed based on CPU limits on running pods '''
        max_cpu = self.get_compute_nodes_max_schedulable_cpu()
        pod_cpu_limits = 0

        # get cpu limits for all running pods
        for row in self.sql_conn.execute('''SELECT SUM(pods.cpu_limits)
                                            FROM pods, nodes
                                            WHERE pods.node = nodes.name
                                              AND nodes.type = "compute" '''):
            pod_cpu_limits = row[0]

        return ((float(pod_cpu_limits) / max_cpu) * 100.0) - 100

    def get_oversub_mem(self):
        ''' return percentage oversubscribed based on memory limits on running pods '''
        max_mem = self.get_compute_nodes_max_schedulable_mem()
        pod_mem_limits = 0

        # get mem limits for all running pods
        for row in self.sql_conn.execute('''SELECT SUM(pods.memory_limits)
                                            FROM pods, nodes
                                            WHERE pods.node = nodes.name
                                              AND nodes.type = "compute" '''):
            pod_mem_limits = row[0]

        return ((float(pod_mem_limits) / max_mem) * 100.0) - 100

    def do_cpu_stats(self):
        ''' gather and report CPU statistics '''
        # CPU items
        zbx_key_max_schedulable_cpu = self.zbx_key_prefix + "max_schedulable.cpu"
        zbx_key_scheduled_cpu = self.zbx_key_prefix + "scheduled.cpu"
        zbx_key_scheduled_cpu_pct = self.zbx_key_prefix + "scheduled.cpu_pct"
        zbx_key_unscheduled_cpu = self.zbx_key_prefix + "unscheduled.cpu"
        zbx_key_unscheduled_cpu_pct = self.zbx_key_prefix + "unscheduled.cpu_pct"
        zbx_key_oversub_cpu_pct = self.zbx_key_prefix + "oversubscribed.cpu_pct"

        print "CPU Stats:"
        max_schedulable_cpu = self.get_compute_nodes_max_schedulable_cpu()
        self.zagg_sender.add_zabbix_keys(
            {zbx_key_max_schedulable_cpu: max_schedulable_cpu})

        scheduled_cpu, scheduled_cpu_pct, unscheduled_cpu, unscheduled_cpu_pct = \
            self.get_compute_nodes_scheduled_cpu()
        oversub_cpu_pct = self.get_oversub_cpu()

        print "  Scheduled CPU for compute nodes:\t\t\t" + \
              "{:>15} millicores".format(scheduled_cpu)
        print "  Unscheduled CPU for compute nodes:\t\t\t" + \
              "{:>15} millicores".format(unscheduled_cpu)
        print "  Maximum (total) schedulable CPU for compute " + \
              "nodes:\t{:>15} millicores".format(max_schedulable_cpu)
        print "  Percent scheduled CPU for compute nodes:\t\t\t" + \
              "{:.2f}%".format(scheduled_cpu_pct)
        print "  Percent unscheduled CPU for compute nodes:\t\t\t" + \
              "{:.2f}%".format(unscheduled_cpu_pct)
        print "  Percent oversubscribed CPU for compute nodes:\t\t" + \
              "{:.2f}%".format(oversub_cpu_pct)
        self.zagg_sender.add_zabbix_keys(
            {zbx_key_scheduled_cpu: scheduled_cpu})
        self.zagg_sender.add_zabbix_keys(
            {zbx_key_scheduled_cpu_pct: int(scheduled_cpu_pct)})
        self.zagg_sender.add_zabbix_keys(
            {zbx_key_unscheduled_cpu: unscheduled_cpu})
        self.zagg_sender.add_zabbix_keys(
            {zbx_key_unscheduled_cpu_pct: int(unscheduled_cpu_pct)})
        self.zagg_sender.add_zabbix_keys(
            {zbx_key_oversub_cpu_pct: int(oversub_cpu_pct)})

    def do_mem_stats(self):
        ''' gather and report memory statistics '''
        # Memory items
        zbx_key_max_schedulable_mem = self.zbx_key_prefix + "max_schedulable.mem"
        zbx_key_scheduled_mem = self.zbx_key_prefix + "scheduled.mem"
        zbx_key_scheduled_mem_pct = self.zbx_key_prefix + "scheduled.mem_pct"
        zbx_key_unscheduled_mem = self.zbx_key_prefix + "unscheduled.mem"
        zbx_key_unscheduled_mem_pct = self.zbx_key_prefix + "unscheduled.mem_pct"
        zbx_key_oversub_mem_pct = self.zbx_key_prefix + "oversubscribed.mem_pct"

        print "\nMemory Stats:"
        max_schedulable_mem = self.get_compute_nodes_max_schedulable_mem()
        self.zagg_sender.add_zabbix_keys(
            {zbx_key_max_schedulable_mem: max_schedulable_mem})

        (scheduled_mem, scheduled_mem_pct, unscheduled_mem,
         unscheduled_mem_pct) = self.get_compute_nodes_scheduled_mem()
        oversub_mem_pct = self.get_oversub_mem()
        print "  Scheduled mem for compute nodes:\t\t\t" + \
              "{:>20} bytes".format(scheduled_mem)
        print "  Unscheduled mem for compute nodes:\t\t\t" + \
              "{:>20} bytes".format(unscheduled_mem)
        print "  Maximum (total) schedulable memory for compute nodes:\t" + \
              "{:>20} bytes".format(max_schedulable_mem)
        print "  Percent scheduled mem for compute nodes:\t\t\t" + \
              "{:.2f}%".format(scheduled_mem_pct)
        print "  Percent unscheduled mem for compute nodes:\t\t\t" + \
              "{:.2f}%".format(unscheduled_mem_pct)
        print "  Percent oversubscribed mem for compute nodes:\t\t" + \
              "{:.2f}%".format(oversub_mem_pct)
        self.zagg_sender.add_zabbix_keys(
            {zbx_key_scheduled_mem: scheduled_mem})
        self.zagg_sender.add_zabbix_keys(
            {zbx_key_scheduled_mem_pct: int(scheduled_mem_pct)})
        self.zagg_sender.add_zabbix_keys(
            {zbx_key_unscheduled_mem: unscheduled_mem})
        self.zagg_sender.add_zabbix_keys(
            {zbx_key_unscheduled_mem_pct: int(unscheduled_mem_pct)})
        self.zagg_sender.add_zabbix_keys(
            {zbx_key_oversub_mem_pct: int(oversub_mem_pct)})

    def cluster_capacity(self):
        ''' check capacity of compute nodes on cluster'''

        # Other zabbix items
        zbx_key_max_pods = "openshift.master.cluster.max_mem_pods_schedulable"

        self.sql_conn = sqlite3.connect(':memory:')

        self.load_nodes()
        self.load_pods()

        self.do_cpu_stats()
        self.do_mem_stats()

        print "\nOther stats:"
        largest = self.get_largest_pod()
        if self.args.debug:
            print "  Largest memory pod: {}".format(largest)

        schedulable = self.how_many_schedulable(largest)
        print "  Number of max-size nodes schedulable:\t\t\t\t{}".format(
            schedulable)
        self.zagg_sender.add_zabbix_keys({zbx_key_max_pods: schedulable})
class OpsZaggClient(object):
    """ class to send data to zagg """
    def __init__(self):
        self.zagg_sender = None
        self.args = None
        self.config = None
        self.pcp_metrics = []
        self.heartbeat = None

    def run(self):
        """ main function to run the script """

        self.parse_args()
        self.parse_config(self.args.config_file)
        self.config_zagg_sender()

        if self.args.send_pcp_metrics:
            self.add_pcp_metrics()

        if self.args.send_heartbeat:
            self.add_heartbeat()

        if self.args.key and self.args.value:
            self.add_zabbix_key()

        if self.args.discovery_key and self.args.macro_string and self.args.macro_names:
            self.add_zabbix_dynamic_item()

        self.zagg_sender.send_metrics()

    def parse_args(self):
        """ parse the args from the cli """
        parser = argparse.ArgumentParser(description='Zagg metric sender')
        parser.add_argument('--send-pcp-metrics',
                            help="send pcp metrics to zagg",
                            action="store_true")
        parser.add_argument('--send-heartbeat',
                            help="send heartbeat metric to zagg",
                            action="store_true")
        parser.add_argument('-s',
                            '--host',
                            help='specify host name as registered in Zabbix')
        parser.add_argument('-z', '--zagg-url', help='url of Zagg server')
        parser.add_argument('--zagg-user', help='username of the Zagg server')
        parser.add_argument('--zagg-pass', help='Password of the Zagg server')
        parser.add_argument('--zagg-ssl-verify',
                            default=None,
                            help='Whether to verify ssl certificates.')
        parser.add_argument('-v',
                            '--verbose',
                            action='store_true',
                            default=None,
                            help='Verbose?')
        parser.add_argument('--debug',
                            action='store_true',
                            default=None,
                            help='Debug?')
        parser.add_argument('-c',
                            '--config-file',
                            help='ops-zagg-client config file',
                            default='/etc/openshift_tools/zagg_client.yaml')

        key_value_group = parser.add_argument_group('Sending a Key-Value Pair')
        key_value_group.add_argument('-k', '--key', help='zabbix key')
        key_value_group.add_argument('-o', '--value', help='zabbix value')

        low_level_discovery_group = parser.add_argument_group(
            'Sending a Low Level Discovery Item')
        low_level_discovery_group.add_argument('--discovery-key',
                                               help='discovery key')
        low_level_discovery_group.add_argument('--macro-string',
                                               help='macro string')
        low_level_discovery_group.add_argument(
            '--macro-names', help='comma separated list of macro names')

        self.args = parser.parse_args()

    def parse_config(self, config_file):
        """ parse config file """
        with open(config_file) as cfg:
            self.config = yaml.safe_load(cfg)

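    # A hypothetical zagg_client.yaml matching the keys that
    # config_zagg_sender() and add_heartbeat() read (values are placeholders):
    #
    #   host:
    #     name: example-node.example.com
    #   zagg:
    #     url: https://zagg.example.com
    #     user: zagg-user
    #     pass: zagg-pass
    #     ssl_verify: 'False'
    #     verbose: 'False'
    #     debug: 'False'
    #   heartbeat:
    #     templates: [Template Heartbeat]
    #     hostgroups: [OpenShift Nodes]
    #   pcp:
    #     metrics: [kernel.all.load, mem.util.free]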
    def config_zagg_sender(self):
        """ configure the zagg_sender """

        zagg_url = self.args.zagg_url or self.config['zagg']['url']
        zagg_user = self.args.zagg_user or self.config['zagg']['user']
        zagg_password = self.args.zagg_pass or self.config['zagg']['pass']
        zagg_verbose = self.args.verbose or self.config['zagg']['verbose']
        zagg_debug = self.args.debug or self.config['zagg']['debug']
        zagg_ssl_verify = (self.args.zagg_ssl_verify or
                           self.config['zagg']['ssl_verify'])
        host = self.args.host or self.config['host']['name']

        if isinstance(zagg_verbose, str):
            zagg_verbose = (zagg_verbose == 'True')

        if isinstance(zagg_debug, str):
            zagg_debug = (zagg_debug == 'True')

        if isinstance(zagg_ssl_verify, str):
            zagg_ssl_verify = (zagg_ssl_verify == 'True')

        zagg_conn = ZaggConnection(
            url=zagg_url,
            user=zagg_user,
            password=zagg_password,
            ssl_verify=zagg_ssl_verify,
            debug=zagg_debug,
        )

        self.zagg_sender = ZaggSender(host, zagg_conn, zagg_verbose,
                                      zagg_debug)

    def add_heartbeat(self):
        """ create a heartbeat metric """
        heartbeat = ZaggHeartbeat(
            templates=self.config['heartbeat']['templates'],
            hostgroups=self.config['heartbeat']['hostgroups'],
        )
        self.zagg_sender.add_heartbeat(heartbeat)

    def add_pcp_metrics(self):
        """ collect pcp metrics to send to ZaggSender """

        self.zagg_sender.add_pcp_metrics(self.config['pcp']['metrics'])

    def add_zabbix_key(self):
        """ send zabbix key/value pair to zagg """

        self.zagg_sender.add_zabbix_keys({self.args.key: self.args.value})

    def add_zabbix_dynamic_item(self):
        """ send zabbix low level discovery item to zagg """

        self.zagg_sender.add_zabbix_dynamic_item(
            self.args.discovery_key,
            self.args.macro_string,
            self.args.macro_names.split(','),
        )
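
# Example invocations of the ops-zagg-client wrapper above (key names and
# macro values here are hypothetical):
#   ops-zagg-client -k openshift.master.app.create -o 0
#   ops-zagg-client --send-heartbeat --send-pcp-metrics
#   ops-zagg-client --discovery-key disc.filesys --macro-string '/var' \
#       --macro-names OSO_FILESYS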
class OpenshiftClusterCapacity(object):
    ''' Checks for cluster capacity '''
    def __init__(self):
        self.args = None
        self.zagg_sender = None
        self.ora = None
        self.sql_conn = None

    def run(self):
        '''  Main function to run the check '''

        self.parse_args()
        self.zagg_sender = ZaggSender(verbose=self.args.verbose,
                                      debug=self.args.debug)

        with open(self.args.master_config, 'r') as yml:
            master_cfg = yaml.safe_load(yml)
        self.ora = OpenshiftRestApi(
            host=master_cfg['oauthConfig']['masterURL'], verify_ssl=True)

        self.node_capacity()

        if not self.args.dry_run:
            self.zagg_sender.send_metrics()

    def parse_args(self):
        ''' parse the args from the cli '''

        parser = argparse.ArgumentParser(description='Cluster capacity sender')
        parser.add_argument(
            '--master-config',
            default='/etc/origin/master/master-config.yaml',
            help='Location of OpenShift master-config.yml file')
        parser.add_argument('-v',
                            '--verbose',
                            action='store_true',
                            default=None,
                            help='Verbose?')
        parser.add_argument('--debug',
                            action='store_true',
                            default=None,
                            help='Debug?')
        parser.add_argument('--dry-run',
                            action='store_true',
                            default=False,
                            help='Do not send results to Zabbix')

        self.args = parser.parse_args()

    def load_nodes(self):
        ''' load nodes into SQL '''

        self.sql_conn.execute('''CREATE TABLE nodes
                                 (name text, type text, api text,
                                  max_cpu integer, max_memory integer,
                                  max_pods integer)''')
        response = self.ora.get('/api/v1/nodes')

        for new_node in response['items']:
            # Skip nodes not in 'Ready' state
            node_ready = False
            for condition in new_node['status']['conditions']:
                if condition['type'] == 'Ready' and \
                   condition['status'] == 'True':
                    node_ready = True
            if not node_ready:
                continue

            node = {}
            node['name'] = new_node['metadata']['name']
            node['type'] = new_node['metadata']['labels']['type']
            node['api'] = new_node['metadata']['selfLink']

            if 'allocatable' in new_node['status']:
                cpu = new_node['status']['allocatable']['cpu']
                mem = new_node['status']['allocatable']['memory']
                node['max_pods'] = int(
                    new_node['status']['allocatable']['pods'])
            else:
                cpu = new_node['status']['capacity']['cpu']
                mem = new_node['status']['capacity']['memory']
                node['max_pods'] = int(new_node['status']['capacity']['pods'])

            node['max_cpu'] = to_milicores(cpu)
            node['max_memory'] = to_bytes(mem)

            if self.args.debug:
                print "Adding node: {}".format(str(node))

            self.sql_conn.execute(
                'INSERT INTO nodes VALUES (?,?,?,?,?,?)',
                (node['name'], node['type'], node['api'], node['max_cpu'],
                 node['max_memory'], node['max_pods']))

    @staticmethod
    def load_container_limits(pod, containers):
        ''' process/store container limits data '''

        for container in containers:
            if 'limits' in container['resources']:
                cpu = container['resources']['limits'].get('cpu')
                if cpu:
                    pod['cpu_limits'] = pod.get('cpu_limits', 0) + \
                                        to_milicores(cpu)

                mem = container['resources']['limits'].get('memory')
                if mem:
                    pod['memory_limits'] = pod.get('memory_limits', 0) + \
                                           to_bytes(mem)

            if 'requests' in container['resources']:
                cpu = container['resources']['requests'].get('cpu')
                if cpu:
                    pod['cpu_requests'] = pod.get('cpu_requests', 0) + \
                                          to_milicores(cpu)

                mem = container['resources']['requests'].get('memory')
                if mem:
                    pod['memory_requests'] = pod.get('memory_requests', 0) + \
                                             to_bytes(mem)

    def load_pods(self):
        ''' put pod details into db '''

        self.sql_conn.execute('''CREATE TABLE pods
                                 (name text, namespace text, api text,
                                  cpu_limits integer, cpu_requests integer,
                                  memory_limits integer,
                                  memory_requests integer, node text)''')
        response = self.ora.get('/api/v1/pods')

        for new_pod in response['items']:
            if new_pod['status']['phase'] != 'Running':
                continue

            pod = {}
            pod['name'] = new_pod['metadata']['name']
            pod['namespace'] = new_pod['metadata']['namespace']
            pod['api'] = new_pod['metadata']['selfLink']
            pod['node'] = new_pod['spec']['nodeName']
            self.load_container_limits(pod, new_pod['spec']['containers'])

            self.sql_conn.execute(
                'INSERT INTO pods VALUES (?,?,?,?,?,?,?,?)',
                (pod['name'], pod['namespace'], pod['api'],
                 pod.get('cpu_limits'), pod.get('cpu_requests'),
                 pod.get('memory_limits'), pod.get('memory_requests'),
                 pod['node']))

    def get_memory_percentage(self):
        ''' calculate pod memory limits as a percentage
            of cluster (compute-node) memory capacity '''

        node_mem = 0
        pod_mem = 0

        for row in self.sql_conn.execute('''SELECT SUM(nodes.max_memory)
                                            FROM nodes
                                            WHERE nodes.type="compute"'''):
            node_mem = row[0]

        for row in self.sql_conn.execute('''SELECT SUM(pods.memory_limits)
                                            FROM pods, nodes
                                            WHERE pods.node=nodes.name
                                              AND nodes.type="compute"'''):
            pod_mem = row[0] or 0  # SUM() is NULL when no pods match

        return 100.0 * pod_mem / node_mem

    def get_largest_pod(self):
        ''' return memory limit for largest pod '''

        max_pod = 0
        for row in self.sql_conn.execute('''SELECT MAX(memory_limits)
                                            FROM pods, nodes
                                            WHERE pods.node=nodes.name
                                              AND nodes.type="compute"'''):
            max_pod = row[0]

        return max_pod

    def how_many_schedulable(self, size):
        ''' return how many pods with memory 'size' can be scheduled '''

        nodes = {}

        # get max mem for each compute node
        for row in self.sql_conn.execute('''SELECT nodes.name, nodes.max_memory
                                            FROM nodes
                                            WHERE nodes.type="compute"'''):
            # default memory_allocated to 0 so nodes with no pods don't
            # raise a KeyError below
            nodes[row[0]] = {'max_memory': row[1], 'memory_allocated': 0}

        # get memory allocated/granted for each compute node
        for row in self.sql_conn.execute('''SELECT nodes.name,
                                                   SUM(pods.memory_limits)
                                            FROM pods, nodes
                                            WHERE pods.node=nodes.name
                                              AND nodes.type="compute"
                                            GROUP BY nodes.name'''):
            nodes[row[0]]['memory_allocated'] = row[1]

        schedulable = 0
        for node in nodes.keys():
            available = nodes[node]['max_memory'] - \
                        nodes[node]['memory_allocated']
            num = available / size
            # ignore negative number (overcommitted nodes)
            if num > 0:
                schedulable += num

        return schedulable

    def node_capacity(self):
        ''' check capacity of compute nodes '''

        zbx_key_mem_alloc = "openshift.master.cluster.memory_allocation"
        zbx_key_max_pods = "openshift.master.cluster.max_mem_pods_schedulable"

        self.sql_conn = sqlite3.connect(':memory:')

        self.load_nodes()
        self.load_pods()

        memory_percentage = self.get_memory_percentage()

        largest = self.get_largest_pod()
        if self.args.debug:
            print "Largest memory pod: {}".format(largest)

        schedulable = self.how_many_schedulable(largest)

        print "Percentage of memory allocated: {}".format(memory_percentage)
        print "Number of max-size nodes schedulable: {}".format(schedulable)

        self.zagg_sender.add_zabbix_keys(
            {zbx_key_mem_alloc: int(memory_percentage)})
        self.zagg_sender.add_zabbix_keys({zbx_key_max_pods: schedulable})
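
# A minimal standalone sketch (hypothetical numbers) of the arithmetic behind
# how_many_schedulable() above: subtract each compute node's granted memory
# limits from its capacity and count how many largest-pod-sized chunks still
# fit, skipping overcommitted nodes.
def _demo_how_many_schedulable():
    ''' toy version of OpenshiftClusterCapacity.how_many_schedulable() '''
    gib = 1024 ** 3
    nodes = {
        'node1': {'max_memory': 16 * gib, 'memory_allocated': 10 * gib},
        'node2': {'max_memory': 16 * gib, 'memory_allocated': 18 * gib},
    }
    largest_pod = 2 * gib  # memory limit of the biggest running pod
    schedulable = 0
    for node in nodes.values():
        available = node['max_memory'] - node['memory_allocated']
        if available > 0:
            schedulable += available / largest_pod  # integer division on py2
    return schedulable

# node1 has 6 GiB free (3 pods); node2 is overcommitted and skipped -> 3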
class EtcdStatusZaggSender(object):
    """ class to gather all metrics from etcd daemons """

    def __init__(self):
        self.api_host = None
        self.args = None
        self.parser = None
        self.config = None
        self.etcd_ping = 0
        self.default_config = '/etc/openshift_tools/etcd_metrics.yml'
        self.zagg_sender = ZaggSender()

    def parse_args(self):
        '''Parse the arguments for this script'''
        self.parser = argparse.ArgumentParser(description="Script that gathers metrics from etcd")
        self.parser.add_argument('-d', '--debug', default=False,
                                 action="store_true", help="debug mode")
        self.parser.add_argument('-v', '--verbose', default=False,
                                 action="store_true", help="Verbose?")
        self.parser.add_argument('-t', '--test', default=False,
                                 action="store_true", help="Run the script but don't send to zabbix")
        self.parser.add_argument('-c', '--configfile', default=self.default_config,
                                 help="Config file that contains metrics to be collected, defaults to etcd_metrics.yml")

        self.args = self.parser.parse_args()

    def call_etcd_api(self, rest_path):
        '''Makes the API calls to rest endpoints in etcd'''
        try:
            response = requests.get(self.api_host + rest_path,
                                    cert=(self.config['etcd_info']['files']['ssl_client_cert'],
                                          self.config['etcd_info']['files']['ssl_client_key']),
                                    verify=False)
            self.etcd_ping = 1
        except requests.exceptions.ConnectionError as ex:
            print "ERROR talking to etcd API: {0}".format(ex.message)
        else:
            return response.content

    def json_metric(self, met):
        '''process json data from etcd'''
        return_data = {}
        api_response = self.call_etcd_api(met['path'])
        if api_response:
            content = json.loads(api_response)

            for item in met['values']:
                return_data[met['prefix'] + item['zab_key']] = content[item['src']]

        return return_data

    def text_metric(self, met):
        '''process text value from etcd'''
        return_data = {}

        content = self.call_etcd_api(met['path'])
        if content:
            for metric in text_string_to_metric_families(content):
                # skipping histogram and summary types unless we find a good way to add them to zabbix (unlikely)
                if metric.type in ['histogram', 'summary']:
                    continue
                elif metric.type in ['counter', 'gauge'] and metric.name in met['values']:
                    zab_metric_name = met['prefix'] + metric.name.replace('_', '.')
                    if len(metric.samples) > 1 and met['values'][metric.name]:
                        # the configured value names the label used to key
                        # each sample individually
                        sub_key = met['values'][metric.name]
                        for singlemetric in metric.samples:
                            return_data['{0}.{1}'.format(zab_metric_name, singlemetric[1][sub_key])] = singlemetric[2]
                    else:
                        return_data[zab_metric_name] = metric.samples[0][2]
                else:
                    if self.args.debug:
                        print 'Got unknown type of metric from etcd, skipping it: ({0}) '.format(metric.type)

        return return_data

    def run(self):
        ''' Get data from etcd API
        '''
        self.parse_args()

        try:
            with open(self.args.configfile, 'r') as configfile:
                self.config = yaml.safe_load(configfile)
        except IOError as ex:
            print 'There was a problem opening the config file: {0}'.format(ex)
            print 'Exiting'
            sys.exit(1)

        # find out the etcd port
        try:
            with open(self.config['etcd_info']['files']['openshift_master_config'], 'r') as f:
                om_config = yaml.safe_load(f)
        except IOError as ex:
            print 'Problem opening openshift master config: {0}'.format(ex)
            sys.exit(2)
        else:
            self.api_host = om_config["etcdClientInfo"]["urls"][0]

        # let's get the metrics
        for metric in self.config['etcd_info']['metrics']:
            if metric['type'] == 'text':
                self.zagg_sender.add_zabbix_keys(self.text_metric(metric))
            elif metric['type'] == 'json':
                self.zagg_sender.add_zabbix_keys(self.json_metric(metric))

        self.send_zagg_data()

    def send_zagg_data(self):
        ''' send the data to zagg, or print it to the console when the
            test option is used '''
        self.zagg_sender.add_zabbix_keys({'openshift.master.etcd.ping' : self.etcd_ping})

        if not self.args.test:
            self.zagg_sender.send_metrics()
        else:
            self.zagg_sender.print_unique_metrics()
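
# A hypothetical etcd_metrics.yml shaped the way json_metric() and
# text_metric() above expect it (paths, prefixes, and metric names are
# illustrative):
#
#   etcd_info:
#     files:
#       ssl_client_cert: /etc/origin/master/master.etcd-client.crt
#       ssl_client_key: /etc/origin/master/master.etcd-client.key
#       openshift_master_config: /etc/origin/master/master-config.yaml
#     metrics:
#       - type: json
#         path: /v2/stats/store
#         prefix: openshift.master.etcd.
#         values:
#           - {src: watchers, zab_key: watchers}
#       - type: text
#         path: /metrics
#         prefix: openshift.master.etcd.
#         values:
#           etcd_server_has_leader: ''
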
def main():
    ''' Get data from etcd API
    '''

    SSL_CLIENT_CERT = '/etc/openshift/master/master.etcd-client.crt'
    SSL_CLIENT_KEY = '/etc/openshift/master/master.etcd-client.key'
    OPENSHIFT_MASTER_CONFIG = '/etc/openshift/master/master-config.yaml'

    # find out the etcd port
    with open(OPENSHIFT_MASTER_CONFIG, 'r') as f:
        config = yaml.safe_load(f)

    API_HOST = config["etcdClientInfo"]["urls"][0]

    # define the store API URL
    API_URL = API_HOST + "/v2/stats/store"

    zs = ZaggSender()
    # Fetch the store statics from API
    try:
        response = requests.get(API_URL,
                                cert=(SSL_CLIENT_CERT, SSL_CLIENT_KEY),
                                verify=False)
        content = json.loads(response.content)
        etcd_ping = 1

        # parse the items and add it as metrics
        zs.add_zabbix_keys(
            {'openshift.master.etcd.create.success': content['createSuccess']})
        zs.add_zabbix_keys(
            {'openshift.master.etcd.create.fail': content['createFail']})
        zs.add_zabbix_keys(
            {'openshift.master.etcd.delete.success': content['deleteSuccess']})
        zs.add_zabbix_keys(
            {'openshift.master.etcd.delete.fail': content['deleteFail']})
        zs.add_zabbix_keys(
            {'openshift.master.etcd.get.success': content['getsSuccess']})
        zs.add_zabbix_keys(
            {'openshift.master.etcd.get.fail': content['getsFail']})
        zs.add_zabbix_keys(
            {'openshift.master.etcd.set.success': content['setsSuccess']})
        zs.add_zabbix_keys(
            {'openshift.master.etcd.set.fail': content['setsFail']})
        zs.add_zabbix_keys(
            {'openshift.master.etcd.update.success': content['updateSuccess']})
        zs.add_zabbix_keys(
            {'openshift.master.etcd.update.fail': content['updateFail']})
        zs.add_zabbix_keys(
            {'openshift.master.etcd.watchers': content['watchers']})

    except requests.exceptions.ConnectionError as ex:
        print "ERROR talking to etcd API: %s" % ex.message
        etcd_ping = 0

    zs.add_zabbix_keys({'openshift.master.etcd.ping': etcd_ping})

    # Finally, send them to zabbix
    zs.send_metrics()
class OpenshiftWebServiceChecker(object):
    """ Checks OpenShift web services """

    def __init__(self):
        self.args = None
        self.ora = None
        self.zagg_sender = None
        self.service_ip = None
        self.service_port = '443'

    def run(self):
        """  Main function to run the check """

        self.parse_args()
        self.ora = OpenshiftRestApi()
        self.zagg_sender = ZaggSender(verbose=self.args.verbose, debug=self.args.debug)

        status = False
        try:
            self.get_service()
            status = self.check_service()

        except Exception as ex:
            print "Problem retrieving data: %s " % ex.message

        self.zagg_sender.add_zabbix_keys({
            "openshift.webservice.{}.status".format(self.args.pod): status})

        self.zagg_sender.send_metrics()

    def get_service(self):
        """ Gets the service for a pod """

        print "\nLooking up services for pod\n"

        api_url = "/api/v1/services"
        if self.args.namespace and self.args.namespace != "all":
            api_url = '/api/v1/namespaces/{}/services'.format(self.args.namespace)

        print "using api url {}".format(api_url)

        api_yaml = self.ora.get(api_url, rtype='text')
        services = yaml.safe_load(api_yaml)

        for service in services["items"]:
            if self.args.pod and \
                self.args.pod in service["metadata"]["name"]:
                print "service IP is {}".format(service["spec"]["clusterIP"])
                self.service_ip = service["spec"]["clusterIP"]
                if self.args.portname is not None:
                    for port in service["spec"]["ports"]:
                        if port["name"] == self.args.portname:
                            self.service_port = port["port"]
                else:
                    self.service_port = service["spec"]["ports"][0]["port"]
            else:
                pass

    def check_service(self):
        """ Checks the web service """

        print "\nChecking web service\n"

        if self.args.insecure:
            proto = 'http'
        else:
            proto = 'https'

        url = '{}://{}:{}/{}'.format(
            proto,
            self.service_ip,
            self.service_port,
            self.args.url,
        )

        try:
            print "Performing check on URL: {}".format(url)
            response = urllib2.urlopen(url, timeout=30)

            if str(response.getcode()) == self.args.status:
                if self.args.content is None \
                    or self.args.content in response.read():
                    return True

        except urllib2.URLError:
            print "Received error accessing URL: {}".format(url)
        except socket.timeout:
            print "Timed out accessing URL: {}".format(url)

        return False


    def parse_args(self):
        """ parse the args from the cli """

        parser = argparse.ArgumentParser(description='Openshift web service checker')
        parser.add_argument('-p', '--pod', default=None, help='Check for pod with this specific name')
        parser.add_argument('-n', '--namespace', default=None, help='Check for pods in this namespace - "all" for all')
        parser.add_argument('-P', '--portname', default=None, help='name of the port to check')
        parser.add_argument('-u', '--url', default="/", help='URL to check. Defaults to "/".')
        parser.add_argument('-s', '--status', default="200", help='HTTP status code to expect. Defaults to 200')
        parser.add_argument('-c', '--content', default=None, help='Looks for a string in the content of the response.')
        parser.add_argument('-i', '--insecure', action='store_true', help='Use insecure http connection')
        parser.add_argument('-S', '--secure', action='store_true', help='Use secure https connection (default)')
        parser.add_argument('-v', '--verbose', action='store_true', default=None, help='Verbose?')
        parser.add_argument('--debug', action='store_true', default=None, help='Debug?')

        self.args = parser.parse_args()
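
# Example invocation (the script name and flag values are hypothetical):
# verify the hello-openshift service answers 200 with the expected body:
#   openshift-webservice-checker -p hello-openshift -n default \
#       -s 200 -c 'Hello OpenShift!'
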
class OpsZaggClient(object):
    """ class to send data to zagg """

    def __init__(self):
        self.zagg_sender = None
        self.args = None
        self.config = None
        self.pcp_metrics = []
        self.heartbeat = None

    def run(self):
        """ main function to run the script """

        self.parse_args()
        self.parse_config(self.args.config_file)
        self.config_zagg_sender()

        if self.args.send_pcp_metrics:
            self.add_pcp_metrics()

        if self.args.send_heartbeat:
            self.add_heartbeat()

        if self.args.key and self.args.value:
            self.add_zabbix_key()

        self.zagg_sender.send_metrics()

    def parse_args(self):
        """ parse the args from the cli """
        parser = argparse.ArgumentParser(description='Zagg metric sender')
        parser.add_argument('--send-pcp-metrics', help="send pcp metrics to zagg", action="store_true")
        parser.add_argument('--send-heartbeat', help="send heartbeat metric to zagg", action="store_true")
        parser.add_argument('-s', '--host', help='specify host name as registered in Zabbix')
        parser.add_argument('-z', '--zagg-server', help='hostname or IP of the Zagg server')
        parser.add_argument('--zagg-user', help='username of the Zagg server')
        parser.add_argument('--zagg-pass', help='password of the Zagg server')
        parser.add_argument('-k', '--key', help='zabbix key')
        parser.add_argument('-o', '--value', help='zabbix value')
        parser.add_argument('-c', '--config-file', help='ops-zagg-client config file',
                            default='/etc/openshift_tools/zagg_client.yaml')
        self.args = parser.parse_args()

    def parse_config(self, config_file):
        """ parse config file """
        with open(config_file) as cfg:
            self.config = yaml.safe_load(cfg)

    def config_zagg_sender(self):
        """ configure the zagg_sender """

        zagg_server = self.args.zagg_server if self.args.zagg_server else self.config['zagg']['host']
        zagg_user = self.args.zagg_user if self.args.zagg_user else self.config['zagg']['user']
        zagg_password = self.args.zagg_pass if self.args.zagg_pass else self.config['zagg']['pass']
        host = self.args.host if self.args.host else self.config['host']['name']

        zagg_conn = ZaggConnection(host=zagg_server,
                                   user=zagg_user,
                                   password=zagg_password,
                                  )

        self.zagg_sender = ZaggSender(host, zagg_conn)

    def add_heartbeat(self):
        """ create a heartbeat metric """
        heartbeat = ZaggHeartbeat(templates=self.config['heartbeat']['templates'],
                                  hostgroups=self.config['heartbeat']['hostgroups'],
                                 )
        self.zagg_sender.add_heartbeat(heartbeat)

    def add_pcp_metrics(self):
        """ collect pcp metrics to send to ZaggSender """

        self.zagg_sender.add_pcp_metrics(self.config['pcp']['metrics'])

    def add_zabbix_key(self):
        """ send zabbix key/value pair to zagg """
        self.zagg_sender.add_zabbix_keys({self.args.key : self.args.value})
def send_zagg_data(key_zabbix, result):
    ''' send data to Zagg'''
    zgs = ZaggSender()
    zgs.add_zabbix_keys({key_zabbix: result})
    zgs.send_metrics()
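# e.g. send_zagg_data('openshift.master.app.create', 0) queues the key/value
# pair and pushes it to Zagg in one call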
def send_zagg_data(build_ran, create_app, http_code, run_time):
    """ send data to Zagg"""
    logger.debug("send_zagg_data()")

    zgs_time = time.time()
    zgs = ZaggSender()
    logger.info("Send data to Zagg")

    if build_ran == 1:
        zgs.add_zabbix_keys({'openshift.master.app.build.create': create_app})
        zgs.add_zabbix_keys(
            {'openshift.master.app.build.create.code': http_code})
        zgs.add_zabbix_keys(
            {'openshift.master.app.build.create.time': run_time})
    else:
        zgs.add_zabbix_keys({'openshift.master.app.create': create_app})
        zgs.add_zabbix_keys({'openshift.master.app.create.code': http_code})
        zgs.add_zabbix_keys({'openshift.master.app.create.time': run_time})

    try:
        zgs.send_metrics()
        logger.info("Data sent to Zagg in %s seconds",
                    str(time.time() - zgs_time))
    except Exception:
        logger.error("Error sending data to Zagg: %s \n %s ",
                     sys.exc_info()[0],
                     sys.exc_info()[1])
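        # tail of a DockerWatcher disk-usage helper: derive thin-pool totals
        # and percent-available figures on the stats object it returns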
        if not dds.is_loopback:
            dds.data_space_total = dds.data_space_used + dds.data_space_available
            dds.metadata_space_total = dds.metadata_space_used + dds.metadata_space_available

        dds.data_space_percent_available = (dds.data_space_available /
                                            dds.data_space_total) * 100
        dds.metadata_space_percent_available = (dds.metadata_space_available /
                                                dds.metadata_space_total) * 100

        return dds


if __name__ == "__main__":
    keys = None
    exit_code = 0
    zs = ZaggSender()
    try:
        cli = AutoVersionClient(base_url='unix://var/run/docker.sock')
        dw = DockerWatcher(cli)
        dw_dds = dw.get_disk_usage()

        keys = {
            'docker.storage.data.space.used': dw_dds.data_space_used,
            'docker.storage.data.space.available': dw_dds.data_space_available,
            'docker.storage.data.space.percent_available':
            dw_dds.data_space_percent_available,
            'docker.storage.data.space.total': dw_dds.data_space_total,
            'docker.storage.metadata.space.used': dw_dds.metadata_space_used,
            'docker.storage.metadata.space.available':
            dw_dds.metadata_space_available,
            'docker.storage.metadata.space.percent_available':
            dw_dds.metadata_space_percent_available,
            'docker.storage.metadata.space.total': dw_dds.metadata_space_total,
        }
        zs.add_zabbix_keys(keys)
    except Exception as ex:
        print "Error collecting docker storage stats: %s" % ex
        exit_code = 1

    zs.send_metrics()
    sys.exit(exit_code)

class CertificateReporting(object):
    ''' class with ability to parse through x509 certificates to extract
        and report to zabbix the expiration date associated with the cert '''
    def __init__(self):
        ''' constructor '''
        self.args = None
        self.current_date = datetime.datetime.today()
        self.parse_args()
        self.zsend = ZaggSender(debug=self.args.debug)

    def dprint(self, msg):
        ''' debug printer '''

        if self.args.debug:
            print msg

    def parse_args(self):
        ''' parse command line args '''
        argparser = argparse.ArgumentParser(description='certificate checker')
        argparser.add_argument('--debug', default=False, action='store_true')
        argparser.add_argument(
            '--cert-list',
            default="/etc/origin",
            type=str,
            help='comma-separated list of dirs/certificates')
        self.args = argparser.parse_args()

    def days_to_expiration(self, cert_file):
        ''' return days to expiration for a certificate '''

        crypto = OpenSSL.crypto

        with open(cert_file) as cert_fd:
            cert = cert_fd.read()
        certificate = crypto.load_certificate(crypto.FILETYPE_PEM, cert)
        expiration_date_asn1 = certificate.get_notAfter()
        # expiration returned in ASN.1 GENERALIZEDTIME format
        # YYYYMMDDhhmmss with a trailing 'Z'
        expiration_date = parser.parse(expiration_date_asn1).replace(
            tzinfo=None)

        delta = expiration_date - self.current_date
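        # e.g. notAfter '20250101000000Z' checked on 2024-12-02 -> 30 days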
        return delta.days

    def process_certificates(self):
        ''' check through list of certificates/directories '''

        for cert in self.args.cert_list.split(','):
            if not os.path.exists(cert):
                self.dprint("{} does not exist. skipping.".format(cert))
                continue

            mode = os.stat(cert).st_mode
            if S_ISDIR(mode):
                self.all_certs_in_dir(cert)
            elif S_ISREG(mode):
                days = self.days_to_expiration(cert)
                self.dprint("{} in {} days".format(cert, days))
                self.add_to_zabbix(cert, days)
            else:
                self.dprint("not a file. not a directory. skipping.")

        # now push out all queued up item(s) to zabbix
        self.zsend.send_metrics()

    def add_to_zabbix(self, certificate, days_to_expiration):
        ''' queue up item for submission to zabbix '''

        self.zsend.add_zabbix_dynamic_item(CERT_DISC_KEY, CERT_DISC_MACRO,
                                           [certificate])
        zbx_key = "{}[{}]".format(CERT_DISC_KEY, certificate)
        self.zsend.add_zabbix_keys({zbx_key: days_to_expiration})

    def all_certs_in_dir(self, directory):
        ''' recursively go through all *.crt files in 'directory' '''

        for root, _, filenames in os.walk(directory):
            for filename in filenames:
                if filename.endswith('.crt'):
                    full_path = os.path.join(root, filename)
                    days = self.days_to_expiration(full_path)
                    self.dprint("{} in {} days".format(full_path, days))
                    self.add_to_zabbix(full_path, days)
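
# Example run of the reporter above (the class parses its own CLI args in the
# constructor, so this is all it takes):
#   reporter = CertificateReporting()
#   reporter.process_certificates()
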
"""
    docker container DNS tester
"""

# Adding the ignore because it does not like the naming of the script
# to be different than the class name
# pylint: disable=invalid-name


from docker import AutoVersionClient
from openshift_tools.monitoring.zagg_sender import ZaggSender

ZBX_KEY = "docker.container.dns.resolution"

if __name__ == "__main__":
    cli = AutoVersionClient(base_url="unix://var/run/docker.sock")
    container = cli.create_container(
        image="docker-registry.ops.rhcloud.com/ops/oso-rhel7-host-monitoring", command="getent hosts redhat.com"
    )
    cli.start(container=container.get("Id"))
    exit_code = cli.wait(container)
    cli.remove_container(container.get("Id"))

    zs = ZaggSender()
    zs.add_zabbix_keys({ZBX_KEY: exit_code})

    print "Sending these metrics:"
    print ZBX_KEY + ": " + str(exit_code)
    zs.send_metrics()
    print "\nDone.\n"