class OpenshiftPodChecker(object):
    """ Checks for Openshift Pods """

    def __init__(self):
        self.args = None
        self.ora = None
        self.zagg_sender = None

    def run(self):
        """ Main function to run the check """

        self.parse_args()
        self.ora = OpenshiftRestApi()
        self.zagg_sender = ZaggSender(verbose=self.args.verbose, debug=self.args.debug)

        try:
            self.get_pods()
        except Exception as ex:
            print "Problem retrieving pod data: %s " % ex.message

        self.zagg_sender.send_metrics()

    def get_pods(self):
        """ Gets pod data """

        print "\nPerforming pod check ...\n"

        api_url = "/api/v1/pods"
        if self.args.namespace is not None and self.args.namespace != "all":
            api_url = "/api/v1/namespaces/{}/pods".format(self.args.namespace)

        api_yaml = self.ora.get(api_url, rtype="text")
        pods = yaml.safe_load(api_yaml)

        pod_count = 0
        for pod in pods["items"]:
            if self.args.pod and self.args.pod in pod["metadata"]["name"]:
                print "status of {} is {}".format(pod["metadata"]["name"],
                                                  pod["status"]["phase"])
                if pod["status"]["phase"] == "Running":
                    pod_count += 1

        self.zagg_sender.add_zabbix_keys(
            {"service.pod.{}.count".format(self.args.pod): pod_count})

    def parse_args(self):
        """ parse the args from the cli """

        parser = argparse.ArgumentParser(description="Openshift pod sender")
        parser.add_argument("-p", "--pod", default=None,
                            help="Check for pod with this specific name")
        parser.add_argument("-n", "--namespace", default=None,
                            help='Check for pods in this namespace - "all" for all')
        parser.add_argument("-v", "--verbose", action="store_true", default=None, help="Verbose?")
        parser.add_argument("--debug", action="store_true", default=None, help="Debug?")
        self.args = parser.parse_args()

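# A minimal usage sketch (an assumed entry point, not shown in the snippet
# above): instantiate the checker and run it from the command line.
if __name__ == "__main__":
    OPC = OpenshiftPodChecker()
    OPC.run()
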
def main():
    ''' Do the application creation '''
    print '################################################################################'
    print '  Starting App Create'
    print '################################################################################'

    namespace = 'ops-monitor-' + os.environ['ZAGG_CLIENT_HOSTNAME']
    oocmd = OpenShiftOC(namespace, 'hello-openshift', verbose=False)
    app = 'openshift/hello-openshift:v1.0.6'

    start_time = time.time()

    if namespace in oocmd.get_projects():
        oocmd.delete_project()

    oocmd.new_project()
    oocmd.new_app(app)

    create_app = 1
    pod = None

    # Now we wait until the pod comes up
    for _ in range(24):
        time.sleep(5)

        pod = oocmd.get_pod()
        if pod and pod['status']:
            print 'Polling Pod status: %s' % pod['status']['phase']

        if pod and pod['status']['phase'] == 'Running' and 'podIP' in pod['status']:
            #c_results = curl(pod['status']['podIP'], '8080')
            #if c_results == 'Hello OpenShift!\n':
            print 'Finished.'
            print 'State: Success'
            print 'Time: %s' % str(time.time() - start_time)
            create_app = 0
            break
    else:
        print 'Finished.'
        print 'State: Fail'
        print 'Time: %s' % str(time.time() - start_time)
        print 'Fetching Events:'
        oocmd.verbose = True
        print oocmd.get_events()
        print 'Fetching Logs:'
        print oocmd.get_logs()
        print 'Fetching Pod:'
        print pod

    if namespace in oocmd.get_projects():
        oocmd.delete_project()

    zgs = ZaggSender()
    zgs.add_zabbix_keys({'openshift.master.app.create': create_app})
    zgs.send_metrics()

def config_zagg_sender(self):
    """ configure the zagg_sender """

    zagg_url = self.args.zagg_url if self.args.zagg_url else self.config['zagg']['url']
    zagg_user = self.args.zagg_user if self.args.zagg_user else self.config['zagg']['user']
    zagg_password = self.args.zagg_pass if self.args.zagg_pass else self.config['zagg']['pass']

    zagg_ssl_verify = self.config['zagg'].get('ssl_verify', False)
    zagg_verbose = self.config['zagg'].get('verbose', False)

    if isinstance(zagg_verbose, str):
        zagg_verbose = (zagg_verbose == 'True')
    if self.args.verbose:
        zagg_verbose = self.args.verbose

    if isinstance(zagg_ssl_verify, str):
        zagg_ssl_verify = (zagg_ssl_verify == 'True')
    if self.args.zagg_ssl_verify:
        zagg_ssl_verify = self.args.zagg_ssl_verify

    zagg_conn = ZaggConnection(url=zagg_url,
                               user=zagg_user,
                               password=zagg_password,
                               ssl_verify=zagg_ssl_verify,
                               verbose=zagg_verbose,
                              )

    host = self.args.host if self.args.host else self.config['host']['name']
    self.zagg_sender = ZaggSender(host, zagg_conn)

def config_zagg_sender(self):
    """ configure the zagg_sender """

    zagg_url = self.args.zagg_url if self.args.zagg_url else self.config["zagg"]["url"]
    zagg_user = self.args.zagg_user if self.args.zagg_user else self.config["zagg"]["user"]
    zagg_password = self.args.zagg_pass if self.args.zagg_pass else self.config["zagg"]["pass"]
    zagg_verbose = self.args.verbose if self.args.verbose else self.config["zagg"]["verbose"]
    zagg_debug = self.args.debug if self.args.debug else self.config["zagg"]["debug"]
    zagg_ssl_verify = self.args.zagg_ssl_verify if self.args.zagg_ssl_verify \
        else self.config["zagg"]["ssl_verify"]
    host = self.args.host if self.args.host else self.config["host"]["name"]

    if isinstance(zagg_verbose, str):
        zagg_verbose = zagg_verbose == "True"

    if isinstance(zagg_debug, str):
        zagg_debug = zagg_debug == "True"

    if isinstance(zagg_ssl_verify, str):
        zagg_ssl_verify = zagg_ssl_verify == "True"

    zagg_conn = ZaggConnection(
        url=zagg_url,
        user=zagg_user,
        password=zagg_password,
        ssl_verify=zagg_ssl_verify,
        debug=zagg_debug
    )

    self.zagg_sender = ZaggSender(host, zagg_conn, zagg_verbose, zagg_debug)

def config_zagg_sender(self):
    """ configure the zagg_sender """

    zagg_url = self.args.zagg_url if self.args.zagg_url else self.config['zagg']['url']
    zagg_user = self.args.zagg_user if self.args.zagg_user else self.config['zagg']['user']
    zagg_password = self.args.zagg_pass if self.args.zagg_pass else self.config['zagg']['pass']
    zagg_verbose = self.args.verbose if self.args.verbose else self.config['zagg']['verbose']
    zagg_debug = self.args.debug if self.args.debug else self.config['zagg']['debug']
    zagg_ssl_verify = self.args.zagg_ssl_verify if self.args.zagg_ssl_verify \
        else self.config['zagg']['ssl_verify']
    host = self.args.host if self.args.host else self.config['host']['name']

    if isinstance(zagg_verbose, str):
        zagg_verbose = (zagg_verbose == 'True')

    if isinstance(zagg_debug, str):
        zagg_debug = (zagg_debug == 'True')

    if isinstance(zagg_ssl_verify, str):
        zagg_ssl_verify = (zagg_ssl_verify == 'True')

    zagg_conn = ZaggConnection(url=zagg_url,
                               user=zagg_user,
                               password=zagg_password,
                               ssl_verify=zagg_ssl_verify,
                               debug=zagg_debug,
                              )

    self.zagg_sender = ZaggSender(host, zagg_conn, zagg_verbose, zagg_debug)

def main():
    ''' Do the application creation '''

    proj_name = 'ops-monitor-appbuild' + os.environ['ZAGG_CLIENT_HOSTNAME']
    app = 'nodejs-example'
    verbose = True

    start_time = time.time()

    if proj_name in OpenShiftOC.get_projects(verbose):
        OpenShiftOC.delete_project(proj_name, verbose)

    OpenShiftOC.new_project(proj_name, verbose)
    OpenShiftOC.new_app(app, proj_name, verbose)

    # 1 is error
    create_app = 1
    build_time = 0
    create_time = 0

    # Now we wait until the pod comes up
    for _ in range(24):
        time.sleep(10)

        # checking the building pod
        build_pod = OpenShiftOC.get_build_pod(app, proj_name, verbose)

        if build_pod and build_pod['status']['phase'] == 'Failed':
            build_time = time.time() - start_time
            print 'fail'
            break

        if build_pod and build_pod['status']['phase'] == 'Succeeded':
            build_time = time.time() - start_time
            for _ in range(24):
                time.sleep(5)
                create_app = check_route(app, proj_name, verbose)
                if create_app == 0:
                    create_time = time.time() - start_time
                    print 'success'
                    print 'Time: %s' % create_time
                    print 'BuildTime: %s' % build_time
                    break
            if create_app == 0:
                break
    else:
        build_time = time.time() - start_time
        print 'BuildTime: %s' % build_time
        print 'fail'

    if proj_name in OpenShiftOC.get_projects(verbose):
        OpenShiftOC.delete_project(proj_name, verbose)

    zgs = ZaggSender()
    zgs.add_zabbix_keys({'openshift.master.app.build.create': create_app})
    zgs.add_zabbix_keys({'openshift.master.app.create.time': create_time})
    zgs.add_zabbix_keys({'openshift.master.app.build.time': build_time})
    zgs.send_metrics()

def main():
    ''' Do the application creation '''

    proj_name = 'ops-monitor-' + os.environ['ZAGG_CLIENT_HOSTNAME']
    app = 'openshift/hello-openshift:v1.0.6'
    verbose = False
    if len(sys.argv) > 1 and sys.argv[1] == '-v':
        verbose = True

    start_time = time.time()

    if proj_name in OpenShiftOC.get_projects(verbose):
        OpenShiftOC.delete_project(proj_name, verbose)

    OpenShiftOC.new_project(proj_name, verbose)
    OpenShiftOC.new_app(app, proj_name, verbose)

    create_app = 1

    # Now we wait until the pod comes up
    for _ in range(24):
        time.sleep(5)

        pod = OpenShiftOC.get_pod('hello-openshift', proj_name, verbose)
        if pod and pod['status']:
            if verbose:
                print pod['status']['phase']

        if pod and pod['status']['phase'] == 'Running' and 'podIP' in pod['status']:
            #c_results = curl(pod['status']['podIP'], '8080')
            #if c_results == 'Hello OpenShift!\n':
            if verbose:
                print 'success'
                print 'Time: %s' % str(time.time() - start_time)
            create_app = 0
            break
    else:
        if verbose:
            print 'Time: %s' % str(time.time() - start_time)
            print 'fail'

    if proj_name in OpenShiftOC.get_projects(verbose):
        OpenShiftOC.delete_project(proj_name, verbose)

    zgs = ZaggSender()
    zgs.add_zabbix_keys({'openshift.master.app.create': create_app})
    zgs.send_metrics()

def run(self):
    """ Main function to run the check """

    self.parse_args()
    self.zagg_sender = ZaggSender(verbose=self.args.verbose, debug=self.args.debug)

    if self.check_dns_port_alive():
        self.get_openshift_services()
        self.do_dns_check()

    self.zagg_sender.send_metrics()

def main():
    """ Get data from oadm and send to zabbix """

    ## set oadm config
    oadm_command = "KUBECONFIG=/etc/openshift/master/admin.kubeconfig /usr/bin/oadm"

    ## get list of running pods
    podlist_cmd = oadm_command + " manage-node --list-pods --selector=''"

    # get the output of oadm
    output = subprocess.check_output(podlist_cmd, shell=True)

    # pare down to only lines that contain "Running"
    running_pods_list = [p for p in output.split("\n") if "Running" in p]

    # we now have all the data we want.  Let's send it to Zagg
    zs = ZaggSender()
    zs.add_zabbix_keys({"running_pods_count": len(running_pods_list)})

    # Finally, send them to zabbix
    zs.send_metrics()

def main():
    """ Main function to run the check """

    args = parse_args()
    zagg_sender = ZaggSender(verbose=args.verbose, debug=args.debug)

    discovery_key_disk = 'disc.disk'
    interval = 3
    pcp_disk_dev_metrics = ['disk.dev.total', 'disk.dev.avactive']
    item_prototype_macro_disk = '#OSO_DISK'
    item_prototype_key_tps = 'disc.disk.tps'
    item_prototype_key_putil = 'disc.disk.putil'

    disk_metrics = pminfo.get_sampled_data(pcp_disk_dev_metrics, interval, 2)

    pcp_metrics_divided = {}
    for metric in pcp_disk_dev_metrics:
        pcp_metrics_divided[metric] = {k: v for k, v in disk_metrics.items() if metric in k}

    # do TPS checks; use disk.dev.total
    filtered_disk_totals = clean_up_metric_dict(pcp_metrics_divided[pcp_disk_dev_metrics[0]],
                                                pcp_disk_dev_metrics[0] + '.')

    # Add dynamic items
    zagg_sender.add_zabbix_dynamic_item(discovery_key_disk, item_prototype_macro_disk,
                                        filtered_disk_totals.keys())

    # calculate the TPS and add them to the ZaggSender
    for disk, totals in filtered_disk_totals.iteritems():
        disk_tps = (totals[1] - totals[0]) / interval
        zagg_sender.add_zabbix_keys({'%s[%s]' % (item_prototype_key_tps, disk): disk_tps})

    # do % Util checks; use disk.dev.avactive
    filtered_disk_totals = clean_up_metric_dict(pcp_metrics_divided[pcp_disk_dev_metrics[1]],
                                                pcp_disk_dev_metrics[1] + '.')

    # calculate the % Util and add them to the ZaggSender
    for disk, totals in filtered_disk_totals.iteritems():
        total_active = float(totals[1] - totals[0]) / 1000.0
        putil = 100 * total_active / interval

        zagg_sender.add_zabbix_keys({'%s[%s]' % (item_prototype_key_putil, disk): putil})

    zagg_sender.send_metrics()

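# A worked example of the two calculations above, using made-up counter
# samples (not real pminfo output): disk.dev.total is a cumulative transfer
# counter, and disk.dev.avactive is cumulative active time in milliseconds,
# so deltas over the sample interval yield transfers/sec and % utilization.
def demo_disk_math():
    """ Show the TPS and %util math on hypothetical counter samples. """
    interval = 3                       # seconds between the two samples
    totals = [1200, 1290]              # disk.dev.total samples
    avactive = [5000, 6500]            # disk.dev.avactive samples (ms)

    disk_tps = (totals[1] - totals[0]) / interval             # 30 transfers/sec
    total_active = float(avactive[1] - avactive[0]) / 1000.0  # 1.5 sec active
    putil = 100 * total_active / interval                     # 50.0 % util
    print disk_tps, putil
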
def run(self):
    """ Main function to run the check """

    self.parse_args()
    self.ora = OpenshiftRestApi()
    self.zagg_sender = ZaggSender(verbose=self.args.verbose, debug=self.args.debug)

    try:
        self.get_pods()
    except Exception as ex:
        print "Problem retrieving pod data: %s " % ex.message

    self.zagg_sender.send_metrics()

def config_zagg_sender(self):
    """ configure the zagg_sender """

    zagg_server = self.args.zagg_server if self.args.zagg_server else self.config['zagg']['host']
    zagg_user = self.args.zagg_user if self.args.zagg_user else self.config['zagg']['user']
    zagg_password = self.args.zagg_pass if self.args.zagg_pass else self.config['zagg']['pass']
    host = self.args.host if self.args.host else self.config['host']['name']

    zagg_conn = ZaggConnection(host=zagg_server,
                               user=zagg_user,
                               password=zagg_password,
                              )

    self.zagg_sender = ZaggSender(host, zagg_conn)

class OpenshiftSkyDNSZaggClient(object):
    """ Checks for the Openshift Master SkyDNS """

    def __init__(self):
        self.args = None
        self.zagg_sender = None
        self.ora = OpenshiftRestApi()
        self.dns_host = '127.0.0.1'
        self.dns_port = 53
        self.openshift_services = []

    def run(self):
        """ Main function to run the check """

        self.parse_args()
        self.zagg_sender = ZaggSender(verbose=self.args.verbose, debug=self.args.debug)

        if self.check_dns_port_alive():
            self.get_openshift_services()
            self.do_dns_check()

        self.zagg_sender.send_metrics()

    def parse_args(self):
        """ parse the args from the cli """

        parser = argparse.ArgumentParser(description='Network metric sender')
        parser.add_argument('-v', '--verbose', action='store_true', default=None, help='Verbose?')
        parser.add_argument('--debug', action='store_true', default=None, help='Debug?')
        self.args = parser.parse_args()

    def check_dns_port_alive(self):
        """ Verify that the DNS port (TCP 53) is alive """

        print "\nPerforming Openshift DNS port check..."

        try:
            s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
            s.settimeout(1)
            s.connect((self.dns_host, self.dns_port))
            s.close()

            print "\nOpenshift SkyDNS host: %s, port: %s is OPEN" % (self.dns_host, self.dns_port)
            print "================================================\n"
            self.zagg_sender.add_zabbix_keys({'openshift.master.skydns.port.open': 1})

            return True

        except socket.error as e:
            print "\nOpenshift SkyDNS host: %s, port: %s is CLOSED" % (self.dns_host, self.dns_port)
            print "Python Error: %s" % e
            print "================================================\n"
            self.zagg_sender.add_zabbix_keys({'openshift.master.skydns.port.open': 0})

            return False

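# do_dns_check and get_openshift_services are not shown in this snippet. A
# rough sketch of the kind of lookup do_dns_check would perform, assuming
# the dnspython library and a hypothetical service name (illustrative only):
import dns.resolver

def demo_skydns_lookup():
    """ Resolve a service name against the local SkyDNS instance. """
    resolver = dns.resolver.Resolver()
    resolver.nameservers = ['127.0.0.1']  # SkyDNS listens on the master
    answer = resolver.query('kubernetes.default.svc.cluster.local', 'A')
    for record in answer:
        print record
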
def main():
    """ Gather and send details on all visible S3 buckets """

    discovery_key = "disc.aws"
    discovery_macro = "#S3_BUCKET"
    prototype_s3_size = "disc.aws.size"
    prototype_s3_count = "disc.aws.objects"

    args = parse_args()

    ocutil = OCUtil()
    oc_yaml = ocutil.get_secrets("dockerregistry")

    aws_access, aws_secret = get_aws_creds(oc_yaml)
    awsutil = AWSUtil(aws_access, aws_secret, args.debug)

    bucket_list = awsutil.get_bucket_list(args.debug)

    bucket_stats = {}

    for bucket in bucket_list:
        s3_size, s3_objects = awsutil.get_bucket_info(bucket, args.debug)
        bucket_stats[bucket] = {"size": s3_size, "objects": s3_objects}

    if args.debug:
        print "Bucket stats: " + str(bucket_stats)

    if args.test:
        print "Test-only. Received results: " + str(bucket_stats)
    else:
        zgs = ZaggSender(verbose=args.debug)
        zgs.add_zabbix_dynamic_item(discovery_key, discovery_macro, bucket_list)
        for bucket in bucket_stats.keys():
            zab_key = "{}[{}]".format(prototype_s3_size, bucket)
            zgs.add_zabbix_keys({zab_key: int(round(bucket_stats[bucket]["size"]))})

            zab_key = "{}[{}]".format(prototype_s3_count, bucket)
            zgs.add_zabbix_keys({zab_key: bucket_stats[bucket]["objects"]})
        zgs.send_metrics()

def main():
    """ Main function to run the check """

    args = parse_args()
    zagg_sender = ZaggSender(verbose=args.verbose, debug=args.debug)

    discovery_key_network = 'disc.network'
    pcp_network_dev_metrics = ['network.interface.in.bytes', 'network.interface.out.bytes']
    item_proto_macro_network = '#OSO_NET_INTERFACE'
    item_proto_key_in_bytes = 'disc.network.in.bytes'
    item_proto_key_out_bytes = 'disc.network.out.bytes'

    network_metrics = pminfo.get_metrics(pcp_network_dev_metrics)

    pcp_metrics_divided = {}
    for metric in pcp_network_dev_metrics:
        pcp_metrics_divided[metric] = {k: v for k, v in network_metrics.items() if metric in k}

    # do Network In; use network.interface.in.bytes
    filtered_network_totals = clean_up_metric_dict(pcp_metrics_divided[pcp_network_dev_metrics[0]],
                                                   pcp_network_dev_metrics[0] + '.')

    # Add dynamic items
    zagg_sender.add_zabbix_dynamic_item(discovery_key_network, item_proto_macro_network,
                                        filtered_network_totals.keys())

    # Report Network IN bytes; add them to the ZaggSender
    for interface, total in filtered_network_totals.iteritems():
        zagg_sender.add_zabbix_keys({'%s[%s]' % (item_proto_key_in_bytes, interface): total})

    # Report Network OUT bytes; use network.interface.out.bytes
    filtered_network_totals = clean_up_metric_dict(pcp_metrics_divided[pcp_network_dev_metrics[1]],
                                                   pcp_network_dev_metrics[1] + '.')

    # add the OUT byte totals to the ZaggSender
    for interface, total in filtered_network_totals.iteritems():
        zagg_sender.add_zabbix_keys({'%s[%s]' % (item_proto_key_out_bytes, interface): total})

    zagg_sender.send_metrics()

def run(self):
    """ Main function to run the check """

    self.parse_args()
    self.ora = OpenshiftRestApi()
    self.zagg_sender = ZaggSender(verbose=self.args.verbose, debug=self.args.debug)

    # default to 0 ("down") so a failed API call still reports a value
    # instead of raising NameError below
    status = 0
    try:
        self.get_service()
        status = self.check_service()
    except Exception as ex:
        print "Problem retrieving data: %s " % ex.message

    self.zagg_sender.add_zabbix_keys(
        {"openshift.webservice.{}.status".format(self.args.pod): status})

    self.zagg_sender.send_metrics()

def report_to_zabbix(self, disc_key, disc_macro, item_proto_key, value):
    """ Sends the command's exit code to zabbix. """
    zs = ZaggSender()

    # Add the dynamic item
    self.verbose_print("Adding the dynamic item to Zabbix - %s, %s, [%s]" %
                       (disc_key, disc_macro, self.args.name))
    zs.add_zabbix_dynamic_item(disc_key, disc_macro, [self.args.name])

    # Send the value for the dynamic item
    self.verbose_print("Sending metric to Zabbix - %s[%s]: %s" %
                       (item_proto_key, self.args.name, value))
    zs.add_zabbix_keys({'%s[%s]' % (item_proto_key, self.args.name): value})

    # Actually send them
    zs.send_metrics()

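# A hypothetical invocation of report_to_zabbix (the key names below are
# illustrative, not taken from the original script). For a runner whose
# self.args.name is 'cron.example', a call such as
#
#   self.report_to_zabbix('disc.ops.runner', '#OSO_COMMAND',
#                         'disc.ops.runner.command.exitcode', 0)
#
# queues the dynamic item and then sets the Zabbix key
# 'disc.ops.runner.command.exitcode[cron.example]' to 0.
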
def main():
    """ Get data and send to zabbix """

    vswitch_ports_count = get_vswitch_ports()
    vswitch_pids_count = get_vswitch_pids()

    print "Found %s OVS ports" % vswitch_ports_count
    print "Found %s OVS pids" % vswitch_pids_count

    # we now have all the data we want.  Let's send it to Zagg
    zs = ZaggSender()

    zs.add_zabbix_keys({"openshift.node.ovs.ports.count": vswitch_ports_count})
    zs.add_zabbix_keys({"openshift.node.ovs.pids.count": vswitch_pids_count})

    # Finally, send them to zabbix
    zs.send_metrics()

def run(self):
    """ Main function to run the check """

    self.parse_args()
    self.get_kubeconfig()
    ocutil = OCUtil(config_file=self.kubeconfig, verbose=self.args.verbose)
    self.zagg_sender = ZaggSender(verbose=self.args.verbose, debug=self.args.debug)

    try:
        oc_yaml = ocutil.get_service('docker-registry')
        self.get_registry_service(oc_yaml)
        oc_yaml = ocutil.get_endpoint('docker-registry')
        self.get_registry_endpoints(oc_yaml)
    except Exception as ex:
        print "Problem retrieving registry IPs: %s " % ex.message

    self.registry_service_check()
    self.registry_health_check()

    self.zagg_sender.send_metrics()

def run(self):
    """ Main function to run the check """

    self.parse_args()
    self.zagg_sender = ZaggSender(verbose=self.args.verbose, debug=self.args.debug)

    try:
        if self.args.healthz or self.args.all_checks:
            self.healthz_check()
    except Exception as ex:
        print "Problem performing healthz check: %s " % ex.message
        self.zagg_sender.add_zabbix_keys({'openshift.master.api.healthz': 'false'})

    try:
        if self.args.api_ping or self.args.all_checks:
            self.api_ping()

        if self.args.project_count or self.args.all_checks:
            self.project_count()

        if self.args.pod_count or self.args.all_checks:
            self.pod_count()

        if self.args.user_count or self.args.all_checks:
            self.user_count()

    except Exception as ex:
        print "Problem performing Openshift API checks: %s " % ex.message
        self.zagg_sender.add_zabbix_keys({'openshift.master.api.ping': 0})  # Openshift API is down

    try:
        if self.args.metrics or self.args.all_checks:
            self.metric_check()

    except Exception as ex:
        print "Problem getting Openshift metrics at /metrics: %s " % ex.message
        self.zagg_sender.add_zabbix_keys({'openshift.master.metric.ping': 0})  # Openshift Metrics are down

    self.zagg_sender.send_metrics()

def main():
    ''' Run pminfo against a list of metrics. Sample metrics passed in for an
        amount of time and report data to zabbix
    '''
    args, parser = parse_args()

    if not args.metrics:
        print
        print 'Please specify metrics with -m.'
        print
        parser.print_help()
        sys.exit(1)

    metrics = args.metrics
    interval = int(args.interval)
    count = int(args.count)

    # Gather sampled data
    data = pminfo.get_sampled_data(metrics, interval, count)

    zab_results = collections.defaultdict(list)
    for metric_name, val in data.items():
        if 'kernel' in metric_name:
            for sample in range(len(val)):
                if sample + 1 == len(val):
                    break
                zab_results[metric_name].append(
                    pminfo.calculate_percent_cpu(val[sample], val[sample + 1], interval))
        else:
            print 'NOT SUPPORTED: [%s]' % metric_name

        if zab_results.get(metric_name, None) is not None and (args.verbose or args.debug):
            print '%s: %.2f' % (metric_name, zab_results[metric_name][-1])

    zab_results = get_averages(zab_results)

    # Send the data to zabbix
    if not args.test:
        zgs = ZaggSender(verbose=args.debug)
        zgs.add_zabbix_keys(zab_results)
        zgs.send_metrics()

def main():
    ''' Get data from oadm and send to zabbix '''

    ## set oadm config
    oadm_command = "KUBECONFIG=/etc/openshift/master/admin.kubeconfig /usr/bin/oadm"

    ## get list of running pods
    podlist_cmd = oadm_command + " manage-node --list-pods --selector=''"

    # get the output of oadm
    output = subprocess.check_output(podlist_cmd, shell=True)

    # pare down to only lines that contain "Running"
    running_pods_list = [p for p in output.split('\n') if "Running" in p]

    # we now have all the data we want.  Let's send it to Zagg
    zs = ZaggSender()
    zs.add_zabbix_keys({'running_pods_count': len(running_pods_list)})

    # Finally, send them to zabbix
    zs.send_metrics()

def main(self):
    """ Main function. """

    zag = ZaggSender()
    yaml_config = {}
    config_path = '/etc/openshift_tools/rkhunter_config.yaml'

    if os.path.isfile(config_path):
        with open(config_path, 'r') as rkhunter_config:
            yaml_config = yaml.load(rkhunter_config)

    logfile = yaml_config["logfile"]

    checks = {
        "rkhunter.found.warning": r"\[ warning \]",
        "rkhunter.found.infection": r"INFECTED$"
    }

    for zabbix_key, search_term in checks.iteritems():
        scan_status = self.check_rkhunter(search_term, logfile)
        zag.add_zabbix_keys({zabbix_key: scan_status})

    zag.send_metrics()

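# check_rkhunter is referenced above but not defined in this snippet. A
# minimal sketch of what it might do, assuming it counts log lines matching
# the given pattern (the body is illustrative, not the original helper, and
# would live on the same class as main above):
import re

def check_rkhunter(self, search_term, logfile):
    """ Return the number of lines in logfile matching search_term. """
    matcher = re.compile(search_term)
    hits = 0
    with open(logfile, 'r') as log:
        for line in log:
            if matcher.search(line):
                hits += 1
    return hits
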
def main():
    """ Main function to run the check """

    argz = parse_args()

    conn_count = 0

    for proc in psutil.process_iter():
        try:
            if proc.name() == argz.proc_to_check:
                if argz.debug:
                    print proc.connections()

                for conn in proc.connections():
                    if conn.status == argz.conn_status and conn.laddr[1] == argz.port:
                        conn_count += 1
        except psutil.NoSuchProcess:
            pass

    if argz.debug:
        print 'Process ({0}) on port {1} has {2} connections in {3} status'.format(
            argz.proc_to_check, argz.port, conn_count, argz.conn_status)

    zgs = ZaggSender(debug=argz.debug)
    zgs.add_zabbix_keys({argz.zabbix_key: conn_count})
    zgs.send_metrics()

# Assumed imports for the names used below (os, time, and the docker-py
# client/error classes); the original file's full import block is not shown.
import os
import time

from docker import AutoVersionClient
from docker.errors import APIError

from openshift_tools.monitoring.zagg_sender import ZaggSender

ZBX_KEY = "docker.container.dns.resolution"

if __name__ == "__main__":
    cli = AutoVersionClient(base_url='unix://var/run/docker.sock')

    container_id = os.environ['container_uuid']
    container = cli.create_container(image=cli.inspect_container(container_id)['Image'],
                                     command='getent hosts redhat.com')
    cli.start(container=container.get('Id'))
    exit_code = cli.wait(container)

    for _ in range(3):
        try:
            cli.remove_container(container.get('Id'))
            break
        except APIError:
            print "Error while cleaning up container."
            time.sleep(5)

    zs = ZaggSender()
    zs.add_zabbix_keys({ZBX_KEY: exit_code})

    print "Sending these metrics:"
    print ZBX_KEY + ": " + str(exit_code)
    zs.send_metrics()

    print "\nDone.\n"

class OpenshiftRouterChecks(object):
    """Checks for the Openshift Router"""

    def __init__(self):
        self.args = None
        self.zgs = None  # zagg sender
        self.kubeconfig = None
        self.parse_args()
        self.get_kubeconfig()
        self.ocutil = None

    def get_kubeconfig(self):
        """Find kubeconfig to use for OCUtil"""
        # Default master kubeconfig
        kubeconfig = '/tmp/admin.kubeconfig'
        non_master_kube_dir = '/etc/origin/node'
        if os.path.exists(kubeconfig):
            # If /tmp/admin.kubeconfig exists, use it!
            pass
        elif os.path.isdir(non_master_kube_dir):
            for my_file in os.listdir(non_master_kube_dir):
                if my_file.endswith(".kubeconfig"):
                    kubeconfig = os.path.join(non_master_kube_dir, my_file)

        if self.args.debug:
            print "Using kubeconfig: {}".format(kubeconfig)

        self.kubeconfig = kubeconfig

    def check_all_router_health(self):
        """ Perform defined router health check on all routers """
        discovery_key = "disc.openshift.cluster.router"
        discovery_macro = "#OS_ROUTER"
        router_health_item = "disc.openshift.cluster.router.health"

        router_pods = self.find_router_pods()
        health_report = {}
        for router_name, pod_details in router_pods.iteritems():
            health = self.router_pod_healthy(pod_details)
            if self.args.verbose:
                print "{} healthy: {}\n".format(router_name, health)
            health_report[router_name] = health

        # make dynamic items, and queue up the associated data
        router_names = health_report.keys()
        self.zgs.add_zabbix_dynamic_item(discovery_key, discovery_macro,
                                         router_names, synthetic=True)
        for router_name, health_status in health_report.iteritems():
            zbx_key = "{}[{}]".format(router_health_item, router_name)
            self.zgs.add_zabbix_keys({zbx_key: int(health_status)}, synthetic=True)

    def running_pod_count_check(self):
        """ return hash of deployment configs containing whether the
            number of running pods matches the definition in the
            deployment config
        """
        router_pods = self.find_router_pods()

        # get actual running pod count (per DC)
        dc_pod_count = {}
        for _, details in router_pods.iteritems():
            dc_name = details['metadata']['labels']['deploymentconfig']
            dc_pod_count[dc_name] = dc_pod_count.get(dc_name, 0) + 1

        if self.args.debug:
            print "Running pod count: {}".format(dc_pod_count)

        # get expected pod count as defined in each router DC
        expected_pod_count = {}
        for dc_name in dc_pod_count.keys():
            expected_pod_count[dc_name] = self.ocutil.get_dc(dc_name)['spec']['replicas']

        if self.args.debug:
            print "Expected pod count: {}".format(expected_pod_count)

        results = {}
        for dc_name in dc_pod_count.keys():
            results[dc_name] = bool(dc_pod_count[dc_name] == expected_pod_count[dc_name])

        if self.args.verbose or self.args.debug:
            print "DC replica count matching actual counts: {}".format(results)

        return results

    def check_router_replica_count(self):
        """ Check whether the running router replica count is the same
            as what is defined in the deployment config
        """
        discovery_key = "disc.openshift.cluster.router"
        discovery_macro = "#ROUTER_DC"
        dc_status_item = "disc.openshift.cluster.router.expected_pod_count"

        replica_results = self.running_pod_count_check()

        # make dynamic items, and queue up the associated data
        dc_names = replica_results.keys()
        self.zgs.add_zabbix_dynamic_item(discovery_key, discovery_macro,
                                         dc_names, synthetic=True)
        for dc_name, replica_status in replica_results.iteritems():
            zbx_key = "{}[{}]".format(dc_status_item, dc_name)
            self.zgs.add_zabbix_keys({zbx_key: int(replica_status)}, synthetic=True)

    def run(self):
        """Main function to run the check"""
        self.ocutil = OCUtil(config_file=self.kubeconfig, verbose=self.args.verbose)
        self.zgs = ZaggSender(verbose=self.args.verbose, debug=self.args.debug)

        self.check_all_router_health()
        self.check_router_replica_count()

        if self.args.dry_run:
            self.zgs.print_unique_metrics_key_value()
        else:
            self.zgs.send_metrics()

    def parse_args(self):
        """ parse the args from the cli """
        parser = argparse.ArgumentParser(description='Openshift Router sender')
        parser.add_argument('-v', '--verbose', action='store_true', default=None, help='Verbose?')
        parser.add_argument('--debug', action='store_true', default=None, help='Debug?')
        parser.add_argument('--dry-run', action='store_true', default=False,
                            help='Collect stats, but no report to zabbix')
        self.args = parser.parse_args()

    @staticmethod
    def get_router_health_url(router):
        """ build router healthcheck URL """
        podip = router['status']['podIP']
        port = router['spec']['containers'][0]['livenessProbe']['httpGet']['port']
        path = router['spec']['containers'][0]['livenessProbe']['httpGet']['path']
        return 'http://{}:{}{}'.format(podip, port, path)

    @staticmethod
    def router_pod_healthy(router):
        """ ping the health port for router pod health """
        url = OpenshiftRouterChecks.get_router_health_url(router)
        try:
            result = urllib2.urlopen(url).getcode()
            return result == 200
        except (urllib2.HTTPError, urllib2.URLError):
            return False

    def find_router_pods(self):
        """ return dict of PODs running haproxy (the router pods) """
        router_pods = {}
        for pod in self.ocutil.get_pods()['items']:
            try:
                img = pod['status']['containerStatuses'][0]['image']
                if 'ose-haproxy-router' in img:
                    router_pods[pod['metadata']['name']] = pod
            except KeyError:
                pass

        return router_pods

class DockerContainerUsageCli(object):
    ''' This is the class that actually pulls everything together into a cli script. '''

    def __init__(self, config_file=None):
        if not config_file:
            self.config_file = '/etc/openshift_tools/container_metrics.yml'
        else:
            self.config_file = config_file

        self.config = None
        self.parse_config()

        self.cli = AutoVersionClient(base_url='unix://var/run/docker.sock', timeout=120)
        self.docker_util = DockerUtil(self.cli)
        self.zagg_sender = ZaggSender(verbose=True)

    def parse_config(self):
        """ parse config file """
        if not self.config:
            if not os.path.exists(self.config_file):
                raise IOError(self.config_file + " does not exist.")

            self.config = yaml.load(file(self.config_file))

    def format_ctr_name(self, ctr_name):
        ''' Takes a container name and if there's a name_format_regex specified, it applies it '''
        for item in self.config['usage_checks']:
            name_match_regex = item['name_match_regex']
            if 'name_format_regex' in item and re.match(name_match_regex, ctr_name):
                try:
                    name_format_regex = item['name_format_regex']
                    new_name = re.sub(name_match_regex, name_format_regex, ctr_name)
                    return new_name
                except sre_constants.error as ex:
                    # Just use the full name (we don't want to die because of name formatting)
                    print "\nError: %s: [%s]. Using full name [%s].\n" % \
                          (ex.message, name_format_regex, ctr_name)
                    return ctr_name

        return ctr_name

    def main(self):
        ''' The main entrypoint of the cli '''
        ctr_regexes = [uchk['name_match_regex'] for uchk in self.config['usage_checks']]
        use_cgroups = self.config.get('use_cgroups', False)

        ctrs = self.docker_util.get_ctrs_matching_names(ctr_regexes)

        for ctr_name, ctr in ctrs.iteritems():
            (cpu_stats, mem_stats) = self.docker_util.get_ctr_stats(ctr, use_cgroups=use_cgroups)

            formatted_ctr_name = self.format_ctr_name(ctr_name)

            # Add the container hostnames as macros for the dynamic item.
            self.zagg_sender.add_zabbix_dynamic_item(ZBX_DOCKER_DISC_KEY, ZBX_DOCKER_DISC_MACRO,
                                                     [formatted_ctr_name])

            data = {
                '%s[%s]' % (ZBX_CTR_CPU_USED_PCT_KEY, formatted_ctr_name): cpu_stats.used_pct,
                '%s[%s]' % (ZBX_CTR_MEM_USED_KEY, formatted_ctr_name): mem_stats.used,
                '%s[%s]' % (ZBX_CTR_MEM_LIMIT_KEY, formatted_ctr_name): mem_stats.limit,
                '%s[%s]' % (ZBX_CTR_MEM_LIMIT_USED_PCT_KEY, formatted_ctr_name): mem_stats.limit_used_pct,
                '%s[%s]' % (ZBX_CTR_MEM_FAILCNT_KEY, formatted_ctr_name): mem_stats.failcnt,
            }

            print "%s:" % formatted_ctr_name
            for k, v in data.iteritems():
                print "    %s: %s" % (k, v)
            print

            self.zagg_sender.add_zabbix_keys(data)

        # Actually send the metrics
        self.zagg_sender.send_metrics()

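# A short illustration of the name_match_regex / name_format_regex rename
# performed by format_ctr_name above; both patterns here are hypothetical
# config values, not taken from a real container_metrics.yml:
import re

def demo_ctr_rename():
    """ Show how a match regex plus a format regex rewrites a container name. """
    name_match_regex = r'^k8s_(\w+)\..*'   # hypothetical usage_checks entry
    name_format_regex = r'\1'              # keep only the first capture group
    ctr_name = 'k8s_router.abc123_router-1-deploy'
    print re.sub(name_match_regex, name_format_regex, ctr_name)  # prints: router
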
def send_zagg_data(build_ran, create_app, http_code, run_time):
    ''' send data to Zagg '''
    zgs_time = time.time()
    zgs = ZaggSender()
    print "Send data to Zagg"

    if build_ran == 1:
        zgs.add_zabbix_keys({'openshift.master.app.build.create': create_app})
        zgs.add_zabbix_keys({'openshift.master.app.build.create.code': http_code})
        zgs.add_zabbix_keys({'openshift.master.app.build.create.time': run_time})
    else:
        zgs.add_zabbix_keys({'openshift.master.app.create': create_app})
        zgs.add_zabbix_keys({'openshift.master.app.create.code': http_code})
        zgs.add_zabbix_keys({'openshift.master.app.create.time': run_time})

    try:
        zgs.send_metrics()
    except:
        print "Error sending to Zagg: %s \n %s " % (sys.exc_info()[0], sys.exc_info()[1])

    print "Data sent in %s seconds" % str(time.time() - zgs_time)

# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

#This is not a module, but pylint thinks it is.  This is a command.
#pylint: disable=invalid-name

from openshift_tools.monitoring.zagg_sender import ZaggSender
from openshift_tools.monitoring import pminfo

FILESYSTEM_METRIC = ['filesys.full']

DISCOVERY_KEY_FS = 'disc.filesys'
ITEM_PROTOTYPE_MACRO_FS = '#OSO_FILESYS'
ITEM_PROTOTYPE_KEY_FULL = 'disc.filesys.full'

FILESYS_METRICS = pminfo.get_metrics(FILESYSTEM_METRIC)

FILTERED_FILESYS_METRICS = {k.replace('filesys.full.', ''): v
                            for (k, v) in FILESYS_METRICS.iteritems()
                            if 'docker' not in k}

ZS = ZaggSender()
ZS.add_zabbix_dynamic_item(DISCOVERY_KEY_FS, ITEM_PROTOTYPE_MACRO_FS,
                           FILTERED_FILESYS_METRICS.keys())

for filesys_name, filesys_full in FILTERED_FILESYS_METRICS.iteritems():
    ZS.add_zabbix_keys({'%s[%s]' % (ITEM_PROTOTYPE_KEY_FULL, filesys_name): filesys_full})

ZS.send_metrics()

class OpsZaggPCPClient(object):
    """ class to send data to zagg """

    def __init__(self):
        self.zagg_sender = None
        self.args = None
        self.config = None
        self.pcp_metrics = []
        self.heartbeat = None

    def run(self):
        """ main function to run the script """

        self.parse_args()
        self.parse_config(self.args.config_file)
        self.config_zagg_sender()

        if self.args.metrics:
            self.add_metrics()

        self.add_metrics_from_config()

        self.zagg_sender.send_metrics()

    def parse_args(self):
        """ parse the args from the cli """

        parser = argparse.ArgumentParser(description='Zagg PCP metric sender')
        parser.add_argument('--send-pcp-metrics', help="send pcp metrics to zagg", action="store_true")
        parser.add_argument('-m', '--metrics', help="send PCP metrics to zagg")
        parser.add_argument('-s', '--host', help='specify host name as registered in Zabbix')
        parser.add_argument('-z', '--zagg-url', help='url of Zagg server')
        parser.add_argument('--zagg-user', help='username of the Zagg server')
        parser.add_argument('--zagg-pass', help='Password of the Zagg server')
        parser.add_argument('--zagg-ssl-verify', default=None, help='Whether to verify ssl certificates.')
        parser.add_argument('-v', '--verbose', action='store_true', default=None, help='Verbose?')
        parser.add_argument('--debug', action='store_true', default=None, help='Debug?')
        parser.add_argument('-c', '--config-file', help='ops-zagg-client config file',
                            default='/etc/openshift_tools/zagg_client.yaml')

        self.args = parser.parse_args()

    def parse_config(self, config_file):
        """ parse config file """
        self.config = yaml.load(file(config_file))

    def config_zagg_sender(self):
        """ configure the zagg_sender """

        zagg_url = self.args.zagg_url if self.args.zagg_url else self.config['zagg']['url']
        zagg_user = self.args.zagg_user if self.args.zagg_user else self.config['zagg']['user']
        zagg_password = self.args.zagg_pass if self.args.zagg_pass else self.config['zagg']['pass']
        zagg_verbose = self.args.verbose if self.args.verbose else self.config['zagg']['verbose']
        zagg_debug = self.args.debug if self.args.debug else self.config['zagg']['debug']
        zagg_ssl_verify = self.args.zagg_ssl_verify if self.args.zagg_ssl_verify \
            else self.config['zagg']['ssl_verify']
        host = self.args.host if self.args.host else self.config['host']['name']

        if isinstance(zagg_verbose, str):
            zagg_verbose = (zagg_verbose == 'True')

        if isinstance(zagg_debug, str):
            zagg_debug = (zagg_debug == 'True')

        if isinstance(zagg_ssl_verify, str):
            zagg_ssl_verify = (zagg_ssl_verify == 'True')

        zagg_conn = ZaggConnection(url=zagg_url,
                                   user=zagg_user,
                                   password=zagg_password,
                                   ssl_verify=zagg_ssl_verify,
                                   debug=zagg_debug,
                                  )

        self.zagg_sender = ZaggSender(host, zagg_conn, zagg_verbose, zagg_debug)

    def add_metrics_from_config(self):
        """ collect pcp metrics from a config file.  Add to send to ZaggSender """
        self.add_pcp_to_zagg_sender(self.config['pcp']['metrics'])

    def add_metrics(self):
        """ collect pcp metrics to send to ZaggSender """
        metric_list = self.args.metrics.split(',')
        self.add_pcp_to_zagg_sender(metric_list)

    def add_pcp_to_zagg_sender(self, pcp_metrics):
        """ fetch the given PCP metrics and queue them on the ZaggSender """
        pcp_metric_dict = pminfo.get_metrics(metrics=pcp_metrics, derived_metrics=None)
        self.zagg_sender.add_zabbix_keys(pcp_metric_dict)

def report_to_zabbix(self, total_snapshottable_vols, total_snapshots_created,
                     total_snapshot_creation_errors):
    """ Sends the snapshot counts to zabbix. """
    zs = ZaggSender(verbose=True)

    # Populate EBS_SNAPSHOTTER_DISC_SCHEDULE_MACRO with the schedule
    zs.add_zabbix_dynamic_item(EBS_SNAPSHOTTER_DISC_KEY, EBS_SNAPSHOTTER_DISC_SCHEDULE_MACRO,
                               [self.args.with_schedule])

    # Send total_snapshottable_vols prototype item key and value
    zs.add_zabbix_keys({'%s[%s]' % (EBS_SNAPSHOTTER_SNAPSHOTTABLE_VOLUMES_KEY,
                                    self.args.with_schedule): total_snapshottable_vols})

    # Send total_snapshots_created prototype item key and value
    zs.add_zabbix_keys({'%s[%s]' % (EBS_SNAPSHOTTER_SNAPSHOTS_CREATED_KEY,
                                    self.args.with_schedule): total_snapshots_created})

    # Send total_snapshot_creation_errors prototype item key and value
    zs.add_zabbix_keys({'%s[%s]' % (EBS_SNAPSHOTTER_SNAPSHOT_CREATION_ERRORS_KEY,
                                    self.args.with_schedule): total_snapshot_creation_errors})

    # Actually send them
    zs.send_metrics()

#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

#This is not a module, but pylint thinks it is.  This is a command.
#pylint: disable=invalid-name

from openshift_tools.monitoring.zagg_sender import ZaggSender
from openshift_tools.monitoring import pminfo

ZS = ZaggSender(verbose=True)

FILESYS_FULL_METRIC = ['filesys.full']
FILESYS_INODE_DERIVED_METRICS = {
    'filesys.inodes.pused':
        'filesys.usedfiles / (filesys.usedfiles + filesys.freefiles) * 100'}

DISCOVERY_KEY_FS = 'disc.filesys'
ITEM_PROTOTYPE_MACRO_FS = '#OSO_FILESYS'
ITEM_PROTOTYPE_KEY_FULL = 'disc.filesys.full'
ITEM_PROTOTYPE_KEY_INODE = 'disc.filesys.inodes.pused'

def filter_out_docker_filesystems(metric_dict, filesystem_filter):
    """ Simple filter to eliminate unnecessary characters in the key name """
    filtered_dict = {k.replace(filesystem_filter, ''): v
                     for (k, v) in metric_dict.iteritems()
    # Work around because loopback lies about its actual total space
    if not dds.is_loopback:
        dds.data_space_total = dds.data_space_used + dds.data_space_available
        dds.metadata_space_total = dds.metadata_space_used + dds.metadata_space_available

    dds.data_space_percent_available = (dds.data_space_available / dds.data_space_total) * 100
    dds.metadata_space_percent_available = (dds.metadata_space_available / dds.metadata_space_total) * 100

    return dds

if __name__ == "__main__":
    keys = None
    exit_code = 0
    zs = ZaggSender()

    try:
        cli = AutoVersionClient(base_url='unix://var/run/docker.sock')
        dw = DockerWatcher(cli)
        dw_dds = dw.get_disk_usage()

        keys = {
            'docker.storage.data.space.used': dw_dds.data_space_used,
            'docker.storage.data.space.available': dw_dds.data_space_available,
            'docker.storage.data.space.percent_available': dw_dds.data_space_percent_available,
            'docker.storage.data.space.total': dw_dds.data_space_total,

            'docker.storage.metadata.space.used': dw_dds.metadata_space_used,
            'docker.storage.metadata.space.available': dw_dds.metadata_space_available,
            'docker.storage.metadata.space.percent_available': dw_dds.metadata_space_percent_available,
            'docker.storage.metadata.space.total': dw_dds.metadata_space_total,
def send_zagg_data(build_ran, create_app, http_code, run_time):
    ''' send data to Zagg '''
    zgs = ZaggSender()
    print "Send data to Zagg"

    if build_ran == 1:
        zgs.add_zabbix_keys({'openshift.master.app.build.create': create_app})
        zgs.add_zabbix_keys({'openshift.master.app.build.create.code': http_code})
        zgs.add_zabbix_keys({'openshift.master.app.build.create.time': run_time})
    else:
        zgs.add_zabbix_keys({'openshift.master.app.create': create_app})
        zgs.add_zabbix_keys({'openshift.master.app.create.code': http_code})
        zgs.add_zabbix_keys({'openshift.master.app.create.time': run_time})

    zgs.send_metrics()

class OpenshiftMasterZaggClient(object):
    """ Checks for the Openshift Master """

    def __init__(self):
        self.args = None
        self.zagg_sender = None
        self.ora = OpenshiftRestApi()

    def run(self):
        """ Main function to run the check """

        self.parse_args()
        self.zagg_sender = ZaggSender(verbose=self.args.verbose, debug=self.args.debug)

        try:
            if self.args.healthz or self.args.all_checks:
                self.healthz_check()
        except Exception as ex:
            print "Problem performing healthz check: %s " % ex.message
            self.zagg_sender.add_zabbix_keys({'openshift.master.api.healthz': 'false'})

        try:
            if self.args.api_ping or self.args.all_checks:
                self.api_ping()

            if self.args.project_count or self.args.all_checks:
                self.project_count()

            if self.args.pod_count or self.args.all_checks:
                self.pod_count()

            if self.args.user_count or self.args.all_checks:
                self.user_count()

        except Exception as ex:
            print "Problem performing Openshift API checks: %s " % ex.message
            self.zagg_sender.add_zabbix_keys({'openshift.master.api.ping': 0})  # Openshift API is down

        try:
            if self.args.metrics or self.args.all_checks:
                self.metric_check()

        except Exception as ex:
            print "Problem getting Openshift metrics at /metrics: %s " % ex.message
            self.zagg_sender.add_zabbix_keys({'openshift.master.metric.ping': 0})  # Openshift Metrics are down

        self.zagg_sender.send_metrics()

    def parse_args(self):
        """ parse the args from the cli """

        parser = argparse.ArgumentParser(description='Network metric sender')
        parser.add_argument('-v', '--verbose', action='store_true', default=None, help='Verbose?')
        parser.add_argument('--debug', action='store_true', default=None, help='Debug?')

        master_check_group = parser.add_argument_group('Different Checks to Perform')
        master_check_group.add_argument('--all-checks', action='store_true', default=None,
                                        help='Do all of the checks')
        master_check_group.add_argument('--api-ping', action='store_true', default=None,
                                        help='Verify the Openshift API is alive')
        master_check_group.add_argument('--healthz', action='store_true', default=None,
                                        help='Query the Openshift Master API /healthz')
        master_check_group.add_argument('--metrics', action='store_true', default=None,
                                        help='Query the Openshift Master Metrics at /metrics')
        master_check_group.add_argument('--project-count', action='store_true', default=None,
                                        help='Query the Openshift Master for Number of Projects')
        master_check_group.add_argument('--pod-count', action='store_true', default=None,
                                        help='Query the Openshift Master for Number of Running Pods')
        master_check_group.add_argument('--user-count', action='store_true', default=None,
                                        help='Query the Openshift Master for Number of Users')

        self.args = parser.parse_args()

    def api_ping(self):
        """ Verify the Openshift API health is responding correctly """

        print "\nPerforming Openshift API ping check..."

        response = self.ora.get('/api/v1/nodes')
        print "\nOpenshift API ping is alive"
        print "Number of nodes in the Openshift cluster: %s" % len(response['items'])
        self.zagg_sender.add_zabbix_keys({'openshift.master.api.ping': 1,
                                          'openshift.master.node.count': len(response['items'])})

    def healthz_check(self):
        """ check the /healthz API call """

        print "\nPerforming /healthz check..."

        response = self.ora.get('/healthz', rtype='text')
        print "healthz check returns: %s " % response
        self.zagg_sender.add_zabbix_keys({'openshift.master.api.healthz': str('ok' in response).lower()})

    def metric_check(self):
        """ collect certain metrics from the /metrics API call """

        print "\nPerforming /metrics check..."
        response = self.ora.get('/metrics', rtype='text')

        for metric_type in text_string_to_metric_families(response):

            # Collect the apiserver_request_latencies_summary{resource="pods",verb="LIST"} quantiles
            # and apiserver_request_latencies_summary{resource="pods",verb="WATCHLIST"} quantiles
            if metric_type.name == 'apiserver_request_latencies_summary':
                key_str = 'openshift.master.apiserver.latency.summary'
                for sample in metric_type.samples:
                    if (sample[1]['resource'] == 'pods'
                            and 'quantile' in sample[1]
                            and 'LIST' in sample[1]['verb']):
                        curr_key_str = key_str + ".pods.quantile.%s.%s" % (sample[1]['verb'],
                                                                           sample[1]['quantile'].split('.')[1])
                        value = 0 if math.isnan(sample[2]) else sample[2]
                        self.zagg_sender.add_zabbix_keys({curr_key_str.lower(): int(value / 1000)})

            # Collect the scheduler_e2e_scheduling_latency_microseconds quantiles
            if metric_type.name == 'scheduler_e2e_scheduling_latency_microseconds':
                for sample in metric_type.samples:
                    if 'quantile' in sample[1]:
                        key_str = 'openshift.master.scheduler.e2e.scheduling.latency'
                        curr_key_str = key_str + ".quantile.%s" % sample[1]['quantile'].split('.')[1]
                        value = 0 if math.isnan(sample[2]) else sample[2]
                        self.zagg_sender.add_zabbix_keys({curr_key_str.lower(): int(value / 1000)})

        self.zagg_sender.add_zabbix_keys({'openshift.master.metric.ping': 1})

    def project_count(self):
        """ check the number of projects in Openshift """

        print "\nPerforming project count check..."

        excluded_names = ['openshift', 'openshift-infra', 'default', 'ops-monitor']
        response = self.ora.get('/oapi/v1/projects')

        project_names = [project['metadata']['name'] for project in response['items']]
        valid_names = set(project_names) - set(excluded_names)

        print "Project count: %s" % len(valid_names)
        self.zagg_sender.add_zabbix_keys({'openshift.project.count': len(valid_names)})

    def pod_count(self):
        """ check the number of pods in Openshift """

        print "\nPerforming pod count check..."

        response = self.ora.get('/api/v1/pods')

        # Get running pod count
        running_pod_count = 0
        for i in response['items']:
            if 'containerStatuses' in i['status']:
                if 'running' in i['status']['containerStatuses'][0]['state']:
                    running_pod_count += 1

        # Get running pod count on compute only nodes (non-infra)
        running_user_pod_count = 0
        for i in response['items']:
            if 'containerStatuses' in i['status']:
                if 'running' in i['status']['containerStatuses'][0]['state']:
                    if 'nodeSelector' in i['spec']:
                        if i['spec']['nodeSelector']['type'] == 'compute':
                            running_user_pod_count += 1

        print "Total pod count: %s" % len(response['items'])
        print "Running pod count: %s" % running_pod_count
        print "User Running pod count: %s" % running_user_pod_count

        self.zagg_sender.add_zabbix_keys({'openshift.master.pod.running.count': running_pod_count,
                                          'openshift.master.pod.user.running.count': running_user_pod_count,
                                          'openshift.master.pod.total.count': len(response['items'])})

    def user_count(self):
        """ check the number of users in Openshift """

        print "\nPerforming user count check..."

        response = self.ora.get('/oapi/v1/users')

        print "Total user count: %s" % len(response['items'])
        self.zagg_sender.add_zabbix_keys({'openshift.master.user.count': len(response['items'])})

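# A small illustration of the key construction used in metric_check above
# (the sample values here are made up, not real /metrics output): the
# Prometheus quantile label '0.99' becomes the key suffix '99'.
def demo_quantile_key():
    """ Show how a quantile label becomes a Zabbix key suffix. """
    key_str = 'openshift.master.apiserver.latency.summary'
    verb, quantile = 'LIST', '0.99'
    curr_key_str = key_str + ".pods.quantile.%s.%s" % (verb, quantile.split('.')[1])
    # prints: openshift.master.apiserver.latency.summary.pods.quantile.list.99
    print curr_key_str.lower()
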
            exit_code = cli.exec_inspect(exec_id)['ExitCode']
        except APIError:
            # could race from getting a container list and the container exiting
            # before we can exec on it, so just ignore exited containers
            continue

        if exit_code == CMD_NOT_FOUND:
            continue

        print results
        print "Exit Code: " + str(exit_code)

        if exit_code != 0:
            bad_dns_count += 1
            ctr_data = cli.inspect_container(ctr['Id'])
            print "Additional info: Namespace: {} Name: {} IP: {}".format(
                ctr['Labels'].get('io.kubernetes.pod.namespace', 'null'),
                ctr['Labels'].get('io.kubernetes.pod.name', 'null'),
                ctr_data['NetworkSettings']['IPAddress'])

        # Extra whitespace between output for each container
        print

    zs = ZaggSender()
    zs.add_zabbix_keys({ZBX_KEY: bad_dns_count})

    print "Sending these metrics:"
    print ZBX_KEY + ": " + str(bad_dns_count)
    zs.send_metrics()

    print "\nDone.\n"

class OpenshiftMasterZaggClient(object):
    """ Checks for the Openshift Master """

    def __init__(self):
        self.args = None
        self.zagg_sender = None
        self.ora = None
        self.zabbix_api_key = None
        self.zabbix_healthz_key = None

    def run(self):
        """ Main function to run the check """

        self.parse_args()
        self.zagg_sender = ZaggSender(verbose=self.args.verbose, debug=self.args.debug)

        if self.args.local:
            self.ora = OpenshiftRestApi()
            self.args.api_ping = True
            self.args.healthz = True
            self.zabbix_api_key = 'openshift.master.local.api.ping'
            self.zabbix_healthz_key = 'openshift.master.local.api.healthz'
        else:
            master_cfg_from_yaml = []
            with open('/etc/origin/master/master-config.yaml', 'r') as yml:
                master_cfg_from_yaml = yaml.load(yml)
            self.ora = OpenshiftRestApi(host=master_cfg_from_yaml['oauthConfig']['masterURL'],
                                        verify_ssl=True)
            self.zabbix_api_key = 'openshift.master.api.ping'
            self.zabbix_healthz_key = 'openshift.master.api.healthz'

        try:
            if self.args.healthz or self.args.all_checks:
                self.healthz_check()
        except Exception as ex:
            print "Problem performing healthz check: %s " % ex.message
            self.zagg_sender.add_zabbix_keys({self.zabbix_healthz_key: 'false'})

        try:
            if self.args.api_ping or self.args.all_checks:
                self.api_ping()

            if self.args.project_count or self.args.all_checks:
                self.project_count()

            if self.args.pod_count or self.args.all_checks:
                self.pod_count()

            if self.args.user_count or self.args.all_checks:
                self.user_count()

            if self.args.pv_info or self.args.all_checks:
                self.pv_info()

            if self.args.nodes_not_ready or self.args.all_checks:
                self.nodes_not_ready()

        except Exception as ex:
            print "Problem performing Openshift API checks: %s " % ex.message
            self.zagg_sender.add_zabbix_keys({self.zabbix_api_key: 0})  # Openshift API is down

        try:
            if self.args.metrics or self.args.all_checks:
                self.metric_check()

        except Exception as ex:
            print "Problem getting Openshift metrics at /metrics: %s " % ex.message
            self.zagg_sender.add_zabbix_keys({'openshift.master.metric.ping': 0})  # Openshift Metrics are down

        self.zagg_sender.send_metrics()

    def parse_args(self):
        """ parse the args from the cli """

        parser = argparse.ArgumentParser(description='Network metric sender')
        parser.add_argument('-v', '--verbose', action='store_true', default=None, help='Verbose?')
        parser.add_argument('--debug', action='store_true', default=None, help='Debug?')
        parser.add_argument('-l', '--local', action='store_true', default=False,
                            help='Run local checks against the local API (https://127.0.0.1)')

        master_check_group = parser.add_argument_group('Different Checks to Perform')
        master_check_group.add_argument('--all-checks', action='store_true', default=None,
                                        help='Do all of the checks')
        master_check_group.add_argument('--api-ping', action='store_true', default=None,
                                        help='Verify the Openshift API is alive')
        master_check_group.add_argument('--healthz', action='store_true', default=None,
                                        help='Query the Openshift Master API /healthz')
        master_check_group.add_argument('--metrics', action='store_true', default=None,
                                        help='Query the Openshift Master Metrics at /metrics')
        master_check_group.add_argument('--project-count', action='store_true', default=None,
                                        help='Query the Openshift Master for Number of Projects')
        master_check_group.add_argument('--pod-count', action='store_true', default=None,
                                        help='Query the Openshift Master for Number of Running Pods')
        master_check_group.add_argument('--user-count', action='store_true', default=None,
                                        help='Query the Openshift Master for Number of Users')
        master_check_group.add_argument('--pv-info', action='store_true', default=None,
                                        help='Query the Openshift Master for Persistent Volumes Info')
        master_check_group.add_argument('--nodes-not-ready', action='store_true', default=None,
                                        help='Query the Openshift Master for number of nodes not in Ready state')

        self.args = parser.parse_args()

    def api_ping(self):
        """ Verify the Openshift API health is responding correctly """

        print "\nPerforming Openshift API ping check..."

        response = self.ora.get('/api/v1/nodes')
        print "\nOpenshift API ping is alive"
        print "Number of nodes in the Openshift cluster: %s" % len(response['items'])
        self.zagg_sender.add_zabbix_keys({self.zabbix_api_key: 1,
                                          'openshift.master.node.count': len(response['items'])})

    def healthz_check(self):
        """ check the /healthz API call """

        print "\nPerforming /healthz check..."

        response = self.ora.get('/healthz', rtype='text')
        print "healthz check returns: %s " % response
        self.zagg_sender.add_zabbix_keys({self.zabbix_healthz_key: str('ok' in response).lower()})

    def metric_check(self):
        """ collect certain metrics from the /metrics API call """

        print "\nPerforming /metrics check..."
        response = self.ora.get('/metrics', rtype='text')

        for metric_type in text_string_to_metric_families(response):

            # Collect the apiserver_request_latencies_summary{resource="pods",verb="LIST"} quantiles
            # and apiserver_request_latencies_summary{resource="pods",verb="WATCHLIST"} quantiles
            if metric_type.name == 'apiserver_request_latencies_summary':
                key_str = 'openshift.master.apiserver.latency.summary'
                for sample in metric_type.samples:
                    if (sample[1]['resource'] == 'pods'
                            and 'quantile' in sample[1]
                            and 'LIST' in sample[1]['verb']):
                        curr_key_str = key_str + ".pods.quantile.%s.%s" % (sample[1]['verb'],
                                                                           sample[1]['quantile'].split('.')[1])
                        value = 0 if math.isnan(sample[2]) else sample[2]
                        self.zagg_sender.add_zabbix_keys({curr_key_str.lower(): int(value / 1000)})

            # Collect the scheduler_e2e_scheduling_latency_microseconds quantiles
            if metric_type.name == 'scheduler_e2e_scheduling_latency_microseconds':
                for sample in metric_type.samples:
                    if 'quantile' in sample[1]:
                        key_str = 'openshift.master.scheduler.e2e.scheduling.latency'
                        curr_key_str = key_str + ".quantile.%s" % sample[1]['quantile'].split('.')[1]
                        value = 0 if math.isnan(sample[2]) else sample[2]
                        self.zagg_sender.add_zabbix_keys({curr_key_str.lower(): int(value / 1000)})

        self.zagg_sender.add_zabbix_keys({'openshift.master.metric.ping': 1})

    def project_count(self):
        """ check the number of projects in Openshift """

        print "\nPerforming project count check..."

        excluded_names = ['openshift', 'openshift-infra', 'default', 'ops-monitor']
        response = self.ora.get('/oapi/v1/projects')

        project_names = [project['metadata']['name'] for project in response['items']]
        valid_names = set(project_names) - set(excluded_names)

        print "Project count: %s" % len(valid_names)
        self.zagg_sender.add_zabbix_keys({'openshift.project.count': len(valid_names)})

    def pod_count(self):
        """ check the number of pods in Openshift """

        print "\nPerforming pod count check..."
response = self.ora.get('/api/v1/pods') # Get running pod count running_pod_count = 0 for i in response['items']: if 'containerStatuses' in i['status']: if 'running' in i['status']['containerStatuses'][0]['state']: running_pod_count += 1 # Get running pod count on compute only nodes (non-infra) running_user_pod_count = 0 for i in response['items']: if 'containerStatuses' in i['status']: if 'running' in i['status']['containerStatuses'][0]['state']: if 'nodeSelector' in i['spec']: if i['spec']['nodeSelector']['type'] == 'compute': running_user_pod_count += 1 print "Total pod count: %s" % len(response['items']) print "Running pod count: %s" % running_pod_count print "User Running pod count: %s" % running_user_pod_count self.zagg_sender.add_zabbix_keys({'openshift.master.pod.running.count' : running_pod_count, 'openshift.master.pod.user.running.count' : running_user_pod_count, 'openshift.master.pod.total.count' : len(response['items'])}) def user_count(self): """ check the number of users in Openshift """ print "\nPerforming user count check..." response = self.ora.get('/oapi/v1/users') print "Total user count: %s" % len(response['items']) self.zagg_sender.add_zabbix_keys({'openshift.master.user.count' : len(response['items'])}) def pv_info(self): """ Gather info about the persistent volumes in Openshift """ print "\nPerforming user persistent volume count...\n" response = self.ora.get('/api/v1/persistentvolumes') pv_capacity_total = 0 pv_capacity_available = 0 pv_types = {'Available': 0, 'Bound': 0, 'Released': 0, 'Failed': 0} # Dynamic items variables discovery_key_pv = 'disc.pv' item_prototype_macro_pv = '#OSO_PV' item_prototype_key_count = 'disc.pv.count' item_prototype_key_available = 'disc.pv.available' dynamic_pv_count = defaultdict(int) dynamic_pv_available = defaultdict(int) for item in response['items']: # gather dynamic pv counts dynamic_pv_count[item['spec']['capacity']['storage']] += 1 #get count of each pv type available pv_types[item['status']['phase']] += 1 #get info for the capacity and capacity available capacity = item['spec']['capacity']['storage'] if item['status']['phase'] == 'Available': # get total available capacity pv_capacity_available = pv_capacity_available + int(capacity.replace('Gi', '')) # gather dynamic pv available counts dynamic_pv_available[item['spec']['capacity']['storage']] += 1 pv_capacity_total = pv_capacity_total + int(capacity.replace('Gi', '')) print "Total Persistent Volume Total count: %s" % len(response['items']) print 'Total Persistent Volume Capacity: %s' % pv_capacity_total print 'Total Persisten Volume Available Capacity: %s' % pv_capacity_available self.zagg_sender.add_zabbix_keys( {'openshift.master.pv.total.count' : len(response['items']), 'openshift.master.pv.space.total': pv_capacity_total, 'openshift.master.pv.space.available': pv_capacity_available}) for key, value in pv_types.iteritems(): print "Total Persistent Volume %s count: %s" % (key, value) self.zagg_sender.add_zabbix_keys( {'openshift.master.pv.%s.count' %key.lower() : value}) # Add dynamic items self.zagg_sender.add_zabbix_dynamic_item(discovery_key_pv, item_prototype_macro_pv, dynamic_pv_count.keys()) for size, count in dynamic_pv_count.iteritems(): print print "Total Persistent Volume %s count: %s" % (size, count) print "Total Persistent Volume available %s count: %s" % (size, dynamic_pv_available[size]) self.zagg_sender.add_zabbix_keys({"%s[%s]" %(item_prototype_key_count, size) : count, "%s[%s]" %(item_prototype_key_available, size) : dynamic_pv_available[size]}) def 
nodes_not_ready(self): """ check the number of nodes in the cluster that are not ready""" print "\nPerforming nodes not ready check..." response = self.ora.get('/api/v1/nodes') nodes_not_schedulable = [] for n in response['items']: if "unschedulable" in n['spec']: nodes_not_schedulable.append(n) nodes_not_ready = [] for n in response['items']: has_ready_status = False for cond in n['status']['conditions']: if cond['reason'] == "KubeletReady": has_ready_status = True if cond['status'].lower() != "true": nodes_not_ready.append(n) # treat nodes that report no KubeletReady condition at all as not ready if not has_ready_status: nodes_not_ready.append(n) print "Count of nodes not schedulable: %s" % len(nodes_not_schedulable) print "Count of nodes not ready: %s" % len(nodes_not_ready) self.zagg_sender.add_zabbix_keys( {'openshift.master.nodesnotready.count' : len(nodes_not_ready)}) self.zagg_sender.add_zabbix_keys( {'openshift.master.nodesnotschedulable.count' : len(nodes_not_schedulable)})
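# For reference, a minimal sketch of the node shape nodes_not_ready() walks.
# Field names follow the Kubernetes v1 node API; this sample node is
# hypothetical and carries only the fields the check inspects.
sample_node = {
    'spec': {'unschedulable': True},
    'status': {'conditions': [{'reason': 'KubeletReady', 'status': 'False'}]},
}
not_schedulable = 'unschedulable' in sample_node['spec']
not_ready = any(cond['reason'] == 'KubeletReady' and cond['status'].lower() != 'true'
                for cond in sample_node['status']['conditions'])
print "not schedulable: %s, not ready: %s" % (not_schedulable, not_ready)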
def run(self): """ Main function to run the check """ self.parse_args() self.zagg_sender = ZaggSender(verbose=self.args.verbose, debug=self.args.debug) if self.args.local: self.ora = OpenshiftRestApi() self.args.api_ping = True self.args.healthz = True self.zabbix_api_key = 'openshift.master.local.api.ping' self.zabbix_healthz_key = 'openshift.master.local.api.healthz' else: master_cfg_from_yaml = [] with open('/etc/origin/master/master-config.yaml', 'r') as yml: master_cfg_from_yaml = yaml.load(yml) self.ora = OpenshiftRestApi(host=master_cfg_from_yaml['oauthConfig']['masterURL'], verify_ssl=True) self.zabbix_api_key = 'openshift.master.api.ping' self.zabbix_healthz_key = 'openshift.master.api.healthz' try: if self.args.healthz or self.args.all_checks: self.healthz_check() except Exception as ex: print "Problem performing healthz check: %s " % ex.message self.zagg_sender.add_zabbix_keys({self.zabbix_healthz_key: 'false'}) try: if self.args.api_ping or self.args.all_checks: self.api_ping() if self.args.project_count or self.args.all_checks: self.project_count() if self.args.pod_count or self.args.all_checks: self.pod_count() if self.args.user_count or self.args.all_checks: self.user_count() if self.args.pv_info or self.args.all_checks: self.pv_info() if self.args.nodes_not_ready or self.args.all_checks: self.nodes_not_ready() except Exception as ex: print "Problem performing Openshift API checks: %s " % ex.message self.zagg_sender.add_zabbix_keys({self.zabbix_api_key: 0}) # Openshift API is down try: if self.args.metrics or self.args.all_checks: self.metric_check() except Exception as ex: print "Problem getting Openshift metrics at /metrics: %s " % ex.message self.zagg_sender.add_zabbix_keys({'openshift.master.metric.ping' : 0}) # Openshift Metrics are down self.zagg_sender.send_metrics()
class OpenshiftDockerRegigtryChecker(object): """ Checks for the Openshift Cluster Docker Registry """ def __init__(self): self.args = None self.zagg_sender = None self.docker_hosts = [] self.docker_port = None # Assume secure registry self.docker_protocol = 'https' self.docker_service_ip = None self.kubeconfig = None def get_kubeconfig(self): ''' Find kubeconfig to use for OCUtil ''' # Default master kubeconfig kubeconfig = '/tmp/admin.kubeconfig' non_master_kube_dir = '/etc/origin/node' if os.path.isdir(non_master_kube_dir): for my_file in os.listdir(non_master_kube_dir): if my_file.endswith(".kubeconfig"): kubeconfig = os.path.join(non_master_kube_dir, my_file) if self.args.debug: print "Using kubeconfig: {}".format(kubeconfig) self.kubeconfig = kubeconfig def run(self): """ Main function to run the check """ self.parse_args() self.get_kubeconfig() ocutil = OCUtil(config_file=self.kubeconfig, verbose=self.args.verbose) self.zagg_sender = ZaggSender(verbose=self.args.verbose, debug=self.args.debug) try: oc_yaml = ocutil.get_service('docker-registry') self.get_registry_service(oc_yaml) oc_yaml = ocutil.get_endpoint('docker-registry') self.get_registry_endpoints(oc_yaml) except Exception as ex: print "Problem retrieving registry IPs: %s " % ex.message self.registry_service_check() self.registry_health_check() self.zagg_sender.send_metrics() def parse_args(self): """ parse the args from the cli """ parser = argparse.ArgumentParser( description='Openshift Cluster Docker Registry sender') parser.add_argument('-v', '--verbose', action='store_true', default=None, help='Verbose?') parser.add_argument('--debug', action='store_true', default=None, help='Debug?') self.args = parser.parse_args() def get_registry_service(self, service_yaml): ''' This will get the service IP of the docker registry ''' print "\nGetting Docker Registry service IP..." service = yaml.safe_load(service_yaml) self.docker_service_ip = str(service['spec']['clusterIP']) def get_registry_endpoints(self, endpoint_yaml): """ This will return the docker registry endpoint IPs that are being served inside of kubernetes. """ print "\nFinding the Docker Registry pods via Openshift API calls..." endpoints = yaml.safe_load(endpoint_yaml) self.docker_port = str(endpoints['subsets'][0]['ports'][0]['port']) for address in endpoints['subsets'][0]['addresses']: self.docker_hosts.append(address['ip']) def healthy_registry(self, ip_addr, port, secure=True): ''' Test a specific registry URL In v3.0.2.0, http://registry.url/healthz worked. The '/healthz' was something added by openshift to the docker registry. This should return a http status code of 200 and text of {} (empty json). In 3.1.1 and on, '/' should work and return a 200 to indicate that the registry is up and running. Please see the following url for more info.
Look under load balancer health checks: https://github.com/docker/distribution/blob/master/docs/deploying.md#running-a-domain-registry ''' proto = self.docker_protocol if not secure: proto = 'http' url = '{}://{}:{}/'.format(proto, ip_addr, port) try: print "Performing Docker Registry check on URL: {}".format(url) response = urllib2.urlopen(url, timeout=20) if response.getcode() == 200: return True except urllib2.URLError: print "Received error accessing URL: {}".format(url) except socket.timeout: print "Timed out accessing URL: {}".format(url) # Try with /healthz try: url = url + 'healthz' print "Performing Docker Registry check on URL: {}".format(url) response = urllib2.urlopen(url, timeout=20) if response.getcode() == 200: return True except urllib2.URLError: print "Received error accessing URL: {}".format(url) except socket.timeout: print "Timed out accessing URL: {}".format(url) # We tried regular and 'healthz' URLs. Registry inaccessible. return False def registry_service_check(self): ''' Test and report on health of Docker Registry service ''' status = '0' # Skip if we failed to fetch a valid service IP if self.docker_service_ip is not None: if self.healthy_registry(self.docker_service_ip, self.docker_port): status = '1' elif self.healthy_registry(self.docker_service_ip, self.docker_port, secure=False): status = '1' print "\nDocker Registry service status: {}".format(status) self.zagg_sender.add_zabbix_keys( {'openshift.node.registry.service.ping': status}) def registry_health_check(self): """ Check the registry's / URL """ healthy_registries = 0 for host in self.docker_hosts: if self.healthy_registry(host, self.docker_port): healthy_registries += 1 elif self.healthy_registry(host, self.docker_port, secure=False): healthy_registries += 1 healthy_pct = 0 if len(self.docker_hosts) > 0: # float() avoids Python 2 integer division flooring the ratio to 0 healthy_pct = (float(healthy_registries) / len(self.docker_hosts) * 100) print "\n%s of %s registry PODs are healthy\n" % ( healthy_registries, len(self.docker_hosts)) self.zagg_sender.add_zabbix_keys( {'openshift.node.registry-pods.healthy_pct': healthy_pct})
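# A quick worked example of the healthy_pct arithmetic above (counts
# hypothetical). Under Python 2, plain integer division would floor 2 / 3 to 0
# before the multiply, which is why the count is coerced to float first.
healthy_registries, total_hosts = 2, 3
healthy_pct = float(healthy_registries) / total_hosts * 100
print "%.1f%% of registry pods healthy" % healthy_pct   # 66.7%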
class OpsZaggClient(object): """ class to send data to zagg """ def __init__(self): self.zagg_sender = None self.args = None self.config = None self.heartbeat = None def run(self): """ main function to run the script """ self.parse_args() self.parse_config(self.args.config_file) self.config_zagg_sender() if self.args.send_heartbeat: self.add_heartbeat() if self.args.key and self.args.value: self.add_zabbix_key() if self.args.discovery_key and self.args.macro_string and self.args.macro_names: self.add_zabbix_dynamic_item() self.zagg_sender.send_metrics() def parse_args(self): """ parse the args from the cli """ parser = argparse.ArgumentParser(description="Zagg metric sender") parser.add_argument("--send-heartbeat", help="send heartbeat metric to zagg", action="store_true") parser.add_argument("-s", "--host", help="specify host name as registered in Zabbix") parser.add_argument("-z", "--zagg-url", help="url of Zagg server") parser.add_argument("--zagg-user", help="username of the Zagg server") parser.add_argument("--zagg-pass", help="Password of the Zagg server") parser.add_argument("--zagg-ssl-verify", default=None, help="Whether to verify ssl certificates.") parser.add_argument("-v", "--verbose", action="store_true", default=None, help="Verbose?") parser.add_argument("--debug", action="store_true", default=None, help="Debug?") parser.add_argument( "-c", "--config-file", help="ops-zagg-client config file", default="/etc/openshift_tools/zagg_client.yaml" ) key_value_group = parser.add_argument_group("Sending a Key-Value Pair") key_value_group.add_argument("-k", "--key", help="zabbix key") key_value_group.add_argument("-o", "--value", help="zabbix value") low_level_discovery_group = parser.add_argument_group("Sending a Low Level Discovery Item") low_level_discovery_group.add_argument("--discovery-key", help="discovery key") low_level_discovery_group.add_argument("--macro-string", help="macro string") low_level_discovery_group.add_argument("--macro-names", help="comma separated list of macro names") self.args = parser.parse_args() def parse_config(self, config_file): """ parse config file """ self.config = yaml.load(file(config_file)) def config_zagg_sender(self): """ configure the zagg_sender """ zagg_url = self.args.zagg_url if self.args.zagg_url else self.config["zagg"]["url"] zagg_user = self.args.zagg_user if self.args.zagg_user else self.config["zagg"]["user"] zagg_password = self.args.zagg_pass if self.args.zagg_pass else self.config["zagg"]["pass"] zagg_verbose = self.args.verbose if self.args.verbose else self.config["zagg"]["verbose"] zagg_debug = self.args.debug if self.args.debug else self.config["zagg"]["debug"] zagg_ssl_verify = self.args.zagg_ssl_verify if self.args.zagg_ssl_verify else self.config["zagg"]["ssl_verify"] host = self.args.host if self.args.host else self.config["host"]["name"] if isinstance(zagg_verbose, str): zagg_verbose = zagg_verbose == "True" if isinstance(zagg_debug, str): zagg_debug = zagg_debug == "True" if isinstance(zagg_ssl_verify, str): zagg_ssl_verify = zagg_ssl_verify == "True" zagg_conn = ZaggConnection( url=zagg_url, user=zagg_user, password=zagg_password, ssl_verify=zagg_ssl_verify, debug=zagg_debug ) self.zagg_sender = ZaggSender(host, zagg_conn, zagg_verbose, zagg_debug) def add_heartbeat(self): """ create a heartbeat metric """ heartbeat = ZaggHeartbeat( templates=self.config["heartbeat"]["templates"], hostgroups=self.config["heartbeat"]["hostgroups"] ) self.zagg_sender.add_heartbeat(heartbeat) def add_zabbix_key(self): """ send zabbix key/value
pair to zagg """ self.zagg_sender.add_zabbix_keys({self.args.key: self.args.value}) def add_zabbix_dynamic_item(self): """ send zabbix low level discovery item to zagg """ self.zagg_sender.add_zabbix_dynamic_item( self.args.discovery_key, self.args.macro_string, self.args.macro_names.split(",") )
def main(): ''' Get data from etcd API ''' SSL_CLIENT_CERT = '/etc/openshift/master/master.etcd-client.crt' SSL_CLIENT_KEY = '/etc/openshift/master/master.etcd-client.key' OPENSHIFT_MASTER_CONFIG = '/etc/openshift/master/master-config.yaml' # find out the etcd port with open(OPENSHIFT_MASTER_CONFIG, 'r') as f: config = yaml.load(f) API_HOST = config["etcdClientInfo"]["urls"][0] # define the store API URL API_URL = API_HOST + "/v2/stats/store" zs = ZaggSender() # Fetch the store statistics from the API try: request = requests.get(API_URL, cert=(SSL_CLIENT_CERT, SSL_CLIENT_KEY), verify=False) content = json.loads(request.content) etcd_ping = 1 # parse the items and add them as metrics zs.add_zabbix_keys({'openshift.master.etcd.create.success' : content['createSuccess']}) zs.add_zabbix_keys({'openshift.master.etcd.create.fail' : content['createFail']}) zs.add_zabbix_keys({'openshift.master.etcd.delete.success' : content['deleteSuccess']}) zs.add_zabbix_keys({'openshift.master.etcd.delete.fail' : content['deleteFail']}) zs.add_zabbix_keys({'openshift.master.etcd.get.success' : content['getsSuccess']}) zs.add_zabbix_keys({'openshift.master.etcd.get.fail' : content['getsFail']}) zs.add_zabbix_keys({'openshift.master.etcd.set.success' : content['setsSuccess']}) zs.add_zabbix_keys({'openshift.master.etcd.set.fail' : content['setsFail']}) zs.add_zabbix_keys({'openshift.master.etcd.update.success' : content['updateSuccess']}) zs.add_zabbix_keys({'openshift.master.etcd.update.fail' : content['updateFail']}) zs.add_zabbix_keys({'openshift.master.etcd.watchers' : content['watchers']}) except requests.exceptions.ConnectionError as ex: print "ERROR talking to etcd API: %s" % ex.message etcd_ping = 0 zs.add_zabbix_keys({'openshift.master.etcd.ping' : etcd_ping}) # Finally, send them to zabbix zs.send_metrics()
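# For reference, a trimmed sketch of the JSON that /v2/stats/store returns and
# that the block above maps one-to-one onto Zabbix keys. Field names follow
# the etcd v2 stats API; the values here are hypothetical.
sample_store_stats = {
    'createSuccess': 100, 'createFail': 2,
    'deleteSuccess': 10, 'deleteFail': 0,
    'getsSuccess': 5000, 'getsFail': 30,
    'setsSuccess': 800, 'setsFail': 1,
    'updateSuccess': 40, 'updateFail': 0,
    'watchers': 12,
}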
cmd = ["ovs-ofctl", "-O", "OpenFlow13", "del-flows", "br0"] for ovs_rule in rule_list: # The trailing '/-1' is the wildcard match rule_to_cookie = "cookie=0x{0}/-1".format(ovs_rule) cmd.append(rule_to_cookie) subprocess.call(cmd) cmd.pop() # Since rule list has changed, force it to regenerate next time self.rules = None ZBX_KEY = "openshift.node.ovs.stray.rules" if __name__ == "__main__": ovs_fixer = OVS() zgs = ZaggSender() # Dev says rules before ports since OpenShift will set up ports, then rules ovs_fixer.get_rule_list() ovs_ports = ovs_fixer.get_port_list() ovs_bad_rules = ovs_fixer.find_bad_rules() # Report bad/stray rules count before removing zgs.add_zabbix_keys({ZBX_KEY: len(ovs_bad_rules)}) zgs.send_metrics() print "Good ports: {0}".format(str(ovs_ports)) print "Bad rules: {0}".format(str(ovs_bad_rules)) ovs_fixer.remove_rules(ovs_bad_rules)
class OpenshiftSkyDNSZaggClient(object): """ Checks for the Openshift Master SkyDNS """ def __init__(self): self.args = None self.zagg_sender = None self.ora = OpenshiftRestApi() self.dns_host = '' self.dns_port = 53 self.openshift_services = [] def run(self): """ Main function to run the check """ self.parse_args() self.zagg_sender = ZaggSender(verbose=self.args.verbose, debug=self.args.debug) self.get_openshift_services() dns_host = [ i for i in self.openshift_services if i['name'] == 'kubernetes' and i['namespace'] == 'default' ] if len(dns_host) == 1: self.dns_host = dns_host[0]['ip'] else: print "\nUnable to find SkyDNS server." print "Please run \"oc get services -n default\" to locate the kubernetes service" sys.exit(1) if self.check_dns_port_alive(): self.do_dns_check() self.zagg_sender.send_metrics() def parse_args(self): """ parse the args from the cli """ parser = argparse.ArgumentParser(description='Network metric sender') parser.add_argument('-v', '--verbose', action='store_true', default=None, help='Verbose?') parser.add_argument('--debug', action='store_true', default=None, help='Debug?') self.args = parser.parse_args() def check_dns_port_alive(self): """ Verify that the DNS port (TCP 53) is alive """ print "\nPerforming Openshift DNS port check..." try: s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) s.settimeout(1) s.connect((self.dns_host, self.dns_port)) s.close() print "\nOpenshift SkyDNS host: %s, port: %s is OPEN" % ( self.dns_host, self.dns_port) print "================================================\n" self.zagg_sender.add_zabbix_keys( {'openshift.master.skydns.port.open': 1}) return True except socket.error as e: print "\nOpenshift SkyDNS host: %s, port: %s is CLOSED" % ( self.dns_host, self.dns_port) print "Python Error: %s" % e print "================================================\n" self.zagg_sender.add_zabbix_keys( {'openshift.master.skydns.port.open': 0}) return False
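# do_dns_check() is defined outside this excerpt. As a rough stand-in only, a
# resolution check against SkyDNS could look like the sketch below, assuming
# the dnspython package is available; the nameserver IP and record name are
# hypothetical.
import dns.resolver
resolver = dns.resolver.Resolver()
resolver.nameservers = ['172.30.0.1']   # the kubernetes service IP found above
answer = resolver.query('kubernetes.default.svc.cluster.local', 'A')
print [rdata.address for rdata in answer]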
class OpenshiftClusterCapacity(object): ''' Checks for cluster capacity ''' def __init__(self): self.args = None self.zagg_sender = None self.ora = None self.sql_conn = None self.zbx_key_prefix = "openshift.master.cluster.compute_nodes." def run(self): ''' Main function to run the check ''' self.parse_args() self.zagg_sender = ZaggSender(verbose=self.args.verbose, debug=self.args.debug) master_cfg = [] with open(self.args.master_config, 'r') as yml: master_cfg = yaml.load(yml) self.ora = OpenshiftRestApi( host=master_cfg['oauthConfig']['masterURL'], verify_ssl=True) self.cluster_capacity() if not self.args.dry_run: self.zagg_sender.send_metrics() def parse_args(self): ''' parse the args from the cli ''' parser = argparse.ArgumentParser(description='Cluster capacity sender') parser.add_argument( '--master-config', default='/etc/origin/master/master-config.yaml', help='Location of OpenShift master-config.yaml file') parser.add_argument('-v', '--verbose', action='store_true', default=None, help='Verbose?') parser.add_argument('--debug', action='store_true', default=None, help='Debug?') parser.add_argument('--dry-run', action='store_true', default=False, help='Do not send results to Zabbix') self.args = parser.parse_args() def load_nodes(self): ''' load nodes into SQL ''' self.sql_conn.execute('''CREATE TABLE nodes (name text, type text, api text, max_cpu integer, max_memory integer, max_pods integer)''') response = self.ora.get('/api/v1/nodes') for new_node in response['items']: # Skip nodes not in 'Ready' state node_ready = False for condition in new_node['status']['conditions']: if condition['type'] == 'Ready' and \ condition['status'] == 'True': node_ready = True if not node_ready: continue # Skip unschedulable nodes if new_node['spec'].get('unschedulable'): continue node = {} node['name'] = new_node['metadata']['name'] node['type'] = new_node['metadata']['labels'].get( 'type', 'unknown') node['api'] = new_node['metadata']['selfLink'] if 'allocatable' in new_node['status']: cpu = new_node['status']['allocatable']['cpu'] mem = new_node['status']['allocatable']['memory'] node['max_pods'] = int( new_node['status']['allocatable']['pods']) else: cpu = new_node['status']['capacity']['cpu'] mem = new_node['status']['capacity']['memory'] node['max_pods'] = int(new_node['status']['capacity']['pods']) node['max_cpu'] = to_milicores(cpu) node['max_memory'] = to_bytes(mem) if self.args.debug: print "Adding node: {}".format(str(node)) self.sql_conn.execute( 'INSERT INTO nodes VALUES (?,?,?,?,?,?)', (node['name'], node['type'], node['api'], node['max_cpu'], node['max_memory'], node['max_pods'])) @staticmethod def load_container_limits(pod, containers): ''' process/store container limits data ''' for container in containers: if 'limits' in container['resources']: pod['cpu_limits'] = int(pod.get('cpu_limits', 0)) \ + int(to_milicores(container['resources']['limits'].get('cpu', '0'))) pod['memory_limits'] = int(pod.get('memory_limits', 0)) \ + int(to_bytes(container['resources']['limits'].get('memory', '0'))) if 'requests' in container['resources']: pod['cpu_requests'] = int(pod.get('cpu_requests', 0)) \ + int(to_milicores(container['resources']['requests'].get('cpu', '0'))) pod['memory_requests'] = int(pod.get('memory_requests', 0)) \ + int(to_bytes(container['resources']['requests'].get('memory', '0'))) def load_pods(self): ''' put pod details into db ''' self.sql_conn.execute('''CREATE TABLE pods (name text, namespace text, api text, cpu_limits integer, cpu_requests integer, memory_limits integer,
memory_requests integer, node text)''') response = self.ora.get('/api/v1/pods') for new_pod in response['items']: if new_pod['status']['phase'] != 'Running': continue pod = {} pod['name'] = new_pod['metadata']['name'] pod['namespace'] = new_pod['metadata']['namespace'] pod['api'] = new_pod['metadata']['selfLink'] pod['node'] = new_pod['spec']['nodeName'] self.load_container_limits(pod, new_pod['spec']['containers']) self.sql_conn.execute( 'INSERT INTO pods VALUES (?,?,?,?,?,?,?,?)', (pod['name'], pod['namespace'], pod['api'], pod.get('cpu_limits'), pod.get('cpu_requests'), pod.get('memory_limits'), pod.get('memory_requests'), pod['node'])) def get_largest_pod(self): ''' return single largest memory request number for all running pods ''' max_pod = 0 for row in self.sql_conn.execute('''SELECT MAX(memory_requests) FROM pods, nodes WHERE pods.node=nodes.name AND nodes.type="compute"'''): max_pod = row[0] return max_pod def how_many_schedulable(self, node_size): ''' return how many pods with memory request 'node_size' can be scheduled ''' nodes = {} # get max mem for each compute node for row in self.sql_conn.execute('''SELECT nodes.name, nodes.max_memory FROM nodes WHERE nodes.type="compute"'''): nodes[row[0]] = { 'max_memory': row[1], # set memory_scheduled to '0' because node may have # no pods running, and next SQL query below will # leave this field unpopulated 'memory_scheduled': 0 } # get memory requests for all pods on all compute nodes for row in self.sql_conn.execute('''SELECT nodes.name, SUM(pods.memory_requests) FROM pods, nodes WHERE pods.node=nodes.name AND nodes.type="compute" GROUP BY nodes.name'''): nodes[row[0]]['memory_scheduled'] = row[1] schedulable = 0 for node in nodes.keys(): # TODO: Some containers from `oc get pods --all-namespaces -o json` # don't have resources scheduled, causing memory_scheduled == 0 available = nodes[node]['max_memory'] - \ nodes[node]['memory_scheduled'] num = available / node_size # ignore negative number (overcommitted nodes) if num > 0: schedulable += num return schedulable def get_compute_nodes_max_schedulable_cpu(self): ''' calculate total schedulable CPU (in millicores) for all compute nodes ''' max_cpu = 0 for row in self.sql_conn.execute('''SELECT SUM(nodes.max_cpu) FROM nodes WHERE nodes.type="compute" '''): max_cpu = row[0] return max_cpu def get_compute_nodes_max_schedulable_mem(self): ''' calculate total schedulable memory for all compute nodes ''' max_mem = 0 for row in self.sql_conn.execute('''SELECT SUM(nodes.max_memory) FROM nodes WHERE nodes.type="compute" '''): max_mem = row[0] return max_mem def get_compute_nodes_scheduled_cpu(self): ''' calculate cpu scheduled to pods (total requested and percentage of cluster-wide total) ''' max_cpu = self.get_compute_nodes_max_schedulable_cpu() cpu_requests_for_all_pods = 0 for row in self.sql_conn.execute('''SELECT SUM(pods.cpu_requests) FROM pods, nodes WHERE pods.node = nodes.name AND nodes.type = "compute" '''): cpu_requests_for_all_pods = row[0] cpu_scheduled_as_pct = 100.0 * cpu_requests_for_all_pods / max_cpu cpu_unscheduled = max_cpu - cpu_requests_for_all_pods cpu_unscheduled_as_pct = 100.0 * cpu_unscheduled / max_cpu return (cpu_requests_for_all_pods, cpu_scheduled_as_pct, cpu_unscheduled, cpu_unscheduled_as_pct) def get_compute_nodes_scheduled_mem(self): ''' calculate mem allocated to pods (total requested and percentage of cluster-wide total) ''' max_mem = self.get_compute_nodes_max_schedulable_mem() mem_requests_for_all_pods = 0 for row in self.sql_conn.execute('''SELECT
SUM(pods.memory_requests) FROM pods, nodes WHERE pods.node = nodes.name AND nodes.type = "compute" '''): mem_requests_for_all_pods = row[0] mem_scheduled_as_pct = 100.0 * mem_requests_for_all_pods / max_mem mem_unscheduled = max_mem - mem_requests_for_all_pods mem_unscheduled_as_pct = 100.0 * mem_unscheduled / max_mem return (mem_requests_for_all_pods, mem_scheduled_as_pct, mem_unscheduled, mem_unscheduled_as_pct) def get_oversub_cpu(self): ''' return percentage oversubscribed based on CPU limits on running pods ''' max_cpu = self.get_compute_nodes_max_schedulable_cpu() pod_cpu_limits = 0 # get cpu limits for all running pods for row in self.sql_conn.execute('''SELECT SUM(pods.cpu_limits) FROM pods, nodes WHERE pods.node = nodes.name AND nodes.type = "compute" '''): pod_cpu_limits = row[0] return ((float(pod_cpu_limits) / max_cpu) * 100.0) - 100 def get_oversub_mem(self): ''' return percentage oversubscribed based on memory limits on running pods ''' max_mem = self.get_compute_nodes_max_schedulable_mem() pod_mem_limits = 0 # get mem limits for all running pods for row in self.sql_conn.execute('''SELECT SUM(pods.memory_limits) FROM pods, nodes WHERE pods.node = nodes.name AND nodes.type = "compute" '''): pod_mem_limits = row[0] return ((float(pod_mem_limits) / max_mem) * 100.0) - 100 def do_cpu_stats(self): ''' gather and report CPU statistics ''' # CPU items zbx_key_max_schedulable_cpu = self.zbx_key_prefix + "max_schedulable.cpu" zbx_key_scheduled_cpu = self.zbx_key_prefix + "scheduled.cpu" zbx_key_scheduled_cpu_pct = self.zbx_key_prefix + "scheduled.cpu_pct" zbx_key_unscheduled_cpu = self.zbx_key_prefix + "unscheduled.cpu" zbx_key_unscheduled_cpu_pct = self.zbx_key_prefix + "unscheduled.cpu_pct" zbx_key_oversub_cpu_pct = self.zbx_key_prefix + "oversubscribed.cpu_pct" print "CPU Stats:" max_schedulable_cpu = self.get_compute_nodes_max_schedulable_cpu() self.zagg_sender.add_zabbix_keys( {zbx_key_max_schedulable_cpu: max_schedulable_cpu}) scheduled_cpu, scheduled_cpu_pct, unscheduled_cpu, unscheduled_cpu_pct = self.get_compute_nodes_scheduled_cpu() oversub_cpu_pct = self.get_oversub_cpu() print " Scheduled CPU for compute nodes:\t\t\t" + \ "{:>15} millicores".format(scheduled_cpu) print " Unscheduled CPU for compute nodes:\t\t\t" + \ "{:>15} millicores".format(unscheduled_cpu) print " Maximum (total) schedulable CPU for compute " + \ "nodes:\t{:>15} millicores".format(max_schedulable_cpu) print " Percent scheduled CPU for compute nodes:\t\t\t" + \ "{:.2f}%".format(scheduled_cpu_pct) print " Percent unscheduled CPU for compute nodes:\t\t\t" + \ "{:.2f}%".format(unscheduled_cpu_pct) print " Percent oversubscribed CPU for compute nodes: \t\t" + \ "{:.2f}%".format(oversub_cpu_pct) self.zagg_sender.add_zabbix_keys( {zbx_key_scheduled_cpu: scheduled_cpu}) self.zagg_sender.add_zabbix_keys( {zbx_key_scheduled_cpu_pct: int(scheduled_cpu_pct)}) self.zagg_sender.add_zabbix_keys( {zbx_key_unscheduled_cpu: unscheduled_cpu}) self.zagg_sender.add_zabbix_keys( {zbx_key_unscheduled_cpu_pct: int(unscheduled_cpu_pct)}) self.zagg_sender.add_zabbix_keys( {zbx_key_oversub_cpu_pct: int(oversub_cpu_pct)}) def do_mem_stats(self): ''' gather and report memory statistics ''' # Memory items zbx_key_max_schedulable_mem = self.zbx_key_prefix + "max_schedulable.mem" zbx_key_scheduled_mem = self.zbx_key_prefix + "scheduled.mem" zbx_key_scheduled_mem_pct = self.zbx_key_prefix + "scheduled.mem_pct" zbx_key_unscheduled_mem = self.zbx_key_prefix + "unscheduled.mem" zbx_key_unscheduled_mem_pct = self.zbx_key_prefix +
"unscheduled.mem_pct" zbx_key_oversub_mem_pct = self.zbx_key_prefix + "oversubscribed.mem_pct" print "\nMemory Stats:" max_schedulable_mem = self.get_compute_nodes_max_schedulable_mem() self.zagg_sender.add_zabbix_keys( {zbx_key_max_schedulable_mem: max_schedulable_mem}) scheduled_mem, scheduled_mem_pct, unscheduled_mem, unscheduled_mem_pct = self.get_compute_nodes_scheduled_mem( ) oversub_mem_pct = self.get_oversub_mem() print " Scheduled mem for compute nodes:\t\t\t" + \ "{:>20} bytes".format(scheduled_mem) print " Unscheduled mem for compute nodes:\t\t\t" + \ "{:>20} bytes".format(unscheduled_mem) print " Maximum (total) schedulable memory for compute nodes:\t" + \ "{:>20} bytes".format(max_schedulable_mem) print " Percent scheduled mem for compute nodes:\t\t\t" + \ "{:.2f}%".format(scheduled_mem_pct) print " Percent unscheduled mem for compute nodes:\t\t\t" + \ "{:.2f}%".format(unscheduled_mem_pct) print " Percent oversubscribed mem for compute nodes: \t\t" + \ "{:.2f}%".format(oversub_mem_pct) self.zagg_sender.add_zabbix_keys( {zbx_key_scheduled_mem: scheduled_mem}) self.zagg_sender.add_zabbix_keys( {zbx_key_scheduled_mem_pct: int(scheduled_mem_pct)}) self.zagg_sender.add_zabbix_keys( {zbx_key_unscheduled_mem: unscheduled_mem}) self.zagg_sender.add_zabbix_keys( {zbx_key_unscheduled_mem_pct: int(unscheduled_mem_pct)}) self.zagg_sender.add_zabbix_keys( {zbx_key_oversub_mem_pct: int(oversub_mem_pct)}) def cluster_capacity(self): ''' check capacity of compute nodes on cluster''' # Other zabbix items zbx_key_max_pods = "openshift.master.cluster.max_mem_pods_schedulable" self.sql_conn = sqlite3.connect(':memory:') self.load_nodes() self.load_pods() self.do_cpu_stats() self.do_mem_stats() print "\nOther stats:" largest = self.get_largest_pod() if self.args.debug: print " Largest memory pod: {}".format(largest) schedulable = self.how_many_schedulable(largest) print " Number of max-size nodes schedulable:\t\t\t\t{}".format( schedulable) self.zagg_sender.add_zabbix_keys({zbx_key_max_pods: schedulable})
class OpsZaggClient(object): """ class to send data to zagg """ def __init__(self): self.zagg_sender = None self.args = None self.config = None self.pcp_metrics = [] self.heartbeat = None def run(self): """ main function to run the script """ self.parse_args() self.parse_config(self.args.config_file) self.config_zagg_sender() if self.args.send_pcp_metrics: self.add_pcp_metrics() if self.args.send_heartbeat: self.add_heartbeat() if self.args.key and self.args.value: self.add_zabbix_key() if self.args.discovery_key and self.args.macro_string and self.args.macro_names: self.add_zabbix_dynamic_item() self.zagg_sender.send_metrics() def parse_args(self): """ parse the args from the cli """ parser = argparse.ArgumentParser(description='Zagg metric sender') parser.add_argument('--send-pcp-metrics', help="send pcp metrics to zagg", action="store_true") parser.add_argument('--send-heartbeat', help="send heartbeat metric to zagg", action="store_true") parser.add_argument('-s', '--host', help='specify host name as registered in Zabbix') parser.add_argument('-z', '--zagg-url', help='url of Zagg server') parser.add_argument('--zagg-user', help='username of the Zagg server') parser.add_argument('--zagg-pass', help='Password of the Zagg server') parser.add_argument('--zagg-ssl-verify', default=None, help='Whether to verify ssl certificates.') parser.add_argument('-v', '--verbose', action='store_true', default=None, help='Verbose?') parser.add_argument('--debug', action='store_true', default=None, help='Debug?') parser.add_argument('-c', '--config-file', help='ops-zagg-client config file', default='/etc/openshift_tools/zagg_client.yaml') key_value_group = parser.add_argument_group('Sending a Key-Value Pair') key_value_group.add_argument('-k', '--key', help='zabbix key') key_value_group.add_argument('-o', '--value', help='zabbix value') low_level_discovery_group = parser.add_argument_group('Sending a Low Level Discovery Item') low_level_discovery_group.add_argument('--discovery-key', help='discovery key') low_level_discovery_group.add_argument('--macro-string', help='macro string') low_level_discovery_group.add_argument('--macro-names', help='comma separated list of macro names') self.args = parser.parse_args() def parse_config(self, config_file): """ parse config file """ self.config = yaml.load(file(config_file)) def config_zagg_sender(self): """ configure the zagg_sender """ zagg_url = self.args.zagg_url if self.args.zagg_url else self.config['zagg']['url'] zagg_user = self.args.zagg_user if self.args.zagg_user else self.config['zagg']['user'] zagg_password = self.args.zagg_pass if self.args.zagg_pass else self.config['zagg']['pass'] zagg_verbose = self.args.verbose if self.args.verbose else self.config['zagg']['verbose'] zagg_debug = self.args.debug if self.args.debug else self.config['zagg']['debug'] zagg_ssl_verify = self.args.zagg_ssl_verify if self.args.zagg_ssl_verify else self.config['zagg']['ssl_verify'] host = self.args.host if self.args.host else self.config['host']['name'] if isinstance(zagg_verbose, str): zagg_verbose = (zagg_verbose == 'True') if isinstance(zagg_debug, str): zagg_debug = (zagg_debug == 'True') if isinstance(zagg_ssl_verify, str): zagg_ssl_verify = (zagg_ssl_verify == 'True') zagg_conn = ZaggConnection( url=zagg_url, user=zagg_user, password=zagg_password, ssl_verify=zagg_ssl_verify, debug=zagg_debug, ) self.zagg_sender = ZaggSender(host, zagg_conn, zagg_verbose, zagg_debug) def add_heartbeat(self): """ create a heartbeat metric """ heartbeat = ZaggHeartbeat(
templates=self.config['heartbeat']['templates'], hostgroups=self.config['heartbeat']['hostgroups'], ) self.zagg_sender.add_heartbeat(heartbeat) def add_pcp_metrics(self): """ collect pcp metrics to send to ZaggSender """ self.zagg_sender.add_pcp_metrics(self.config['pcp']['metrics']) def add_zabbix_key(self): """ send zabbix key/value pair to zagg """ self.zagg_sender.add_zabbix_keys({self.args.key: self.args.value}) def add_zabbix_dynamic_item(self): """ send zabbix low level discovery item to zagg """ self.zagg_sender.add_zabbix_dynamic_item( self.args.discovery_key, self.args.macro_string, self.args.macro_names.split(','), )
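# The config keys config_zagg_sender(), add_heartbeat() and add_pcp_metrics()
# expect can be read off the lookups above. A minimal sketch of what
# yaml.load(file('/etc/openshift_tools/zagg_client.yaml')) should return;
# every value here is hypothetical.
sample_config = {
    'host': {'name': 'node1.example.com'},
    'zagg': {'url': 'https://zagg.example.com', 'user': 'admin',
             'pass': 'secret', 'ssl_verify': 'False',
             'verbose': 'False', 'debug': 'False'},
    'heartbeat': {'templates': ['Template Heartbeat'],
                  'hostgroups': ['OpenShift Nodes']},
    'pcp': {'metrics': ['kernel.all.load']},
}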
class OpenshiftClusterCapacity(object): ''' Checks for cluster capacity ''' def __init__(self): self.args = None self.zagg_sender = None self.ora = None self.sql_conn = None def run(self): ''' Main function to run the check ''' self.parse_args() self.zagg_sender = ZaggSender(verbose=self.args.verbose, debug=self.args.debug) master_cfg = [] with open(self.args.master_config, 'r') as yml: master_cfg = yaml.load(yml) self.ora = OpenshiftRestApi( host=master_cfg['oauthConfig']['masterURL'], verify_ssl=True) self.node_capacity() if not self.args.dry_run: self.zagg_sender.send_metrics() def parse_args(self): ''' parse the args from the cli ''' parser = argparse.ArgumentParser(description='Cluster capacity sender') parser.add_argument( '--master-config', default='/etc/origin/master/master-config.yaml', help='Location of OpenShift master-config.yaml file') parser.add_argument('-v', '--verbose', action='store_true', default=None, help='Verbose?') parser.add_argument('--debug', action='store_true', default=None, help='Debug?') parser.add_argument('--dry-run', action='store_true', default=False, help='Do not send results to Zabbix') self.args = parser.parse_args() def load_nodes(self): ''' load nodes into SQL ''' self.sql_conn.execute('''CREATE TABLE nodes (name text, type text, api text, max_cpu integer, max_memory integer, max_pods integer)''') response = self.ora.get('/api/v1/nodes') for new_node in response['items']: # Skip nodes not in 'Ready' state node_ready = False for condition in new_node['status']['conditions']: if condition['type'] == 'Ready' and \ condition['status'] == 'True': node_ready = True if not node_ready: continue node = {} node['name'] = new_node['metadata']['name'] node['type'] = new_node['metadata']['labels']['type'] node['api'] = new_node['metadata']['selfLink'] if 'allocatable' in new_node['status']: cpu = new_node['status']['allocatable']['cpu'] mem = new_node['status']['allocatable']['memory'] node['max_pods'] = int( new_node['status']['allocatable']['pods']) else: cpu = new_node['status']['capacity']['cpu'] mem = new_node['status']['capacity']['memory'] node['max_pods'] = int(new_node['status']['capacity']['pods']) node['max_cpu'] = to_milicores(cpu) node['max_memory'] = to_bytes(mem) if self.args.debug: print "Adding node: {}".format(str(node)) self.sql_conn.execute( 'INSERT INTO nodes VALUES (?,?,?,?,?,?)', (node['name'], node['type'], node['api'], node['max_cpu'], node['max_memory'], node['max_pods'])) @staticmethod def load_container_limits(pod, containers): ''' process/store container limits data ''' for container in containers: if 'limits' in container['resources']: cpu = container['resources']['limits'].get('cpu') if cpu: pod['cpu_limits'] = pod.get('cpu_limits', 0) + \ to_milicores(cpu) mem = container['resources']['limits'].get('memory') if mem: pod['memory_limits'] = pod.get('memory_limits', 0) + \ to_bytes(mem) if 'requests' in container['resources']: cpu = container['resources']['requests'].get('cpu') if cpu: pod['cpu_requests'] = pod.get('cpu_requests', 0) + \ to_milicores(cpu) mem = container['resources']['requests'].get('memory') if mem: pod['memory_requests'] = pod.get('memory_requests', 0) + \ to_bytes(mem) def load_pods(self): ''' put pod details into db ''' self.sql_conn.execute('''CREATE TABLE pods (name text, namespace text, api text, cpu_limits integer, cpu_requests integer, memory_limits integer, memory_requests integer, node text)''') response = self.ora.get('/api/v1/pods') for new_pod in response['items']: if new_pod['status']['phase'] !=
'Running': continue pod = {} pod['name'] = new_pod['metadata']['name'] pod['namespace'] = new_pod['metadata']['namespace'] pod['api'] = new_pod['metadata']['selfLink'] pod['node'] = new_pod['spec']['nodeName'] self.load_container_limits(pod, new_pod['spec']['containers']) self.sql_conn.execute( 'INSERT INTO pods VALUES (?,?,?,?,?,?,?,?)', (pod['name'], pod['namespace'], pod['api'], pod.get('cpu_limits'), pod.get('cpu_requests'), pod.get('memory_limits'), pod.get('memory_requests'), pod['node'])) def get_memory_percentage(self): ''' calculate pod memory limits as a percentage of cluster (compute-node) memory capacity ''' node_mem = 0 pod_mem = 0 for row in self.sql_conn.execute('''SELECT SUM(nodes.max_memory) FROM nodes WHERE nodes.type="compute"'''): node_mem = row[0] for row in self.sql_conn.execute('''SELECT SUM(pods.memory_limits) FROM pods, nodes WHERE pods.node=nodes.name AND nodes.type="compute"'''): pod_mem = row[0] return float(100) * pod_mem / node_mem def get_largest_pod(self): ''' return memory limit for largest pod ''' max_pod = 0 for row in self.sql_conn.execute('''SELECT MAX(memory_limits) FROM pods, nodes WHERE pods.node=nodes.name AND nodes.type="compute"'''): max_pod = row[0] return max_pod def how_many_schedulable(self, size): ''' return how many pods with memory 'size' can be scheduled ''' nodes = {} # get max mem for each compute node; default memory_allocated to 0 so # nodes with no running pods do not KeyError in the loop below for row in self.sql_conn.execute('''SELECT nodes.name, nodes.max_memory FROM nodes WHERE nodes.type="compute"'''): nodes[row[0]] = {'max_memory': row[1], 'memory_allocated': 0} # get memory allocated/granted for each compute node for row in self.sql_conn.execute('''SELECT nodes.name, SUM(pods.memory_limits) FROM pods, nodes WHERE pods.node=nodes.name AND nodes.type="compute" GROUP BY nodes.name'''): nodes[row[0]]['memory_allocated'] = row[1] schedulable = 0 for node in nodes.keys(): available = nodes[node]['max_memory'] - \ nodes[node]['memory_allocated'] num = available / size # ignore negative number (overcommitted nodes) if num > 0: schedulable += num return schedulable def node_capacity(self): ''' check capacity of compute nodes ''' zbx_key_mem_alloc = "openshift.master.cluster.memory_allocation" zbx_key_max_pods = "openshift.master.cluster.max_mem_pods_schedulable" self.sql_conn = sqlite3.connect(':memory:') self.load_nodes() self.load_pods() memory_percentage = self.get_memory_percentage() largest = self.get_largest_pod() if self.args.debug: print "Largest memory pod: {}".format(largest) schedulable = self.how_many_schedulable(largest) print "Percentage of memory allocated: {}".format(memory_percentage) print "Number of max-size pods schedulable: {}".format(schedulable) self.zagg_sender.add_zabbix_keys( {zbx_key_mem_alloc: int(memory_percentage)}) self.zagg_sender.add_zabbix_keys({zbx_key_max_pods: schedulable})
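# A small worked example of the how_many_schedulable() arithmetic with
# hypothetical numbers: two 16 GiB compute nodes, one with 12 GiB already
# allocated and one overcommitted at 17 GiB, against a 2 GiB largest pod.
nodes = {'n1': {'max_memory': 16 << 30, 'memory_allocated': 12 << 30},
         'n2': {'max_memory': 16 << 30, 'memory_allocated': 17 << 30}}
size = 2 << 30
schedulable = 0
for node in nodes.values():
    num = (node['max_memory'] - node['memory_allocated']) / size  # floor division
    if num > 0:   # skip overcommitted nodes
        schedulable += num
print schedulable   # 2 more max-size pods fit (both on n1)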
class EtcdStatusZaggSender(object): """ class to gather all metrics from etcd daemons """ def __init__(self): self.api_host = None self.args = None self.parser = None self.config = None self.etcd_ping = 0 self.default_config = '/etc/openshift_tools/etcd_metrics.yml' self.zagg_sender = ZaggSender() def parse_args(self): '''Parse the arguments for this script''' self.parser = argparse.ArgumentParser(description="Script that gathers metrics from etcd") self.parser.add_argument('-d', '--debug', default=False, action="store_true", help="debug mode") self.parser.add_argument('-v', '--verbose', default=False, action="store_true", help="Verbose?") self.parser.add_argument('-t', '--test', default=False, action="store_true", help="Run the script but don't send to zabbix") self.parser.add_argument('-c', '--configfile', default=self.default_config, help="Config file that contains metrics to be collected, defaults to etcd_metrics.yml") self.args = self.parser.parse_args() def call_etcd_api(self, rest_path): '''Makes the API calls to rest endpoints in etcd''' try: response = requests.get(self.api_host + rest_path, cert=(self.config['etcd_info']['files']['ssl_client_cert'], self.config['etcd_info']['files']['ssl_client_key']), verify=False) self.etcd_ping = 1 except requests.exceptions.ConnectionError as ex: print "ERROR talking to etcd API: {0}".format(ex.message) else: return response.content def json_metric(self, met): '''process json data from etcd''' return_data = {} api_response = self.call_etcd_api(met['path']) if api_response: content = json.loads(api_response) for item in met['values']: return_data[met['prefix'] + item['zab_key']] = content[item['src']] return return_data def text_metric(self, met): '''process text value from etcd''' return_data = {} content = self.call_etcd_api(met['path']) if content: for metric in text_string_to_metric_families(content): # skipping histogram and summary types unless we find a good way to add them to zabbix (unlikely) if metric.type in ['histogram', 'summary']: continue elif metric.type in ['counter', 'gauge'] and metric.name in met['values']: zab_metric_name = met['prefix'] + metric.name.replace('_', '.') if len(metric.samples) > 1: if met['values'][metric.name]: sub_key = met['values'][metric.name] for singlemetric in metric.samples: return_data['{0}.{1}'.format(zab_metric_name, singlemetric[1][sub_key])] = singlemetric[2] else: return_data[zab_metric_name] = metric.samples[0][2] else: if self.args.debug: print 'Got unknown type of metric from etcd, skipping it: ({0}) '.format(metric.type) return return_data def run(self): ''' Get data from etcd API ''' self.parse_args() try: with open(self.args.configfile, 'r') as configfile: self.config = yaml.load(configfile) except IOError as ex: print 'There was a problem opening the config file: {0}'.format(ex) print 'Exiting' sys.exit(1) # find out the etcd port try: with open(self.config['etcd_info']['files']['openshift_master_config'], 'r') as f: om_config = yaml.load(f) except IOError as ex: print 'Problem opening openshift master config: {0}'.format(ex) sys.exit(2) else: self.api_host = om_config["etcdClientInfo"]["urls"][0] # let's get the metrics for metric in self.config['etcd_info']['metrics']: if metric['type'] == 'text': self.zagg_sender.add_zabbix_keys(self.text_metric(metric)) elif metric['type'] == 'json': self.zagg_sender.add_zabbix_keys(self.json_metric(metric)) self.send_zagg_data() def send_zagg_data(self): ''' Sending the data to zagg or displaying it in console when test option is used ''' 
self.zagg_sender.add_zabbix_keys({'openshift.master.etcd.ping' : self.etcd_ping}) if not self.args.test: self.zagg_sender.send_metrics() else: self.zagg_sender.print_unique_metrics()
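# The shape of /etc/openshift_tools/etcd_metrics.yml can be inferred from
# run(), json_metric() and text_metric(). A minimal sketch of the parsed
# config; the paths and the metric/key names here are hypothetical.
sample_config = {'etcd_info': {
    'files': {
        'ssl_client_cert': '/etc/etcd/client.crt',
        'ssl_client_key': '/etc/etcd/client.key',
        'openshift_master_config': '/etc/origin/master/master-config.yaml',
    },
    'metrics': [
        # json: pick named fields out of a JSON endpoint
        {'type': 'json', 'path': '/v2/stats/store',
         'prefix': 'openshift.master.etcd.store.',
         'values': [{'zab_key': 'watchers', 'src': 'watchers'}]},
        # text: prometheus-format metrics; the value names a label to split
        # multi-sample metrics on, or is empty for single-sample metrics
        {'type': 'text', 'path': '/metrics',
         'prefix': 'openshift.master.etcd.',
         'values': {'etcd_storage_keys_total': None}},
    ],
}}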
class OpenshiftWebServiceChecker(object): """ Checks for Openshift web services """ def __init__(self): self.args = None self.ora = None self.zagg_sender = None self.service_ip = None self.service_port = '443' def run(self): """ Main function to run the check """ self.parse_args() self.ora = OpenshiftRestApi() self.zagg_sender = ZaggSender(verbose=self.args.verbose, debug=self.args.debug) status = False # default if the lookups below raise try: self.get_service() status = self.check_service() except Exception as ex: print "Problem retrieving data: %s " % ex.message self.zagg_sender.add_zabbix_keys({ "openshift.webservice.{}.status".format(self.args.pod) : status}) self.zagg_sender.send_metrics() def get_service(self): """ Gets the service for a pod """ print "\nLooking up services for pod\n" api_url = "/api/v1/services" if (str(self.args.namespace) != "None") & \ (str(self.args.namespace) != "all"): api_url = '/api/v1/namespaces/{}/services'.format(self.args.namespace) print "using api url {}".format(api_url) api_yaml = self.ora.get(api_url, rtype='text') services = yaml.safe_load(api_yaml) for service in services["items"]: if self.args.pod and \ self.args.pod in service["metadata"]["name"]: print "service IP is {}".format(service["spec"]["clusterIP"]) self.service_ip = service["spec"]["clusterIP"] if self.args.portname is not None: for port in service["spec"]["ports"]: if port["name"] == self.args.portname: self.service_port = port["port"] else: self.service_port = service["spec"]["ports"][0]["port"] else: pass def check_service(self): """ Checks the web service """ print "\nChecking web service\n" if self.args.insecure: proto = 'http' else: proto = 'https' url = '{}://{}:{}/{}'.format( proto, self.service_ip, self.service_port, self.args.url, ) try: print "Performing check on URL: {}".format(url) response = urllib2.urlopen(url, timeout=30) if str(response.getcode()) == self.args.status: if self.args.content is None \ or self.args.content in response.read(): return True except urllib2.URLError: print "Received error accessing URL: {}".format(url) except socket.timeout: print "Timed out accessing URL: {}".format(url) return False def parse_args(self): """ parse the args from the cli """ parser = argparse.ArgumentParser(description='Openshift web service checker') parser.add_argument('-p', '--pod', default=None, help='Check for pod with this specific name') parser.add_argument('-n', '--namespace', default=None, help='Check for pods in this namespace - "all" for all') parser.add_argument('-P', '--portname', default=None, help='name of the port to check') parser.add_argument('-u', '--url', default="/", help='URL to check. Defaults to "/".') parser.add_argument('-s', '--status', default="200", help='HTTP status code to expect. Defaults to 200') parser.add_argument('-c', '--content', default=None, help='Looks for a string in the content of the response.') parser.add_argument('-i', '--insecure', action='store_true', default=None, help='Use insecure http connection') parser.add_argument('-S', '--secure', action='store_true', default=None, help='Use secure https connection (default)') parser.add_argument('-v', '--verbose', action='store_true', default=None, help='Verbose?') parser.add_argument('--debug', action='store_true', default=None, help='Debug?') self.args = parser.parse_args()
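# A quick sketch of the URL check_service() composes (service IP and port
# hypothetical). Note that with the default --url of "/" the format string
# yields a harmless double slash.
proto, service_ip, service_port = 'https', '172.30.12.34', 443
url = '{}://{}:{}/{}'.format(proto, service_ip, service_port, 'healthz')
print url   # https://172.30.12.34:443/healthz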
class OpsZaggClient(object):
    """ class to send data to zagg """

    def __init__(self):
        self.zagg_sender = None
        self.args = None
        self.config = None
        self.pcp_metrics = []
        self.heartbeat = None

    def run(self):
        """ main function to run the script """
        self.parse_args()
        self.parse_config(self.args.config_file)
        self.config_zagg_sender()

        if self.args.send_pcp_metrics:
            self.add_pcp_metrics()
        if self.args.send_heartbeat:
            self.add_heartbeat()
        if self.args.key and self.args.value:
            self.add_zabbix_key()

        self.zagg_sender.send_metrics()

    def parse_args(self):
        """ parse the args from the cli """
        parser = argparse.ArgumentParser(description='Zagg metric sender')
        parser.add_argument('--send-pcp-metrics', action="store_true",
                            help="send pcp metrics to zagg")
        parser.add_argument('--send-heartbeat', action="store_true",
                            help="send heartbeat metric to zagg")
        parser.add_argument('-s', '--host',
                            help='specify host name as registered in Zabbix')
        parser.add_argument('-z', '--zagg-server',
                            help='hostname or IP of the Zagg server')
        parser.add_argument('--zagg-user', help='username of the Zagg server')
        parser.add_argument('--zagg-pass', help='password of the Zagg server')
        parser.add_argument('-k', '--key', help='zabbix key')
        parser.add_argument('-o', '--value', help='zabbix value')
        parser.add_argument('-c', '--config-file',
                            help='ops-zagg-client config file',
                            default='/etc/openshift_tools/zagg_client.yaml')
        self.args = parser.parse_args()

    def parse_config(self, config_file):
        """ parse config file """
        # safe_load matches the yaml handling used elsewhere in these checks
        self.config = yaml.safe_load(open(config_file))

    def config_zagg_sender(self):
        """ configure the zagg_sender """
        zagg_server = self.args.zagg_server if self.args.zagg_server else self.config['zagg']['host']
        zagg_user = self.args.zagg_user if self.args.zagg_user else self.config['zagg']['user']
        zagg_password = self.args.zagg_pass if self.args.zagg_pass else self.config['zagg']['pass']
        host = self.args.host if self.args.host else self.config['host']['name']

        zagg_conn = ZaggConnection(host=zagg_server,
                                   user=zagg_user,
                                   password=zagg_password,
                                  )
        self.zagg_sender = ZaggSender(host, zagg_conn)

    def add_heartbeat(self):
        """ create a heartbeat metric """
        heartbeat = ZaggHeartbeat(templates=self.config['heartbeat']['templates'],
                                  hostgroups=self.config['heartbeat']['hostgroups'],
                                 )
        self.zagg_sender.add_heartbeat(heartbeat)

    def add_pcp_metrics(self):
        """ collect pcp metrics to send to ZaggSender """
        self.zagg_sender.add_pcp_metrics(self.config['pcp']['metrics'])

    def add_zabbix_key(self):
        """ send zabbix key/value pair to zagg """
        self.zagg_sender.add_zabbix_keys({self.args.key: self.args.value})
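# A sketch of the zagg_client.yaml layout this class expects, inferred from
# the keys parse_config() and config_zagg_sender() read; all values below are
# illustrative only:
#
#   zagg:
#     host: zagg.example.com
#     user: zagg-user
#     pass: zagg-pass
#   host:
#     name: my-monitored-host
#   heartbeat:
#     templates: [Template Heartbeat]
#     hostgroups: [OpenShift Hosts]
#   pcp:
#     metrics: [kernel.all.load]
#
# And a minimal entry-point sketch, assuming the script is invoked directly:
if __name__ == '__main__':
    OZC = OpsZaggClient()
    OZC.run()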
def send_zagg_data(key_zabbix, result):
    ''' send data to Zagg '''
    zgs = ZaggSender()
    zgs.add_zabbix_keys({key_zabbix: result})
    zgs.send_metrics()
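# Example call with an illustrative key/value pair; the key name here is an
# assumption for demonstration, not one these scripts register in Zabbix.
send_zagg_data('openshift.master.etcd.ping', 1)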
def send_zagg_data(build_ran, create_app, http_code, run_time):
    """ send data to Zagg """
    logger.debug("send_zagg_data()")

    zgs_time = time.time()
    zgs = ZaggSender()
    logger.info("Send data to Zagg")

    if build_ran == 1:
        zgs.add_zabbix_keys({'openshift.master.app.build.create': create_app})
        zgs.add_zabbix_keys({'openshift.master.app.build.create.code': http_code})
        zgs.add_zabbix_keys({'openshift.master.app.build.create.time': run_time})
    else:
        zgs.add_zabbix_keys({'openshift.master.app.create': create_app})
        zgs.add_zabbix_keys({'openshift.master.app.create.code': http_code})
        zgs.add_zabbix_keys({'openshift.master.app.create.time': run_time})

    try:
        zgs.send_metrics()
        logger.info("Data sent to Zagg in %s seconds",
                    str(time.time() - zgs_time))
    except Exception:
        logger.error("Error sending data to Zagg: %s \n %s ",
                     sys.exc_info()[0], sys.exc_info()[1])
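# Illustrative call: a run where the build executed, app creation succeeded
# (assuming the 0-for-success convention of the create_app flag), the service
# answered HTTP 200, and the run took 42 seconds. All values are made up.
send_zagg_data(build_ran=1, create_app=0, http_code=200, run_time=42.0)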
    if not dds.is_loopback:
        dds.data_space_total = dds.data_space_used + dds.data_space_available
        dds.metadata_space_total = dds.metadata_space_used + dds.metadata_space_available
        # float() guards against Python 2 integer division truncating to 0
        dds.data_space_percent_available = \
            (float(dds.data_space_available) / dds.data_space_total) * 100
        dds.metadata_space_percent_available = \
            (float(dds.metadata_space_available) / dds.metadata_space_total) * 100

    return dds

if __name__ == "__main__":
    keys = None
    exit_code = 0
    zs = ZaggSender()

    try:
        cli = AutoVersionClient(base_url='unix://var/run/docker.sock')
        dw = DockerWatcher(cli)
        dw_dds = dw.get_disk_usage()
        keys = {
            'docker.storage.data.space.used': dw_dds.data_space_used,
            'docker.storage.data.space.available': dw_dds.data_space_available,
            'docker.storage.data.space.percent_available': dw_dds.data_space_percent_available,
            'docker.storage.data.space.total': dw_dds.data_space_total,
            'docker.storage.metadata.space.used': dw_dds.metadata_space_used,
            'docker.storage.metadata.space.available': dw_dds.metadata_space_available,
            'docker.storage.metadata.space.percent_available': dw_dds.metadata_space_percent_available,
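# A quick standalone illustration of why the float() conversion above matters
# under Python 2: with plain ints, 53687091200 / 107374182400 evaluates to 0,
# so percent-available would always report 0 instead of 50. Numbers are
# illustrative only.
used = 53687091200         # 50 GiB
available = 53687091200    # 50 GiB
total = used + available
print (float(available) / total) * 100    # 50.0, not 0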
class CertificateReporting(object):
    ''' class with ability to parse through x509 certificates to extract
        and report to zabbix the expiration date associated with the cert
    '''

    def __init__(self):
        ''' constructor '''
        self.args = None
        self.current_date = datetime.datetime.today()
        self.parse_args()
        self.zsend = ZaggSender(debug=self.args.debug)

    def dprint(self, msg):
        ''' debug printer '''
        if self.args.debug:
            print msg

    def parse_args(self):
        ''' parse command line args '''
        argparser = argparse.ArgumentParser(description='certificate checker')
        argparser.add_argument('--debug', default=False, action='store_true')
        argparser.add_argument('--cert-list', default="/etc/origin", type=str,
                               help='comma-separated list of dirs/certificates')
        self.args = argparser.parse_args()

    def days_to_expiration(self, cert_file):
        ''' return days to expiration for a certificate '''
        crypto = OpenSSL.crypto
        cert = open(cert_file).read()
        certificate = crypto.load_certificate(crypto.FILETYPE_PEM, cert)
        expiration_date_asn1 = certificate.get_notAfter()
        # expiration returned in ASN.1 GENERALIZEDTIME format
        # YYYYMMDDhhmmss with a trailing 'Z'
        expiration_date = parser.parse(expiration_date_asn1).replace(tzinfo=None)
        delta = expiration_date - self.current_date
        return delta.days

    def process_certificates(self):
        ''' check through list of certificates/directories '''
        for cert in self.args.cert_list.split(','):
            if not os.path.exists(cert):
                self.dprint("{} does not exist. skipping.".format(cert))
                continue

            mode = os.stat(cert).st_mode
            if S_ISDIR(mode):
                self.all_certs_in_dir(cert)
            elif S_ISREG(mode):
                days = self.days_to_expiration(cert)
                self.dprint("{} in {} days".format(cert, days))
                self.add_to_zabbix(cert, days)
            else:
                self.dprint("not a file. not a directory. skipping.")

        # now push out all queued up item(s) to zabbix
        self.zsend.send_metrics()

    def add_to_zabbix(self, certificate, days_to_expiration):
        ''' queue up item for submission to zabbix '''
        self.zsend.add_zabbix_dynamic_item(CERT_DISC_KEY, CERT_DISC_MACRO,
                                           [certificate])
        zbx_key = "{}[{}]".format(CERT_DISC_KEY, certificate)
        self.zsend.add_zabbix_keys({zbx_key: days_to_expiration})

    def all_certs_in_dir(self, directory):
        ''' recursively go through all *.crt files in 'directory' '''
        for root, _, filenames in os.walk(directory):
            for filename in filenames:
                if filename.endswith('.crt'):
                    full_path = os.path.join(root, filename)
                    days = self.days_to_expiration(full_path)
                    self.dprint("{} in {} days".format(full_path, days))
                    self.add_to_zabbix(full_path, days)
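# A minimal entry-point sketch. CERT_DISC_KEY and CERT_DISC_MACRO are
# module-level constants add_to_zabbix() expects; the sample values below are
# assumptions shown only so the sketch is self-contained.
CERT_DISC_KEY = "disc.certificate.expiration"    # illustrative value
CERT_DISC_MACRO = "#OSO_CERT_NAME"               # illustrative value

if __name__ == '__main__':
    CERTS = CertificateReporting()
    CERTS.process_certificates()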
""" docker container DNS tester """ # Adding the ignore because it does not like the naming of the script # to be different than the class name # pylint: disable=invalid-name from docker import AutoVersionClient from openshift_tools.monitoring.zagg_sender import ZaggSender ZBX_KEY = "docker.container.dns.resolution" if __name__ == "__main__": cli = AutoVersionClient(base_url="unix://var/run/docker.sock") container = cli.create_container( image="docker-registry.ops.rhcloud.com/ops/oso-rhel7-host-monitoring", command="getent hosts redhat.com" ) cli.start(container=container.get("Id")) exit_code = cli.wait(container) cli.remove_container(container.get("Id")) zs = ZaggSender() zs.add_zabbix_keys({ZBX_KEY: exit_code}) print "Sending these metrics:" print ZBX_KEY + ": " + str(exit_code) zs.send_metrics() print "\nDone.\n"