Exemplo n.º 1
0
 def output(self, connect_time, total_time):
     """Build self.msg for the HBase cell that was read.

     The message holds the cell value, the table/row/column it came from and
     the three timings, then a ' |' separator followed by the perfdata
     section (the optional graphed value, then the same timings again).

     connect_time, total_time: numbers rendered with self.precision decimal
     places and an 'ms' suffix.
     """
     digits = self.precision
     location = "HBase table '{0}' row '{1}' column '{2}'".format(self.table, self.row, self.column)
     cell = self.value
     self.msg = "cell value = '{0}'".format(cell)
     if isFloat(cell):
         log.info('value is float, checking thresholds')
         self.check_thresholds(cell)
     self.msg += " for {0}".format(location)
     read_time = self.timings[self.column]['read']
     timings = ''.join([
         ' total_time={0:0.{precision}f}ms'.format(total_time, precision=digits),
         ' connect_time={0:0.{precision}f}ms'.format(connect_time, precision=digits),
         ' query_time={0:0.{precision}f}ms'.format(read_time, precision=digits),
     ])
     # the timings are shown in the user-facing text and repeated after the '|' for graphing
     self.msg += ',' + timings + ' |'
     if self.graph:
         if not isFloat(cell):
             self.msg += ' value=NaN'
         else:
             self.msg += ' value={0}'.format(cell)
             if self.units:
                 self.msg += str(self.units)
             self.msg += self.get_perf_thresholds()
     self.msg += timings
 def msg_metrics(self, metrics):
     """Append every metric to self.msg, followed by the perfdata section.

     metrics: mapping of metric name to value.  An empty mapping is reported
     through qquit('UNKNOWN', ...).  Metrics are written in sorted name order;
     each value that passes isFloat is handed to self.check_thresholds().
     """
     if not metrics:
         filters = " / --".join(self.filter_types)
         qquit('UNKNOWN', "no matching metrics found, check your filters: --" + filters)
     names = sorted(metrics)
     for name in names:
         reading = metrics[name]
         self.msg += ' {metric}={value}'.format(metric=name, value=reading)
         if isFloat(reading):
             self.check_thresholds(reading)
     self.msg += ' |'
     for name in names:
         reading = metrics[name]
         # try not to break graphing when Attivio gives us 'NaN' value
         graphed = reading if isFloat(reading) else 0
         self.msg += " '{metric}'={value}".format(metric=name, value=graphed)
         if self.verbose:
             self.msg += self.get_perf_thresholds()
 def output(self, connect_time, total_time):
     """Compose self.msg describing the HBase cell value and its timings.

     The user-facing part lists the cell value, its table/row/column and the
     total, connect and query timings.  After the ' |' separator comes the
     perfdata: the value (only when self.graph is set) and the timings again.

     connect_time, total_time: numbers formatted to self.precision decimals.
     """
     decimals = self.precision
     where = "HBase table '{0}' row '{1}' column '{2}'".format(
         self.table, self.row, self.column)
     cell_value = self.value
     self.msg = "cell value = '{0}'".format(cell_value)
     if isFloat(cell_value):
         log.info('value is float, checking thresholds')
         self.check_thresholds(cell_value)
     self.msg += " for {0}".format(where)
     timed = (
         (' total_time={0:0.{precision}f}ms', total_time),
         (' connect_time={0:0.{precision}f}ms', connect_time),
         (' query_time={0:0.{precision}f}ms', self.timings[self.column]['read']),
     )
     perfdata = ''
     for template, elapsed in timed:
         perfdata += template.format(elapsed, precision=decimals)
     # show the timings at the end of the user output as well as in the graphing perfdata section
     self.msg += ',' + perfdata
     self.msg += ' |'
     if self.graph:
         if isFloat(cell_value):
             graphed = ' value={0}'.format(cell_value)
             if self.units:
                 graphed += str(self.units)
             self.msg += graphed
             self.msg += self.get_perf_thresholds()
         else:
             self.msg += ' value=NaN'
     self.msg += perfdata
 def parse_json(self, json_data):
     """Report HDFS space usage from the NameNode JSON in json_data.

     Reads 'PercentUsed', 'Total', 'TotalBlocks', 'TotalFiles' and 'Used' from
     json_data['beans'][0], calls self.ok(), writes the summary and perfdata
     to self.msg and passes the percentage to self.check_thresholds().

     Raises UnknownError when a field is missing, a field is not numeric, or
     the percentage used is negative.
     """
     log.info('parsing response')
     try:
         bean = json_data['beans'][0]
         space_used_pc = bean['PercentUsed']
         # a value ending in a negative exponent (e.g. 1.2e-05) is treated as zero
         if re.search(r'e-\d+$', str(space_used_pc)):
             space_used_pc = 0
         if not isFloat(space_used_pc):
             raise UnknownError("non-float returned for PercentUsed by namenode '{0}:{1}'"\
                                .format(self.host, self.port))
         # explicit raise instead of assert: assert statements are removed under
         # python -O, which would silently disable this check on external data
         if space_used_pc < 0:
             raise UnknownError(
                 'space_used_pc {} < 0'.format(space_used_pc))
         stats = {}
         for stat in ('Total', 'TotalBlocks', 'TotalFiles', 'Used'):
             stats[stat] = bean[stat]
             if not isInt(stats[stat]):
                 raise UnknownError("non-integer returned for {0} by namenode '{1}:{2}'"\
                                    .format(stat, self.host, self.port))
             stats[stat] = int(stats[stat])
         self.ok()
         self.msg = 'HDFS space used = {0:.2f}% ({1}/{2})'\
                    .format(space_used_pc, humanize.naturalsize(stats['Used']), humanize.naturalsize(stats['Total']))
         self.check_thresholds(space_used_pc)
         self.msg += ", in {0:d} files spread across {1:d} blocks".format(stats['TotalFiles'], stats['TotalBlocks'])
         self.msg += " | 'HDFS % space used'={0:f}%{1}".format(space_used_pc, self.get_perf_thresholds())
         self.msg += " 'HDFS space used'={0:d}b".format(stats['Used'])
         self.msg += " 'HDFS file count'={0:d}".format(stats['TotalFiles'])
         self.msg += " 'HDFS block count'={0:d}".format(stats['TotalBlocks'])
     except KeyError as _:
         # a missing key means the response did not have the expected layout
         raise UnknownError("failed to parse json returned by NameNode at '{0}:{1}': {2}. {3}"\
                            .format(self.host, self.port, _, support_msg_api()))
     except ValueError as _:
         raise UnknownError("invalid json returned for LiveNodes by Namenode '{0}:{1}': {2}"\
                            .format(self.host, self.port, _))
 def parse_json(self, json_data):
     """List the Jenkins jobs and exit, or report the job's health score.

     With self.list_jobs set, prints each job name from json_data['jobs'] and
     exits with ERRORS['UNKNOWN'].  Otherwise takes the first entry of
     json_data['healthReport'], appends its score and description to
     self.msg, passes the score to self.check_thresholds() and adds perfdata.

     Raises UnknownError when the health report is empty or its score is not
     numeric.
     """
     if self.list_jobs:
         job_list = json_data['jobs']
         print('Jenkins Jobs:\n')
         for entry in job_list:
             print(entry['name'])
         sys.exit(ERRORS['UNKNOWN'])
     reports = json_data['healthReport']
     if not reports:
         no_report = "no health report found for job '{job}' (not built yet?)"
         raise UnknownError(no_report.format(job=self.job))
     first_report = reports[0]
     raw_score = first_report['score']
     if not isFloat(raw_score):
         bad_score = "non-numeric score returned in health report for job '{job}'"
         raise UnknownError(bad_score.format(job=self.job))
     score = float(raw_score)
     summary = first_report['description']
     self.msg += "'{job}' health report score = {score}".format(job=self.job, score=score)
     self.check_thresholds(score)
     self.msg += ", {description}".format(description=summary)
     self.msg += ' | health_report_score={score}%{thresholds}'.format(
         score=score, thresholds=self.get_perf_thresholds(boundary='lower'))
Exemplo n.º 6
0
 def create_perfdata(self):
     """Return the perfdata suffix for the value that was read.

     Returns '' when self._read_value does not pass isFloat; otherwise a
     string starting with ' | ' that carries the key, the value, the perf
     thresholds and the query time formatted to seven decimals.
     """
     if not isFloat(self._read_value):
         return ''
     return " | '{0}'={1}{2} query_time={3:.7f}s".format(
         self.key,
         self._read_value,
         self.get_perf_thresholds(),
         self._read_timing,
     )
Exemplo n.º 7
0
 def get_stat(node, stat):
     """Return node[stat] as a float rounded to two decimal places.

     Raises UnknownError when the value does not pass isFloat or when the
     rounded value is negative.
     """
     raw = node[stat]
     if not isFloat(raw):
         not_float = '{stat} is not a float! {msg}'
         raise UnknownError(not_float.format(stat=stat, msg=support_msg_api()))
     # formatting to two decimals and parsing back does the rounding
     rounded = float('{0:.2f}'.format(raw))
     if rounded < 0:
         negative = '{stat} < 0 ?!!! {msg}'
         raise UnknownError(negative.format(stat=stat, msg=support_msg_api()))
     return rounded
 def parse_json(self, json_data):
     """Report the Nifi processor load average found in json_data.

     Reads systemDiagnostics -> aggregateSnapshot -> processorLoadAverage,
     calls self.ok(), writes the value to self.msg, passes it to
     self.check_thresholds() and appends perfdata.

     Raises CriticalError when the value does not pass isFloat.
     """
     snapshot = json_data['systemDiagnostics']['aggregateSnapshot']
     raw_load = snapshot['processorLoadAverage']
     if not isFloat(raw_load):
         raise CriticalError("processorLoadAverage '{}' is not a float!!".format(raw_load))
     load = float(raw_load)
     self.ok()
     self.msg = 'Nifi processor load average = {}'.format(load)
     self.check_thresholds(load)
     self.msg += ' | processor_load_average={}{}'.format(load, self.get_perf_thresholds())
 def get_recent_failures(node):
     """Return node['recentFailures'] as a float rounded to two decimals.

     Raises UnknownError when the value does not pass isFloat or when the
     rounded value is negative.
     """
     raw_failures = node['recentFailures']
     if not isFloat(raw_failures):
         raise UnknownError('recentFailures is not a float! {0}'.format(support_msg_api()))
     # formatting to two decimals and parsing back does the rounding
     failures = float('{0:.2f}'.format(raw_failures))
     if failures < 0:
         raise UnknownError('recentFailures < 0 ?!!! {0}'.format(support_msg_api()))
     return failures
Exemplo n.º 10
0
 def get_recent_failure_ratio(node):
     """Return node['recentFailureRatio'] as a float rounded to two decimals.

     Raises UnknownError when the value does not pass isFloat or when the
     rounded value is below 0 or above 1.
     """
     raw_ratio = node['recentFailureRatio']
     if not isFloat(raw_ratio):
         raise UnknownError('recentFailureRatio is not a float! {0}'.format(support_msg_api()))
     ratio = float('{0:.2f}'.format(raw_ratio))
     # the bounds are checked on the rounded value, lower bound first
     bounds = (
         (ratio < 0, 'recentFailureRatio < 0 ?!!! {0}'),
         (ratio > 1, 'recentFailureRatio > 1 ?!!! {0}'),
     )
     for out_of_range, message in bounds:
         if out_of_range:
             raise UnknownError(message.format(support_msg_api()))
     return ratio
 def msg_metrics(self, metrics):
     """Write all metrics to self.msg and then add their perfdata.

     metrics: mapping of metric name to value.  When it is empty the problem
     is reported through qquit('UNKNOWN', ...).  Names are processed in
     sorted order, and values that pass isFloat are given to
     self.check_thresholds().
     """
     if not metrics:
         hint = "no matching metrics found, check your filters: --" + " / --".join(self.filter_types)
         qquit('UNKNOWN', hint)
     # user-facing section: one ' name=value' pair per metric
     for key in sorted(metrics):
         current = metrics[key]
         self.msg += ' {metric}={value}'.format(metric=key, value=current)
         if not isFloat(current):
             continue
         self.check_thresholds(current)
     self.msg += ' |'
     # perfdata section: names are quoted and non-numeric values become 0
     for key in sorted(metrics):
         current = metrics[key]
         # try not to break graphing when Attivio gives us 'NaN' value
         if isFloat(current):
             plotted = current
         else:
             plotted = 0
         self.msg += " '{metric}'={value}".format(metric=key, value=plotted)
         if self.verbose:
             self.msg += self.get_perf_thresholds()
 def parse_json(self, json_data):
     """Check the Nifi processor load average contained in json_data.

     The value is taken from
     json_data['systemDiagnostics']['aggregateSnapshot']['processorLoadAverage'].
     On success self.ok() is called, self.msg is set, the value is passed to
     self.check_thresholds() and perfdata is appended.

     Raises CriticalError when the value does not pass isFloat.
     """
     diagnostics = json_data['systemDiagnostics']
     load_value = diagnostics['aggregateSnapshot']['processorLoadAverage']
     if isFloat(load_value):
         load_value = float(load_value)
     else:
         complaint = 'processorLoadAverage \'{}\' is not a float!!'
         raise CriticalError(complaint.format(load_value))
     self.ok()
     self.msg = 'Nifi processor load average = {}'.format(load_value)
     self.check_thresholds(load_value)
     self.msg += ' | processor_load_average={}{}'.format(
         load_value, self.get_perf_thresholds())
Exemplo n.º 13
0
 def parse_json(self, json_data):
     """Count Presto nodes whose recent failure ratio exceeds self.max_ratio.

     json_data: list of node dicts with 'recentFailureRatio' and 'uri'.
     Each ratio is rounded to two decimals and must lie between 0 and 1.
     The number of offending nodes is passed to self.check_thresholds(), and
     self.msg receives the summary, the highest ratio seen, the offending
     URIs (verbose only) and perfdata.  self.warning() is called when the
     list holds no nodes.

     Raises UnknownError for a non-list response or an invalid ratio.
     """
     if not isList(json_data):
         raise UnknownError('non-list returned by Presto for nodes. {0}'.format(support_msg_api()))
     failing = []
     highest = 0.0
     scheme = re.compile('^https?://')
     node_count = len(json_data)
     for node in json_data:
         ratio = node['recentFailureRatio']
         if not isFloat(ratio):
             raise UnknownError('recentFailureRatio is not a float! {0}'.format(support_msg_api()))
         ratio = float('{0:.2f}'.format(ratio))
         if ratio < 0:
             raise UnknownError('recentFailureRatio < 0 ?!!! {0}'.format(support_msg_api()))
         if ratio > 1:
             raise UnknownError('recentFailureRatio > 1 ?!!! {0}'.format(support_msg_api()))
         if ratio > highest:
             highest = ratio
         if ratio > self.max_ratio:
             full_uri = node['uri']
             # the protocol prefix is dropped from the URI kept for the message
             failing.append(scheme.sub('', full_uri))
             log.info("node '%s' recent failure ratio %f > max ratio %f",
                      node['uri'], ratio, self.max_ratio)
         elif ratio:
             log.info("node '%s' recent failures ratio %f, but less than max ratio threshold of %f",
                      node['uri'], ratio, self.max_ratio)
     failing_count = len(failing)
     self.msg = 'Presto SQL - worker nodes with recent failure ratio > {0:.2f} = {1:d}'.format(
         self.max_ratio, failing_count)
     self.check_thresholds(failing_count)
     self.msg += ' out of {0:d} nodes'.format(node_count)
     if node_count < 1:
         self.warning()
         self.msg += ' (< 1 worker found)'
     self.msg += ', max recent failure ratio = {0:.2f}'.format(highest)
     if self.verbose and failing:
         self.msg += ' [{0}]'.format(','.join(failing))
     self.msg += ' | num_nodes_failing={0}{1} max_ratio={2:.2f}'.format(
         failing_count, self.get_perf_thresholds(), highest)
Exemplo n.º 14
0
 def process_options(self):
     """Set self.name from the 'name' option or from the first usable argument.

     When the option is empty, each entry of self.args is reduced to its
     basename and the first one is taken that is non-empty, does not start
     with '-', is not an interpreter per self.is_interpreter(), is not a key
     of ERRORS and does not pass isFloat.  self.usage() is called when no
     name is found.  Matches of self.space_regex in the name are replaced
     with '_'.
     """
     self.name = self.get_opt('name')
     if not self.name:
         for raw_arg in self.args:
             candidate = os.path.basename(raw_arg)
             if not candidate or candidate[0] == '-':
                 continue
             if self.is_interpreter(candidate):
                 continue
             if candidate in ERRORS or isFloat(candidate):
                 continue
             self.name = candidate
             break
     if not self.name:
         self.usage('--name not defined')
     self.name = self.space_regex.sub('_', self.name)
     log.info('name = %s', self.name)
Exemplo n.º 15
0
 def process_options(self):
     """Determine self.name, falling back to the command line arguments.

     The 'name' option wins.  Otherwise the basename of each entry in
     self.args is examined in order and the first acceptable one is used:
     it must be non-empty, must not begin with '-', must not be an
     interpreter per self.is_interpreter(), must not be in ERRORS and must
     not pass isFloat.  Without a name self.usage() is called.  Finally
     self.space_regex matches in the name are replaced by '_'.
     """
     self.name = self.get_opt('name')
     if not self.name:
         for argument in self.args:
             argument = os.path.basename(argument)
             unusable = (
                 not argument
                 or argument[0] == '-'
                 or self.is_interpreter(argument)
                 or argument in ERRORS
                 or isFloat(argument)
             )
             if not unusable:
                 self.name = argument
                 break
     if not self.name:
         self.usage('--name not defined')
     self.name = self.space_regex.sub('_', self.name)
     log.info('name = %s', self.name)
Exemplo n.º 16
0
 def run(self):
     """Fetch a Consul key over HTTP and report its value in self.msg.

     Host, port, key and the optional regex come from self.options and are
     validated first.  The key is requested from
     http://<host>:<port>/v1/kv/<key>; a request error or a non-200 status is
     reported through qquit('CRITICAL', ...).  The value returned by
     self.extract_value() is checked against the regex (self.critical() on
     mismatch) and passed to self.check_thresholds(); perfdata is appended
     when the value passes isFloat.
     """
     self.no_args()
     host = self.options.host
     port = self.options.port
     validate_host(host)
     validate_port(port)
     key = self.options.key
     pattern = self.options.regex
     if not key:
         self.usage('--key not defined')
     key = key.lstrip('/')
     validate_chars(key, 'key', r'\w\/-')
     if pattern:
         validate_regex(pattern, 'key')
     self.validate_thresholds(optional=True)
     response = None
     url = 'http://%(host)s:%(port)s/v1/kv/%(key)s' % {'host': host, 'port': port, 'key': key}
     log.debug('GET %s' % url)
     try:
         response = requests.get(url)
     except requests.exceptions.RequestException as exc:
         qquit('CRITICAL', exc)
     log.debug("response: %s %s" % (response.status_code, response.reason))
     log.debug("content: '%s'" % response.content)
     if response.status_code != 200:
         detail = ''
         body = response.content
         # only a single-line string body is appended to the error message
         if body and isStr(body) and len(body.split('\n')) < 2:
             detail += ': ' + body
         qquit(
             'CRITICAL',
             "failed to retrieve consul key '%s': '%s' %s%s" % (key, response.status_code, response.reason, detail))
     value = self.extract_value(response.content)
     log.info("value = '%(value)s'" % {'value': value})
     self.ok()
     self.msg = "consul key '%s' value = '%s'" % (key, value)
     if pattern and not re.search(pattern, value):
         self.critical()
         self.msg += " (did not match expected regex '%s')" % pattern
     self.check_thresholds(value)
     if isFloat(value):
         self.msg += " | '%s'=%s" % (key, value)
Exemplo n.º 17
0
 def check(self, result):
     """Compare result with the configured lower/upper thresholds.

     Returns "" when nothing is wrong, otherwise a short parenthesised
     explanation; "(not a float!)" when result does not pass isFloat.
     With self.opts["invert"] set, a result inside the closed lower..upper
     range is the failure, and nothing is reported unless both bounds are set.
     """
     if not isFloat(result):
         return "(not a float!)"
     number = float(result)
     if self.opts["invert"]:
         bounds = self.thresholds
         if bounds["lower"] is None or bounds["upper"] is None:
             return ""
         if bounds["lower"] <= number <= bounds["upper"]:
             return "({0:g} <= {1:g} <= {2:g})".format(bounds["lower"], number, bounds["upper"])
         return ""
     bounds = self.thresholds
     lower = bounds["lower"]
     if lower is not None and number < lower:
         return "({0:g} < {1:g})".format(number, lower)
     upper = bounds["upper"]
     if upper is not None and number > upper:
         return "({0:g} > {1:g})".format(number, upper)
     return ""
Exemplo n.º 18
0
 def check(self, result):
     """Return a description of how result violates the thresholds, or ''.

     A result that does not pass isFloat yields '(not a float!)'.  Normally a
     value below the lower bound or above the upper bound is described (the
     lower bound is tested first).  With self.opts['invert'] set, a value
     lying within both bounds (inclusive) is described instead, which
     requires both bounds to be set.
     """
     if not isFloat(result):
         return '(not a float!)'
     number = float(result)
     verdict = ''
     if self.opts['invert']:
         limits = self.thresholds
         both_set = limits['lower'] is not None and limits['upper'] is not None
         if both_set and limits['lower'] <= number <= limits['upper']:
             verdict = '({0:g} <= {1:g} <= {2:g})'.format(limits['lower'], number, limits['upper'])
     else:
         limits = self.thresholds
         if limits['lower'] is not None and number < limits['lower']:
             verdict = '({0:g} < {1:g})'.format(number, limits['lower'])
         elif limits['upper'] is not None and number > limits['upper']:
             verdict = '({0:g} > {1:g})'.format(number, limits['upper'])
     return verdict
Exemplo n.º 19
0
 def run(self):
     """Retrieve a Consul key over HTTP and build the status message.

     Host, port, key and the optional regex are read with self.get_opt() and
     validated.  The key is fetched from http://<host>:<port>/v1/kv/<key>;
     request errors and non-200 responses are reported through
     qquit('CRITICAL', ...).  The value from self.extract_value() goes into
     self.msg, is matched against the regex (self.critical() on mismatch),
     is passed to self.check_thresholds() and is added as perfdata when it
     passes isFloat.
     """
     self.no_args()
     host = self.get_opt('host')
     port = self.get_opt('port')
     validate_host(host)
     validate_port(port)
     key = self.get_opt('key')
     expected = self.get_opt('regex')
     if not key:
         self.usage('--key not defined')
     key = key.lstrip('/')
     validate_chars(key, 'key', r'\w\/-')
     if expected:
         validate_regex(expected, 'key')
     self.validate_thresholds(optional=True)
     reply = None
     endpoint = 'http://%(host)s:%(port)s/v1/kv/%(key)s' % dict(host=host, port=port, key=key)
     log.debug('GET %s' % endpoint)
     try:
         reply = requests.get(endpoint)
     except requests.exceptions.RequestException as request_error:
         qquit('CRITICAL', request_error)
     log.debug("response: %s %s" % (reply.status_code, reply.reason))
     log.debug("content: '%s'" % reply.content)
     if reply.status_code != 200:
         # a single-line string body is included in the error message
         single_line = reply.content and isStr(reply.content) and len(reply.content.split('\n')) < 2
         suffix = ': ' + reply.content if single_line else ''
         failure = "failed to retrieve consul key '%s': '%s' %s%s" % (key, reply.status_code, reply.reason, suffix)
         qquit('CRITICAL', failure)
     value = self.extract_value(reply.content)
     log.info("value = '%(value)s'" % dict(value=value))
     self.ok()
     self.msg = "consul key '%s' value = '%s'" % (key, value)
     if expected:
         matched = re.search(expected, value)
         if not matched:
             self.critical()
             self.msg += " (did not match expected regex '%s')" % expected
     self.check_thresholds(value)
     if isFloat(value):
         self.msg += " | '%s'=%s" % (key, value)
 def run(self):
     """Query Consul's key/value HTTP endpoint and report the key's value.

     Settings are taken from self.options (host, port, key, regex) and
     validated before the request.  A failed request or a status other than
     200 is reported through qquit("CRITICAL", ...).  The extracted value is
     written to self.msg, compared with the optional regex
     (self.critical() when it does not match), passed to
     self.check_thresholds() and emitted as perfdata when it passes isFloat.
     """
     self.no_args()
     host = self.options.host
     port = self.options.port
     validate_host(host)
     validate_port(port)
     key = self.options.key
     regex = self.options.regex
     if not key:
         self.usage("--key not defined")
     key = key.lstrip("/")
     validate_chars(key, "key", r"\w\/-")
     if regex:
         validate_regex(regex, "key")
     self.validate_thresholds(optional=True)
     req = None
     url_fields = {"host": host, "port": port, "key": key}
     url = "http://%(host)s:%(port)s/v1/kv/%(key)s" % url_fields
     log.debug("GET %s" % url)
     try:
         req = requests.get(url)
     except requests.exceptions.RequestException as err:
         qquit("CRITICAL", err)
     status, reason = req.status_code, req.reason
     log.debug("response: %s %s" % (status, reason))
     log.debug("content: '%s'" % req.content)
     if status != 200:
         extra = ""
         content = req.content
         # the body is appended only when it is a single-line string
         if content and isStr(content) and len(content.split("\n")) < 2:
             extra = ": " + content
         qquit("CRITICAL", "failed to retrieve consul key '%s': '%s' %s%s" % (key, status, reason, extra))
     value = self.extract_value(req.content)
     log.info("value = '%(value)s'" % {"value": value})
     self.ok()
     self.msg = "consul key '%s' value = '%s'" % (key, value)
     mismatch = bool(regex) and not re.search(regex, value)
     if mismatch:
         self.critical()
         self.msg += " (did not match expected regex '%s')" % regex
     self.check_thresholds(value)
     if isFloat(value):
         self.msg += " | '%s'=%s" % (key, value)
 def parse_json(self, json_data):
     """Report how many Presto nodes exceed the allowed recent failure ratio.

     json_data must be a list of node dicts carrying 'recentFailureRatio'
     and 'uri'.  Ratios are rounded to two decimals and validated to lie
     between 0 and 1.  Nodes above self.max_ratio are collected (URI without
     the protocol prefix) and their count goes to self.check_thresholds().
     self.msg gets the summary, node total, highest ratio, the node list
     when verbose, and perfdata.  self.warning() is called for an empty list.

     Raises UnknownError for a non-list response or an invalid ratio.
     """
     if not isList(json_data):
         raise UnknownError('non-list returned by Presto for nodes. {0}'.format(support_msg_api()))
     offenders = []
     worst_ratio = 0.0
     protocol_prefix = re.compile('^https?://')
     total_nodes = len(json_data)
     support = support_msg_api
     for entry in json_data:
         failure_ratio = entry['recentFailureRatio']
         if not isFloat(failure_ratio):
             raise UnknownError('recentFailureRatio is not a float! {0}'.format(support()))
         failure_ratio = float('{0:.2f}'.format(failure_ratio))
         if failure_ratio < 0:
             raise UnknownError('recentFailureRatio < 0 ?!!! {0}'.format(support()))
         if failure_ratio > 1:
             raise UnknownError('recentFailureRatio > 1 ?!!! {0}'.format(support()))
         if failure_ratio > worst_ratio:
             worst_ratio = failure_ratio
         over_limit = failure_ratio > self.max_ratio
         if over_limit:
             address = protocol_prefix.sub('', entry['uri'])
             offenders.append(address)
             log.info("node '%s' recent failure ratio %f > max ratio %f",
                      entry['uri'], failure_ratio, self.max_ratio)
         elif failure_ratio:
             # non-zero but within the limit: logged only
             log.info("node '%s' recent failures ratio %f, but less than max ratio threshold of %f",
                      entry['uri'], failure_ratio, self.max_ratio)
     offender_count = len(offenders)
     headline = 'Presto SQL - worker nodes with recent failure ratio > {0:.2f} = {1:d}'
     self.msg = headline.format(self.max_ratio, offender_count)
     self.check_thresholds(offender_count)
     self.msg += ' out of {0:d} nodes'.format(total_nodes)
     if total_nodes < 1:
         self.warning()
         self.msg += ' (< 1 worker found)'
     self.msg += ', max recent failure ratio = {0:.2f}'.format(worst_ratio)
     if self.verbose and offenders:
         self.msg += ' [{0}]'.format(','.join(offenders))
     self.msg += ' | num_nodes_failing={0}{1} max_ratio={2:.2f}'.format(
         offender_count, self.get_perf_thresholds(), worst_ratio)
 def parse_metrics(self, json_struct):
     """Turn the Attivio AIE Perfmon API response into a dict of metrics.

     json_struct: list of dicts, each with a 'metric' name and a 'values'
     list.  Items rejected by self.skip_metric() are left out.  The metric
     name is extended with '.<value>' for each qualifier key present on the
     item, and the first entry of 'values' becomes the metric's value
     (rounded to self.precision decimals when that is set and the value
     passes isFloat).

     Returns the dict of name -> value.  Raises ValueError when the response
     or an item has the wrong type; an empty response or a duplicate name is
     reported through qquit('UNKNOWN', ...).
     """
     if not isList(json_struct):
         raise ValueError(
             "non-list returned by Attivio AIE Perfmon metrics API (got type '{0}')".format(type(json_struct)))
     collected = {}
     if not json_struct:
         qquit('UNKNOWN', "no matching metrics found for '{0}'".format(self.metrics) +
               ", use --list-metrics to check you've specified a correct metric")
     qualifiers = ('nodeset', 'hostname', 'workflowType', 'workflow',
                   'component', 'path', 'networkInterface')
     for entry in json_struct:
         if not isDict(entry):
             raise ValueError(
                 "non-dict item found in list returned by Attivio AIE Perfmon API (got type '{0}')"
                 .format(type(entry)))
         if not isList(entry['values']):
             raise ValueError(
                 "non-list returned for metric value by Attivio AIE Perfmon API (got type '{0}')"
                 .format(type(entry['values'])))
         name = entry['metric']
         log.info('metric = %s', name)
         if self.skip_metric(entry):
             log.info('skipping metric %s due to filters', name)
             continue
         for qualifier in qualifiers:
             if qualifier not in entry:
                 continue
             part = entry[qualifier]
             log.info('%s = %s', qualifier, part)
             # Attivio returns network interfaces in form "lo - 127.0.0.1"
             if qualifier == 'networkInterface':
                 part = part.split()[0]
             name += '.{0}'.format(part)
         reading = entry['values'][0]
         log.info('value = %s\n', reading)
         if self.precision and isFloat(reading):
             # leaving as string will result in lots of trailing zeros
             rounded_text = '{value:.{precision}f}'.format(value=reading, precision=self.precision)
             reading = float(rounded_text)
         if name in collected:
             qquit('UNKNOWN', "duplicate metric '{metric}' discovered! {support_msg}"
                   .format(metric=name, support_msg=support_msg_api()))
         collected[name] = reading
     return collected
 def parse_json(self, json_data):
     """Report HDFS space usage from the NameNode JSON in json_data.

     Takes 'PercentUsed', 'Total', 'TotalBlocks', 'TotalFiles' and 'Used'
     from json_data['beans'][0], calls self.ok(), writes the summary and
     perfdata to self.msg and passes the percentage to
     self.check_thresholds().

     Raises UnknownError when a key is missing, a field is not numeric, or
     the percentage is negative.
     """
     log.info('parsing response')
     try:
         info = json_data['beans'][0]
         pc_used = info['PercentUsed']
         # a value ending in a negative exponent (e.g. 1.2e-05) is treated as zero
         if re.search(r'e-\d+$', str(pc_used)):
             pc_used = 0
         if not isFloat(pc_used):
             raise UnknownError(
                 "non-float returned for PercentUsed by namenode '{0}:{1}'".format(self.host, self.port))
         if pc_used < 0:
             raise UnknownError('space_used_pc {} < 0'.format(pc_used))
         stats = {}
         for field in ('Total', 'TotalBlocks', 'TotalFiles', 'Used'):
             raw = info[field]
             if not isInt(raw):
                 raise UnknownError(
                     "non-integer returned for {0} by namenode '{1}:{2}'".format(field, self.host, self.port))
             stats[field] = int(raw)
         self.ok()
         self.msg = 'HDFS space used = {0:.2f}% ({1}/{2})'.format(
             pc_used,
             humanize.naturalsize(stats['Used']),
             humanize.naturalsize(stats['Total']))
         self.check_thresholds(pc_used)
         self.msg += ", in {0:d} files spread across {1:d} blocks".format(
             stats['TotalFiles'], stats['TotalBlocks'])
         self.msg += " | 'HDFS % space used'={0:f}%{1}".format(pc_used, self.get_perf_thresholds())
         self.msg += " 'HDFS space used'={0:d}b".format(stats['Used'])
         self.msg += " 'HDFS file count'={0:d}".format(stats['TotalFiles'])
         self.msg += " 'HDFS block count'={0:d}".format(stats['TotalBlocks'])
     except KeyError as missing:
         raise UnknownError(
             "failed to parse json returned by NameNode at '{0}:{1}': {2}. {3}".format(
                 self.host, self.port, missing, support_msg_api()))
     except ValueError as invalid:
         raise UnknownError(
             "invalid json returned for LiveNodes by Namenode '{0}:{1}': {2}".format(
                 self.host, self.port, invalid))
Exemplo n.º 24
0
 def parse_json(self, json_data):
     """Count Presto nodes whose recent failures exceed self.max_failures.

     json_data: list of node dicts with 'recentFailures' and 'uri'.  Each
     failure figure is rounded to two decimals and must not be negative.
     Nodes above self.max_failures are collected (URI without the protocol
     prefix); their count is passed to self.check_thresholds().  self.msg
     receives the summary, the highest figure seen, the node list when
     verbose, and perfdata.

     Raises UnknownError for a non-list response or an invalid figure.
     """
     if not isList(json_data):
         raise UnknownError('non-list returned by Presto for nodes. {0}'.format(support_msg_api()))
     failing = []
     peak = 0.0
     scheme = re.compile('^https?://')
     for node in json_data:
         failures = node['recentFailures']
         if not isFloat(failures):
             raise UnknownError('recentFailures is not a float! {0}'.format(support_msg_api()))
         failures = float('{0:.2f}'.format(failures))
         if failures < 0:
             raise UnknownError('recentFailures < 0 ?!!! {0}'.format(support_msg_api()))
         if failures > peak:
             peak = failures
         if failures > self.max_failures:
             full_uri = node['uri']
             failing.append(scheme.sub('', full_uri))
             log.info("node '%s' recent failures %f > max failures %f",
                      node['uri'], failures, self.max_failures)
         elif failures:
             # non-zero but within the limit: logged only
             log.info("node '%s' recent failures %f, but less than max failures threshold of %f",
                      node['uri'], failures, self.max_failures)
     failing_count = len(failing)
     self.msg = 'Presto SQL worker nodes with recent failures > {0:.2f} = {1:d}'.format(
         self.max_failures, failing_count)
     self.check_thresholds(failing_count)
     self.msg += ', max recent failures per node = {0:.2f}'.format(peak)
     if self.verbose and failing:
         self.msg += ' [{0}]'.format(','.join(failing))
     self.msg += ' | num_nodes_failing={0}{1} max_recent_failures={2:.2f}'.format(
         failing_count, self.get_perf_thresholds(), peak)
 def parse_metrics(self, json_struct):
     """Build a name -> value dict from the Attivio AIE Perfmon API response.

     json_struct: list of dicts that each hold a 'metric' name and a
     'values' list.  Items for which self.skip_metric() is true are ignored.
     For every qualifier key found on an item, '.<value>' is appended to the
     metric name.  The value is the first element of 'values', rounded to
     self.precision decimals when that is set and the value passes isFloat.

     Returns the resulting dict.  Raises ValueError on a wrongly typed
     response or item; an empty response or a repeated name is reported
     through qquit('UNKNOWN', ...).
     """
     if not isList(json_struct):
         wrong_type = "non-list returned by Attivio AIE Perfmon metrics API (got type '{0}')"
         raise ValueError(wrong_type.format(type(json_struct)))
     result = {}
     if not json_struct:
         nothing_found = "no matching metrics found for '{0}'".format(self.metrics)
         qquit('UNKNOWN', nothing_found + ", use --list-metrics to check you've specified a correct metric")
     for record in json_struct:
         if not isDict(record):
             not_dict = "non-dict item found in list returned by Attivio AIE Perfmon API (got type '{0}')"
             raise ValueError(not_dict.format(type(record)))
         if not isList(record['values']):
             not_list = "non-list returned for metric value by Attivio AIE Perfmon API (got type '{0}')"
             raise ValueError(not_list.format(type(record['values'])))
         label = record['metric']
         log.info('metric = %s', label)
         if self.skip_metric(record):
             log.info('skipping metric %s due to filters', label)
             continue
         for field in ('nodeset', 'hostname', 'workflowType', 'workflow', 'component', 'path', 'networkInterface'):
             if field in record:
                 suffix = record[field]
                 log.info('%s = %s', field, suffix)
                 # Attivio returns network interfaces in form "lo - 127.0.0.1"
                 if field == 'networkInterface':
                     suffix = suffix.split()[0]
                 label += '.{0}'.format(suffix)
         sample = record['values'][0]
         log.info('value = %s\n', sample)
         if self.precision and isFloat(sample):
             # leaving as string will result in lots of trailing zeros
             sample = float('{value:.{precision}f}'.format(value=sample, precision=self.precision))
         if label in result:
             duplicate = "duplicate metric '{metric}' discovered! {support_msg}"
             qquit('UNKNOWN', duplicate.format(metric=label, support_msg=support_msg_api()))
         result[label] = sample
     return result
 def parse_json(self, json_data):
     """Print the Jenkins job list and exit, or report a job's health score.

     When self.list_jobs is set, the names in json_data['jobs'] are printed
     and the process exits with ERRORS['UNKNOWN'].  Otherwise the first
     entry of json_data['healthReport'] supplies the score and description
     that are appended to self.msg; the score is passed to
     self.check_thresholds() and perfdata with the lower-boundary thresholds
     is added.

     Raises UnknownError when the health report is empty or its score is not
     numeric.
     """
     if self.list_jobs:
         jobs = json_data['jobs']
         print('Jenkins Jobs:\n')
         for item in jobs:
             print(item['name'])
         sys.exit(ERRORS['UNKNOWN'])
     health_reports = json_data['healthReport']
     if not health_reports:
         raise UnknownError(
             "no health report found for job '{job}' (not built yet?)".format(job=self.job))
     report = health_reports[0]
     score = report['score']
     if isFloat(score):
         score = float(score)
     else:
         raise UnknownError(
             "non-numeric score returned in health report for job '{job}'".format(job=self.job))
     description = report['description']
     self.msg += "'{job}' health report score = {score}".format(job=self.job, score=score)
     self.check_thresholds(score)
     self.msg += ", {description}".format(description=description)
     perf_template = ' | health_report_score={score}%{thresholds}'
     self.msg += perf_template.format(score=score, thresholds=self.get_perf_thresholds(boundary='lower'))
Exemplo n.º 27
0
 def create_perfdata(self):
     """Build the perfdata string for the value read from self.key.

     Returns '' unless self._read_value passes isFloat.  Otherwise returns
     ' | ' followed by the quoted key, the value, the perf thresholds and
     the query time in seconds with seven decimals.
     """
     template = " | '{0}'={1}{2} query_time={3:.7f}s"
     if isFloat(self._read_value):
         return template.format(
             self.key, self._read_value, self.get_perf_thresholds(), self._read_timing)
     return ''