def process_build_info(self, build_info):
     displayname = build_info['displayName']
     duration = build_info['duration']
     if not isInt(duration):
         raise UnknownError('duration field returned non-integer! {0}'.format(support_msg_api()))
     duration = int(duration) / 1000
     result = build_info['result']
     timestamp = build_info['timestamp']
     if not isInt(timestamp):
         raise UnknownError('timestamp field returned non-integer! {0}'.format(support_msg_api()))
     timestamp = int(timestamp)
     building = build_info['building']
     self.msg += "build {build} status: ".format(build=displayname)
     if building:
         self.unknown()
         self.msg += 'STILL BUILDING!'
         return
     self.msg += result
     if result != 'SUCCESS':
         self.critical()
     self.msg += ', duration={duration} secs'.format(duration=duration)
     self.check_thresholds(duration)
     age = time.time() - (timestamp/1000)
     self.msg += ', age={age} secs'.format(age=sec2human(age))
     if age < 0:
         self.warning()
         self.msg += ' (< 0!)'
     if self.age and age > self.age:
         self.critical()
         self.msg += ' (> {0:d})'.format(self.age)
     self.msg += ' | build_duration={duration}s{perf_thresholds}'.format(duration=duration, \
                                                                  perf_thresholds=self.get_perf_thresholds())
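Jenkins returns both the duration and timestamp fields in milliseconds, which is why the check above divides each by 1000 before comparing against thresholds and computing the build age in seconds. A small hedged sketch of the same conversion, using made-up values rather than a real Jenkins response:

import time

build_info = {'duration': 754000, 'timestamp': 1600000000000}   # illustrative values only
duration_secs = int(build_info['duration']) / 1000.0            # 754.0 seconds
age_secs = time.time() - int(build_info['timestamp']) / 1000.0  # seconds since the build started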
Example No. 2
 def extract_value(self, content):
     json_data = None
     try:
         json_data = json.loads(content)
     except ValueError:
         raise UnknownError("non-json data returned by consul: '%s'. %s" % (content, support_msg_api()))
     value = None
     if not isList(json_data):
         raise UnknownError("non-list returned by consul: '%s'. %s" % (content, support_msg_api()))
     if not json_data:
         raise UnknownError("blank list returned by consul! '%s'. %s" % (content, support_msg_api()))
     if len(json_data) > 1:
         raise UnknownError("more than one key returned by consul! response = '%s'. %s" \
               % (content, support_msg_api()))
     try:
         value = json_data[0]['Value']
     except KeyError:
         raise UnknownError("couldn't find field 'Value' in response from consul: '%s'. %s"
                            % (content, support_msg_api()))
     try:
         # decodestring might be deprecated but decodebytes isn't available on Python 2.7
         #value = base64.decodebytes(value)
         value = base64.decodestring(value)
     except TypeError:
         raise UnknownError("invalid data returned for key '{0}' value = '{1}', failed to base64 decode"
                            .format(self.key, value))
     return value
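The commented-out decodebytes() call above points at a real portability wrinkle: base64.decodestring() was removed in Python 3.9, while decodebytes() does not exist on Python 2.7. A minimal compatibility sketch, offered as an assumption rather than code from the original plugin:

import base64

def base64_decode_compat(data):
    # use decodebytes() where it exists (Python 3), fall back to decodestring() (Python 2.7)
    decoder = getattr(base64, 'decodebytes', None) or base64.decodestring
    return decoder(data)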
Example No. 3
 def extract_value(self, content):  # pylint: disable=no-self-use
     json_data = None
     try:
         json_data = json.loads(content)
     except ValueError:
         raise UnknownError("non-json data returned by consul: '%s'. %s" % (content, support_msg_api()))
     value = None
     if not isList(json_data):
         raise UnknownError("non-list returned by consul: '%s'. %s" % (content, support_msg_api()))
     if not json_data:
         raise UnknownError("blank list returned by consul! '%s'. %s" % (content, support_msg_api()))
     if len(json_data) > 1:
         raise UnknownError(
             "more than one key returned by consul! response = '%s'. %s" % (content, support_msg_api())
         )
     try:
         value = json_data[0]["Value"]
     except KeyError:
         raise UnknownError(
             "couldn't find field 'Value' in response from consul: '%s'. %s" % (content, support_msg_api())
         )
     try:
         value = base64.decodestring(value)
     except TypeError as _:
         raise UnknownError(
             "invalid data returned for key '{0}' value = '{1}', failed to base64 decode".format(self.key, value)
         )
     return value
 def extract_value(self, content): # pylint: disable=no-self-use
     json_data = None
     try:
         json_data = json.loads(content)
     except ValueError:
         qquit('UNKNOWN', "non-json data returned by consul: '%s'. %s" % (content, support_msg_api()))
     value = None
     if not isList(json_data):
         qquit('UNKNOWN', "non-list returned by consul: '%s'. %s" % (content, support_msg_api()))
     if not json_data:
         qquit('UNKNOWN', "blank list returned by consul! '%s'. %s" % (content, support_msg_api()))
     if len(json_data) > 1:
         qquit('UNKNOWN', "more than one key returned by consul! response = '%s'. %s" \
               % (content, support_msg_api()))
     try:
         value = json_data[0]['Value']
     except KeyError:
         qquit('UNKNOWN', "couldn't find field 'Value' in response from consul: '%s'. %s" \
               % (content, support_msg_api()))
     try:
         value = base64.decodestring(value)
     except TypeError as _:
         qquit('UNKNOWN', "invalid data returned for key '%(key)s' value = '%(value)s', failed to base64 decode" \
               % locals())
     return value
 def get_stat(node, stat):
     stat_num = node[stat]
     if not isFloat(stat_num):
         raise UnknownError('{stat} is not a float! {msg}'.format(stat=stat, msg=support_msg_api()))
     stat_num = float('{0:.2f}'.format(stat_num))
     if stat_num < 0:
         raise UnknownError('{stat} < 0 ?!!! {msg}'.format(stat=stat, msg=support_msg_api()))
     return stat_num
 def get_recent_failure_ratio(node):
     recent_failure_ratio = node['recentFailureRatio']
     if not isFloat(recent_failure_ratio):
         raise UnknownError('recentFailureRatio is not a float! {0}'.format(support_msg_api()))
     recent_failure_ratio = float('{0:.2f}'.format(recent_failure_ratio))
     if recent_failure_ratio < 0:
         raise UnknownError('recentFailureRatio < 0 ?!!! {0}'.format(support_msg_api()))
     if recent_failure_ratio > 1:
         raise UnknownError('recentFailureRatio > 1 ?!!! {0}'.format(support_msg_api()))
     return recent_failure_ratio
Example No. 7
    def check_read(self, table_conn, row, column, expected=None):
        log.info("getting cell for row '%s' column '%s'", row, column)
        cells = []
        query_time = None
        start = time.time()
        cells = table_conn.cells(row, column, versions=1)
        query_time = (time.time() - start) * 1000
        log.info('query read in %s ms', query_time)

        cell_info = "HBase table '{0}' row '{1}' column '{2}'".format(
            self.table, row, column)

        log.debug('cells returned: %s', cells)
        if not isList(cells):
            qquit('UNKNOWN',
                  'non-list returned for cells. ' + support_msg_api())

        if len(cells) < 1:
            qquit(
                'CRITICAL',
                "no cell value found in {0}, does row / column family combination exist?"
                .format(cell_info))
        elif len(cells) > 1:
            qquit('UNKNOWN',
                  "more than one cell returned! " + support_msg_api())

        value = cells[0]
        log.info('value = %s', value)

        if self.regex:
            log.info(
                "checking cell's value '{0}' against expected regex '{1}'".
                format(value, self.regex))
            if not re.search(self.regex, value):
                qquit(
                    'CRITICAL',
                    "cell value '{0}' (expected regex '{1}') for {2}".format(
                        value, self.regex, cell_info))
        if expected:
            log.info(
                "checking cell's value is exactly expected value '{0}'".format(
                    expected))
            if value != expected:
                qquit(
                    'CRITICAL',
                    "cell value '{0}' (expected '{1}') for {2}".format(
                        value, expected, cell_info))
        self.timings[column] = self.timings.get(column, {})
        self.timings[column]['read'] = max(self.timings[column].get('read', 0),
                                           query_time)
        self.value = value
        return (value, query_time)
 def parse_json(self, json_data):
     log.info('parsing response')
     try:
         live_nodes = json_data['beans'][0]['LiveNodes']
         live_node_data = json.loads(live_nodes)
         num_datanodes = len(live_node_data)
         if num_datanodes < 1:
             raise CriticalError("no live datanodes returned by JMX API from namenode '{0}:{1}'"\
                                 .format(self.host, self.port))
         max_blocks = 0
         min_blocks = None
         for datanode in live_node_data:
             blocks = live_node_data[datanode]['numBlocks']
             if not isInt(blocks):
                 raise UnknownError(
                     'numBlocks is not an integer! {0}'.format(
                         support_msg_api()))
             blocks = int(blocks)
             log.info("datanode '%s' has %s blocks", datanode, blocks)
             if blocks > max_blocks:
                 max_blocks = blocks
             if min_blocks is None or blocks < min_blocks:
                 min_blocks = blocks
         log.info("max blocks on a single datanode = %s", max_blocks)
         log.info("min blocks on a single datanode = %s", min_blocks)
         assert min_blocks is not None
         divisor = min_blocks
         if min_blocks < 1:
             log.info(
                 "min blocks < 1, resetting divisor to 1 (% will be very high)"
             )
             divisor = 1
         block_imbalance = float("{0:.2f}".format(
             (max_blocks - min_blocks) / divisor * 100))
         self.msg = '{0}% block imbalance across {1} datanode{2}'\
                    .format(block_imbalance, num_datanodes, plural(num_datanodes))
         self.ok()
         self.check_thresholds(block_imbalance)
         if self.verbose:
             self.msg += ' (min blocks = {0}, max blocks = {1})'.format(
                 min_blocks, max_blocks)
         self.msg += " | block_imbalance={0}%".format(block_imbalance)
         self.msg += self.get_perf_thresholds()
         self.msg += " num_datanodes={0}".format(num_datanodes)
         self.msg += " min_blocks={0}".format(min_blocks)
         self.msg += " max_blocks={0}".format(max_blocks)
     except KeyError as _:
         raise UnknownError("failed to parse json returned by NameNode at '{0}:{1}': {2}. {3}"\
                            .format(self.host, self.port, _, support_msg_api()))
     except ValueError as _:
         raise UnknownError("invalid json returned for LiveNodes by Namenode '{0}:{1}': {2}"\
                            .format(self.host, self.port, _))
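The block imbalance above is simply the spread between the busiest and quietest datanode expressed as a percentage of the minimum, with the divisor clamped to 1 so an empty datanode cannot cause a division by zero. A worked sketch with made-up block counts:

min_blocks, max_blocks = 400, 700               # hypothetical per-datanode block counts
divisor = min_blocks if min_blocks >= 1 else 1  # clamp to avoid division by zero
block_imbalance = float('{0:.2f}'.format((max_blocks - min_blocks) / float(divisor) * 100))
# block_imbalance == 75.0, i.e. the busiest datanode holds 75% more blocks than the quietest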
 def get_recent_failure_ratio(node):
     recent_failure_ratio = node['recentFailureRatio']
     if not isFloat(recent_failure_ratio):
         raise UnknownError('recentFailureRatio is not a float! {0}'.format(
             support_msg_api()))
     recent_failure_ratio = float('{0:.2f}'.format(recent_failure_ratio))
     if recent_failure_ratio < 0:
         raise UnknownError('recentFailureRatio < 0 ?!!! {0}'.format(
             support_msg_api()))
     if recent_failure_ratio > 1:
         raise UnknownError('recentFailureRatio > 1 ?!!! {0}'.format(
             support_msg_api()))
     return recent_failure_ratio
Example No. 10
 def parse_json(self, json_data):
     if self.list_jobs:
         print('Jenkins Jobs:\n')
         for job in json_data['jobs']:
             print(job['name'])
         sys.exit(ERRORS['UNKNOWN'])
     if 'lastCompletedBuild' in json_data:
         last_completed_build = json_data['lastCompletedBuild']
         if not last_completed_build:
             raise WarningError(
                 "job '{job}' not built yet".format(job=self.job))
         self.path = '/job/{job}/{number}/api/json'.format(
             job=self.job, number=last_completed_build['number'])
         req = self.query()
         self.process_json(req.content)
         return
     displayname = json_data['displayName']
     duration = json_data['duration']
     if not isInt(duration):
         raise UnknownError(
             'duration field returned non-integer! {0}'.format(
                 support_msg_api()))
     duration = int(duration) / 1000
     result = json_data['result']
     timestamp = json_data['timestamp']
     if not isInt(timestamp):
         raise UnknownError(
             'timestamp field returned non-integer! {0}'.format(
                 support_msg_api()))
     timestamp = int(timestamp)
     building = json_data['building']
     self.msg += "build {build} status: ".format(build=displayname)
     if building:
         self.unknown()
         self.msg += 'STILL BUILDING!'
         return
     self.msg += result
     if result != 'SUCCESS':
         self.critical()
     self.msg += ', duration={duration} secs'.format(duration=duration)
     self.check_thresholds(duration)
     age = time.time() - (timestamp / 1000)
     self.msg += ', age={age} secs'.format(age=sec2human(age))
     if age < 0:
         self.warning()
         self.msg += ' (< 0!)'
     if self.age and age > self.age:
         self.critical()
         self.msg += ' (> {0:d})'.format(self.age)
     self.msg += ' | build_duration={duration}s{perf_thresholds}'.format(
         duration=duration, perf_thresholds=self.get_perf_thresholds())
 def get_peers(content):
     json_data = None
     try:
         json_data = json.loads(content)
     except ValueError:
         raise UnknownError("non-json data returned by consul: '%s'. %s" % (content, support_msg_api()))
     if not isList(json_data):
         raise UnknownError("non-list returned by consul: '%s'. %s" % (content, support_msg_api()))
     if not json_data:
         raise UnknownError("blank list returned by consul! '%s'. %s" % (content, support_msg_api()))
     for peer in json_data:
         log.debug('peer: {0}'.format(peer))
     peers = uniq_list(json_data)
     return peers
 def get_request_ids(self):
     content = self.get('/clusters/{cluster}/requests'.format(cluster=self.cluster))
     try:
         _ = json.loads(content)
         request_ids = []
         for item in _['items']:
             if item['Requests']['cluster_name'] == self.cluster:
                 request_id = item['Requests']['id']
                 if not isInt(request_id):
                     die('request id returned was not an integer! ' + support_msg_api())
                 request_ids.append(request_id)
         return request_ids
     except (KeyError, ValueError) as _:
         die('failed to parse response for request IDs: {0}. '.format(_) + support_msg_api())
Example No. 13
 def run(self):
     (version, cluster_version) = self.get_version()
     if not isVersion(version):
         qquit('UNKNOWN', '{0} version unrecognized \'{1}\'. {2}'\
                          .format(self.software, version, support_msg_api()))
     if not isVersion(cluster_version):
         qquit('UNKNOWN', '{0} cluster version unrecognized \'{1}\'. {2}'\
                          .format(self.software, cluster_version, support_msg_api()))
     self.msg = '{0} version = {1}'.format(self.software, version)
     if self.expected is not None and not re.search(self.expected, version):
         self.msg += " (expected '{0}')".format(self.expected)
         self.critical()
     #super(CheckEtcdVersion, self).run()
     self.msg += ', cluster version = {0}'.format(cluster_version)
Example No. 14
 def run(self):
     (version, cluster_version) = self.get_version()
     if not isVersion(version):
         qquit('UNKNOWN', '{0} version unrecognized \'{1}\'. {2}'\
                          .format(self.software, version, support_msg_api()))
     if not isVersion(cluster_version):
         qquit('UNKNOWN', '{0} cluster version unrecognized \'{1}\'. {2}'\
                          .format(self.software, cluster_version, support_msg_api()))
     self.msg = '{0} version = {1}'.format(self.software, version)
     if self.expected is not None and not re.search(self.expected, version):
         self.msg += " (expected '{0}')".format(self.expected)
         self.critical()
     #super(CheckEtcdVersion, self).run()
     self.msg += ', cluster version = {0}'.format(cluster_version)
 def parse_consul_json(self, name, content):
     json_data = None
     try:
         json_data = json.loads(content)
     except ValueError:
         raise UnknownError("non-json {} data returned by consul at {}:{}: '{}'. {}"\
                            .format(name, self.host, self.port, content, support_msg_api()))
     if not json_data:
         raise UnknownError("blank {} contents returned by consul at {}:{}! '{}'. {}"\
                            .format(name, self.host, self.port, content, support_msg_api()))
     if not isList(json_data):
         raise UnknownError('non-list {} returned by consul at {}:{} for session data. {}'\
                            .format(name, self.host, self.port, support_msg_api()))
     return json_data
 def get_request_ids(self):
     content = self.get('/clusters/{cluster}/requests'.format(cluster=self.cluster))
     try:
         _ = json.loads(content)
         request_ids = []
         for item in _['items']:
             if item['Requests']['cluster_name'] == self.cluster:
                 request_id = item['Requests']['id']
                 if not isInt(request_id):
                     die('request id returned was not an integer! ' + support_msg_api())
                 request_ids.append(request_id)
         return request_ids
     except (KeyError, ValueError) as _:
         die('failed to parse response for request IDs: {0}. '.format(_) + support_msg_api())
 def parse_consul_json(self, name, content):
     json_data = None
     try:
         json_data = json.loads(content)
     except ValueError:
         raise UnknownError("non-json {} data returned by consul at {}:{}: '{}'. {}"\
                            .format(name, self.host, self.port, content, support_msg_api()))
     if not json_data:
         raise UnknownError("blank {} contents returned by consul at {}:{}! '{}'. {}"\
                            .format(name, self.host, self.port, content, support_msg_api()))
     if not isList(json_data):
         raise UnknownError('non-list {} returned by consul at {}:{} for session data. {}'\
                            .format(name, self.host, self.port, support_msg_api()))
     return json_data
Example No. 18
 def parse_json(self, json_data):
     if not isList(json_data):
         raise UnknownError(
             'non-list returned by Presto for nodes. {0}'.format(
                 support_msg_api()))
     nodes_failing = []
     max_ratio = 0.0
     re_protocol = re.compile('^https?://')
     num_nodes = len(json_data)
     for node_item in json_data:
         recent_failure_ratio = node_item['recentFailureRatio']
         if not isFloat(recent_failure_ratio):
             raise UnknownError(
                 'recentFailureRatio is not a float! {0}'.format(
                     support_msg_api()))
         recent_failure_ratio = float(
             '{0:.2f}'.format(recent_failure_ratio))
         if recent_failure_ratio < 0:
             raise UnknownError('recentFailureRatio < 0 ?!!! {0}'.format(
                 support_msg_api()))
         if recent_failure_ratio > 1:
             raise UnknownError('recentFailureRatio > 1 ?!!! {0}'.format(
                 support_msg_api()))
         if recent_failure_ratio > max_ratio:
             max_ratio = recent_failure_ratio
         if recent_failure_ratio > self.max_ratio:
             uri = node_item['uri']
             uri = re_protocol.sub('', uri)
             nodes_failing += [uri]
             log.info("node '%s' recent failure ratio %f > max ratio %f",
                      node_item['uri'], recent_failure_ratio,
                      self.max_ratio)
         elif recent_failure_ratio:
             log.info(
                 "node '%s' recent failures ratio %f, but less than max ratio threshold of %f",
                 node_item['uri'], recent_failure_ratio, self.max_ratio)
     num_nodes_failing = len(nodes_failing)
     self.msg = 'Presto SQL - worker nodes with recent failure ratio > {0:.2f} = {1:d}'\
                .format(self.max_ratio, num_nodes_failing)
     self.check_thresholds(num_nodes_failing)
     self.msg += ' out of {0:d} nodes'.format(num_nodes)
     if num_nodes < 1:
         self.warning()
         self.msg += ' (< 1 worker found)'
     self.msg += ', max recent failure ratio = {0:.2f}'.format(max_ratio)
     if self.verbose and nodes_failing:
         self.msg += ' [{0}]'.format(','.join(nodes_failing))
     self.msg += ' | num_nodes_failing={0}{1} max_ratio={2:.2f}'\
                 .format(num_nodes_failing, self.get_perf_thresholds(), max_ratio)
 def get_version(self):
     content = self.get()
     try:
         json_list = json.loads(content)
         if log.isEnabledFor(logging.DEBUG):
             print(jsonpp(json_list))
             print('=' * 80)
         if not isList(json_list):
             raise ValueError(
                 "non-list returned by API (is type '{0}')".format(
                     type(json_list)))
         json_dict = json_list[0]
         if not isDict(json_dict):
             raise ValueError(
                 "non-dict found inside returned list (is type '{0}')".
                 format(type(json_dict)))
         company_name = json_dict['company_name']
         company_website = json_dict['company_website']
         regex = re.compile(r'Blue\s*Talon', re.I)
         if not regex.match(company_name) and \
            not regex.match(company_website):
             qquit('UNKNOWN', 'Blue Talon name was not found in either company_name or company_website fields' \
                            + ', are you definitely querying a Blue Talon server?')
         build_version = json_dict['build_version']
         update_date = json_dict['update_date']
         api_version = json_dict['api_version']
         if not isVersion(api_version):
             qquit('UNKNOWN', '{0} api version unrecognized \'{1}\'. {2}'\
                              .format(self.software, api_version, support_msg_api()))
         if api_version != self.api_version:
             qquit('UNKNOWN', "unexpected API version '{0}' returned (expected '{1}')"\
                              .format(api_version, self.api_version))
         if self.verbose:
             extra_info = ' revision {revision} build {build}, schema revision = {schema_revision}'\
                           .format(revision=json_dict['revision_no'],
                                   build=json_dict['build_no'],
                                   schema_revision=json_dict['schema_revision'])
             extra_info += ', api version = {api_version}, update date = {update_date}'\
                           .format(api_version=api_version, update_date=update_date)
         else:
             extra_info = ', update date = {update_date}'.format(
                 update_date=update_date)
     except (KeyError, ValueError) as _:
         qquit('UNKNOWN', 'error parsing output from {software}: {exception}: {error}. {support_msg}'\
                          .format(software=self.software,
                                  exception=type(_).__name__,
                                  error=_,
                                  support_msg=support_msg_api()))
     return (build_version, extra_info)
 def parse_json(self, json_data):
     log.info('parsing response')
     try:
         live_nodes = json_data['beans'][0]['LiveNodes']
         live_node_data = json.loads(live_nodes)
         num_datanodes = len(live_node_data)
         if num_datanodes < 1:
             raise CriticalError("no live datanodes returned by JMX API from namenode '{0}:{1}'"\
                                 .format(self.host, self.port))
         min_space = None
         max_space = 0
         for datanode in live_node_data:
             used_space = live_node_data[datanode]['usedSpace']
             if not isInt(used_space):
                 raise UnknownError('usedSpace is not an integer! {0}'.format(support_msg_api()))
             used_space = int(used_space)
             log.info("datanode '%s' used space = %s", datanode, used_space)
             if min_space is None or used_space < min_space:
                 min_space = used_space
             if used_space > max_space:
                 max_space = used_space
         divisor = max_space
         if divisor < 1:
              log.info('max used space < 1, resetting divisor to 1 (% will likely be very high)')
             divisor = 1
         assert max_space >= min_space
         largest_imbalance_pc = float('{0:.2f}'.format(((max_space - min_space) / divisor) * 100))
         assert largest_imbalance_pc >= 0
         self.ok()
         self.msg = '{0}% HDFS imbalance on space used'.format(largest_imbalance_pc)
         self.check_thresholds(largest_imbalance_pc)
         self.msg += ' across {0:d} datanode{1}'.format(num_datanodes, plural(num_datanodes))
         if self.verbose:
             self.msg += ', min used space = {0}, max used space = {1}'.format(min_space, max_space)
         if self.verbose and (self.is_warning() or self.is_critical()):
             self.msg += ' [imbalanced nodes: '
             for datanode in live_node_data:
                 used_space = live_node_data[datanode]['usedSpace']
                 if (used_space / max_space * 100) > self.thresholds['warning']['upper']:
                      self.msg += '{0}({1:.2f}%),'.format(datanode, used_space / max_space * 100)
             self.msg = self.msg.rstrip(',') + ']'
         self.msg += " | 'HDFS imbalance on space used %'={0}".format(largest_imbalance_pc)
         self.msg += self.get_perf_thresholds()
         self.msg += " num_datanodes={0}".format(num_datanodes)
         self.msg += " min_used_space={0}".format(min_space)
         self.msg += " max_used_space={0}".format(max_space)
     except KeyError as _:
         raise UnknownError("failed to parse json returned by NameNode at '{0}:{1}': {2}. {3}"\
                            .format(self.host, self.port, _, support_msg_api()))
 def parse_json(self, json_data):
     log.info('parsing response')
     try:
         data = json_data['beans'][0]
         total_blocks = data['TotalBlocks']
         if not isInt(total_blocks):
             raise UnknownError('non-integer returned by NameNode for number of total blocks! {0}'\
                                .format(support_msg_api()))
         total_blocks = int(total_blocks)
         self.msg = 'HDFS Total Blocks = {0:d}'.format(total_blocks)
         self.check_thresholds(total_blocks)
         self.msg += ' | hdfs_total_blocks={0:d}{1}'.format(total_blocks, self.get_perf_thresholds())
     except KeyError as _:
         raise UnknownError("failed to parse json returned by NameNode at '{0}:{1}': {2}. {3}"\
                            .format(self.host, self.port, _, support_msg_api()))
 def parse_json(self, json_data):
     log.info('parsing response')
     try:
         data = json_data['beans'][0]
         total_blocks = data['TotalBlocks']
         if not isInt(total_blocks):
             raise UnknownError('non-integer returned by NameNode for number of total blocks! {0}'\
                                .format(support_msg_api()))
         total_blocks = int(total_blocks)
         self.msg = 'HDFS Total Blocks = {0:d}'.format(total_blocks)
         self.check_thresholds(total_blocks)
         self.msg += ' | hdfs_total_blocks={0:d}{1}'.format(
             total_blocks, self.get_perf_thresholds())
     except KeyError as _:
         raise UnknownError("failed to parse json returned by NameNode at '{0}:{1}': {2}. {3}"\
                            .format(self.host, self.port, _, support_msg_api()))
Example No. 23
 def get_tables(self):
     try:
         tables = self.conn.tables()
         if not isList(tables):
              qquit('UNKNOWN', 'table list returned is not a list! ' + support_msg_api())
          return tables
      except (socket.timeout, ThriftException, HBaseIOError) as _:
          qquit('CRITICAL', 'error while trying to get table list: {0}'.format(_))
 def print_table_regions(self):
     print('=' * self.total_width)
     print('{0:{1}}{2}'.format(self.region_header, self.region_width,
                               self.separator),
           end='')
     print('{0:{1}}{2}'.format(self.start_key_header, self.start_key_width,
                               self.separator),
           end='')
     print('{0:{1}}{2}'.format(self.end_key_header, self.end_key_width,
                               self.separator),
           end='')
     print('{0}'.format(self.server_header))
     print('=' * self.total_width)
     try:
         for region in self._regions:
             print('{0:{1}}{2}'.format(
                 self.bytes_to_str(self.shorten_region_name(
                     region['name'])), self.region_width, self.separator),
                   end='')
             print('{0:{1}}{2}'.format(
                 self.bytes_to_str(region['start_key']),
                 self.start_key_width, self.separator),
                   end='')
             print('{0:{1}}{2}'.format(self.bytes_to_str(region['end_key']),
                                       self.end_key_width, self.separator),
                   end='')
             print('{0}:{1}'.format(region['server_name'], region['port']))
     except KeyError as _:
         die('error parsing region info: {0}. '.format(_) +
             support_msg_api())
     print('\nNumber of Regions: {0:d}'.format(len(self._regions)))
    def parse_json(self, json_data):
        if not isList(json_data):
            raise UnknownError('non-list returned for storage plugins. {}'.format(support_msg_api()))
        if self.get_opt('list'):
            print('Apache Drill storage plugins:\n')
            print('=' * 50)
            print('%-10s\t%-10s\t%s' % ('Name', 'Type', 'Enabled'))
            print('=' * 50 + '\n')
            for storage_plugin in json_data:
                name = storage_plugin['name']
                config = storage_plugin['config']
                plugin_type = config['type']
                enabled = config['enabled']
                print('%-10s\t%-10s\t%s' % (name, plugin_type, enabled))
            sys.exit(ERRORS['UNKNOWN'])

        config = None
        for storage_plugin in json_data:
            name = storage_plugin['name']
            if name == self.storage_plugin:
                config = storage_plugin['config']
                plugin_type = config['type']
                enabled = config['enabled']
                break
        if not config:
            raise CriticalError("Apache Drill storage plugin '{}' not found! See --list for available plugins!"\
                                .format(self.storage_plugin))
        self.msg = "Apache Drill storage plugin '{}' enabled = {}, plugin type = '{}'"\
                   .format(self.storage_plugin, enabled, plugin_type)
        if not enabled:
            self.critical()
        _type = self.get_opt('type')
        if _type and _type != plugin_type:
            self.critical()
            self.msg += " (expected '{}')".format(_type)
 def parse_json(self, json_data):
     log.info('parsing response')
     try:
         data = json_data['beans'][0]
         name_dir_statuses = data['NameDirStatuses']
         name_dir_data = json.loads(name_dir_statuses)
         active_dirs = name_dir_data['active']
         failed_dirs = name_dir_data['failed']
         num_active_dirs = len(active_dirs)
         num_failed_dirs = len(failed_dirs)
         self.msg = 'NameNode has {0} failed dir{1}'.format(num_failed_dirs, plural(num_failed_dirs))
         if num_failed_dirs > 0:
             self.warning()
             if self.verbose:
                 self.msg += ' ({0})'.format(', '.join(failed_dirs))
         self.msg += ', {0} active dir{1}'.format(num_active_dirs, plural(num_active_dirs))
         if num_active_dirs < 1:
             self.critical()
         if self.verbose and num_active_dirs > 0:
             self.msg += ' ({0})'.format(', '.join(active_dirs))
         self.msg += ' | num_failed_dirs={0} num_active_dirs={1}'.format(num_failed_dirs, num_active_dirs)
     except KeyError as _:
         raise UnknownError("failed to parse json returned by NameNode at '{0}:{1}': {2}. {3}"\
                            .format(self.host, self.port, _, support_msg_api()))
     except ValueError as _:
         raise UnknownError("invalid json returned for NameDirStatuses by Namenode '{0}:{1}': {2}"\
                            .format(self.host, self.port, _))
Example No. 27
 def check_version(self, version):
     log.info("checking version '%s'", version)
     if not version:
         qquit('UNKNOWN', '{0} version not found. {1}'.format(self.software, support_msg_api()))
     if not isVersion(version):
         qquit('UNKNOWN', '{0} version unrecognized \'{1}\'. {2}'\
                          .format(self.software, version, support_msg_api()))
     self.msg = '{0} version = {1}'.format(self.software, version)
     if self.expected is not None:
         log.info("verifying version against expected regex '%s'", self.expected)
         if re.match(self.expected, version):
             log.info('version regex matches retrieved version')
         else:
             log.info('version regex does not match retrieved version')
             self.msg += " (expected '{0}')".format(self.expected)
             self.critical()
Example No. 28
 def parse_json(self, json_data):
     if not isList(json_data):
         raise UnknownError('non-list returned by Presto for nodes. {0}'.format(support_msg_api()))
     num_worker_nodes = len(json_data)
     self.msg = 'Presto SQL worker nodes = {0}'.format(num_worker_nodes)
     self.check_thresholds(num_worker_nodes)
     self.msg += ' | num_worker_nodes={0}{1}'.format(num_worker_nodes, self.get_perf_thresholds('lower'))
Example No. 29
    def run(self):
        self.no_args()
        host = self.get_opt('host')
        port = self.get_opt('port')
        validate_host(host)
        validate_port(port)

        url = 'http://%(host)s:%(port)s/oozie/v1/admin/status' % locals()
        req = RequestHandler().get(url)
        # should == NORMAL
        if not isJson(req.content):
            raise UnknownError(
                'non-JSON returned by Oozie server at {0}:{1}'.format(
                    host, port))
        status = None
        try:
            _ = json.loads(req.content)
            status = _['systemMode']
        except KeyError:
            raise UnknownError(
                'systemMode key was not returned in output from Oozie. {0}'.
                format(support_msg_api()))
        self.msg = 'Oozie status = {0}'.format(status)
        if status == 'NORMAL':
            self.ok()
        else:
            self.critical()
Example No. 30
 def parse_json(self, json_data):
     if not isList(json_data):
         raise UnknownError('non-list returned by Atlas metadata server instance at {0}:{1}! {2}'\
                            .format(self.host, self.port, support_msg_api()))
     if len(json_data) < 1:
         raise CriticalError('no entities found!')
     if self.list_entities:
         print('=' * 100)
         print('{0:40} {1:25} {2}'.format('ID', 'Type', 'Name'))
         print('=' * 100)
         for entity in json_data:
             name = self.get_key(entity, 'name')
             _id = self.get_key(entity, 'id')
             _type = self.get_key(entity, 'type')
             print('{0:40} {1:25} {2}'.format(_id, _type, name))
         sys.exit(ERRORS['UNKNOWN'])
     if self.entity_id:
         if len(json_data) > 1:
             raise CriticalError('more than one matching entity returned!')
         json_data = json_data[0]
     elif self.entity_name:
         for entity in json_data:
             if self.entity_name == self.get_key(entity, 'name'):
                 # Recursion - a bit too clever but convenient
                 self.entity_name = None
                 self.entity_id = self.get_key(entity, 'id')
                 self.path += '/{0}'.format(self.entity_id)
                 req = self.query()
                 self.process_json(req.content)
                 # escape recursion
                 return
         raise CriticalError("entity with name '{name}' not found!".format(name=self.entity_name))
     name = self.get_key(json_data, 'name')
     state = self.get_key(json_data, 'state')
     # available for HDFS path but not DB
     #path = self.get_key(json_data, 'path')
     _type = self.get_key(json_data, 'type')
     tags = []
     if 'trait_names' in json_data:
         tags = self.get_key(json_data, 'trait_names')
     #traits = self.get_key(json_data, 'traits')
     version = self.get_key(json_data, 'version')
     modified_date = self.get_key(json_data, 'modified_time')
     self.msg = " '{name}' exists, state='{state}'".format(name=name, state=state)
     if state != 'ACTIVE':
         self.critical()
         self.msg += " (expected 'ACTIVE')"
     self.msg += ", type='{type}'".format(type=_type)
     self.check_type(_type)
     #if self.verbose:
     self.msg += ", tags='{tags}'".format(tags=','.join(tags))
     self.check_missing_tags(tags)
     #if self.verbose:
     #self.msg += ", traits='{traits}'".format(traits=','.join(traits))
     #self.check_missing_traits(traits)
     if self.verbose:
         self.msg += ", modified_date='{modified_date}', version='{version}'".format(
             modified_date=modified_date,
             version=version
         )
 def get_tables(self):
     try:
         tables = self.conn.tables()
         if not isList(tables):
              qquit('UNKNOWN', 'table list returned is not a list! ' + support_msg_api())
          return tables
      except (socket.error, socket.timeout, ThriftException, HBaseIOError) as _:
          qquit('CRITICAL', 'error while trying to get table list: {0}'.format(_))
Example No. 32
 def parse_json(self, json_data):
     log.info('parsing response')
     try:
         bean = json_data['beans'][0]
         space_used_pc = bean['PercentUsed']
         # the way below is more informative
         #assert type(space_used_pc) == float
         if re.search(r'e-\d+$', str(space_used_pc)):
             space_used_pc = 0
         if not isFloat(space_used_pc):
             raise UnknownError("non-float returned for PercentUsed by namenode '{0}:{1}'"\
                                .format(self.host, self.port))
         assert space_used_pc >= 0
         stats = {}
         for stat in ('Total', 'TotalBlocks', 'TotalFiles', 'Used'):
             stats[stat] = bean[stat]
             if not isInt(stats[stat]):
                 raise UnknownError("non-integer returned for {0} by namenode '{1}:{2}'"\
                                    .format(stat, self.host, self.port))
             stats[stat] = int(stats[stat])
         self.ok()
         self.msg = 'HDFS space used = {0:.2f}% ({1}/{2})'\
                    .format(space_used_pc, humanize.naturalsize(stats['Used']), humanize.naturalsize(stats['Total']))
         self.check_thresholds(space_used_pc)
         self.msg += ", in {0:d} files spread across {1:d} blocks".format(stats['TotalFiles'], stats['TotalBlocks'])
         self.msg += " | 'HDFS % space used'={0:f}%{1}".format(space_used_pc, self.get_perf_thresholds())
         self.msg += " 'HDFS space used'={0:d}b".format(stats['Used'])
         self.msg += " 'HDFS file count'={0:d}".format(stats['TotalFiles'])
         self.msg += " 'HDFS block count'={0:d}".format(stats['TotalBlocks'])
     except KeyError as _:
         raise UnknownError("failed to parse json returned by NameNode at '{0}:{1}': {2}. {3}"\
                            .format(self.host, self.port, _, support_msg_api()))
     except ValueError as _:
         raise UnknownError("invalid json returned for LiveNodes by Namenode '{0}:{1}': {2}"\
                            .format(self.host, self.port, _))
 def parse_json(self, json_data):
     if self.get_opt('logstash_5'):
         pipeline = json_data['pipeline']
     else:
         pipelines = json_data['pipelines']
         if self.get_opt('list'):
             print('Logstash Pipelines:\n')
             for pipeline in pipelines:
                 print(pipeline)
             sys.exit(ERRORS['UNKNOWN'])
         pipeline = None
         if self.pipeline in pipelines:
             pipeline = pipelines[self.pipeline]
     self.msg = "Logstash pipeline '{}' ".format(self.pipeline)
     if pipeline:
         self.msg += 'exists'
         if 'workers' not in pipeline:
             raise UnknownError('workers field not found, Logstash may still be initializing' + \
                                '. If problem persists {}'.format(support_msg_api()))
         workers = pipeline['workers']
         self.msg += ' with {} workers'.format(workers)
         self.check_thresholds(workers)
         if not self.get_opt('logstash_5'):
             dead_letter_queue_enabled = pipeline['dead_letter_queue_enabled']
             self.msg += ', dead letter queue enabled: {}'.format(dead_letter_queue_enabled)
             if self.get_opt('dead_letter_queue_enabled') and not dead_letter_queue_enabled:
                 self.warning()
                 self.msg += ' (expected True)'
         batch_delay = pipeline['batch_delay']
         batch_size = pipeline['batch_size']
         self.msg += ', batch delay: {}, batch size: {}'.format(batch_delay, batch_size)
     else:
         self.critical()
         self.msg += 'does not exist!'
Example No. 34
 def get_version(self):
     url = 'http://{host}:{port}/solr/admin/info/system'.format(host=self.host, port=self.port)
     log.debug('GET %s', url)
     try:
         req = requests.get(url)
     except requests.exceptions.RequestException as _:
         qquit('CRITICAL', _)
     log.debug('response: %s %s', req.status_code, req.reason)
     log.debug('content:\n%s\n%s\n%s', '='*80, req.content.strip(), '='*80)
     if req.status_code != 200:
         qquit('CRITICAL', '%s %s' % (req.status_code, req.reason))
     # versions 7.0+
     if isJson(req.content):
         json_data = json.loads(req.content)
         version = json_data['lucene']['solr-spec-version']
     else:
         soup = BeautifulSoup(req.content, 'html.parser')
         if log.isEnabledFor(logging.DEBUG):
             log.debug("BeautifulSoup prettified:\n{0}\n{1}".format(soup.prettify(), '='*80))
         try:
             version = soup.find('str', {'name':'solr-spec-version'}).text
         except (AttributeError, TypeError) as _:
              qquit('UNKNOWN', 'failed to parse Solr output. {0}\n{1}'\
                              .format(support_msg_api(), traceback.format_exc()))
     return version
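For context, Solr 7.0+ answers /solr/admin/info/system with JSON and the version lives under the lucene block, while older releases return XML that the code scrapes with BeautifulSoup. A hedged sketch of the JSON branch only, with a trimmed, illustrative response body:

import json

content = '{"lucene": {"solr-spec-version": "8.11.2"}}'        # illustrative, not a real capture
version = json.loads(content)['lucene']['solr-spec-version']   # -> '8.11.2'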
 def parse_scheduled_request(content):
     try:
         _ = json.loads(content)
         if _['RequestSchedule']['last_execution_status'] == 'COMPLETED':
             log.info('COMPLETED')
             return 'COMPLETED'
         for item in _['RequestSchedule']['batch']['batch_requests']:
             request_status = 'NO STATUS YET'
             if 'request_status' in item:
                 request_status = item['request_status']
             if request_status == 'COMPLETED':
                 continue
             request_body = item['request_body']
             request_body_dict = json.loads(request_body)
             command = request_body_dict['RequestInfo']['command']
             context = request_body_dict['RequestInfo']['context']
             log.info('{request_status}: {command}: {context}'.format(
                 request_status=request_status,
                 command=command,
                 context=context))
             if request_status != 'ABORTED':
                 return 'IN_PROGRESS'
     except (KeyError, ValueError) as _:
         die('parsing schedule request status failed: ' + str(_) + '. ' +
             support_msg_api())
Example No. 36
 def parse_json(self, json_data):
     if not isList(json_data):
         raise UnknownError(
             'non-list returned for config settings. {}'.format(
                 support_msg_api()))
     if self.list_config:
         print('Apache Drill config settings:\n')
         for config in json_data:
             print('{} = {}'.format(config['name'], config['value']))
         sys.exit(ERRORS['UNKNOWN'])
     value = None
     for config in json_data:
         name = config['name']
         if name == self.config_key:
             value = config['value']
             break
     if value is None:
         raise UnknownError(
             "config key '{}' not found. See --list for all config keys".
             format(self.config_key))
     # intentionally using name instead of self.config_key to cause NameError if not set or make error more visible if wrong key match
     self.msg = "Apache Drill config '{}' = '{}'".format(name, value)
     if re.match(str(self.expected_value), str(value), re.I):
         self.ok()
     else:
         self.critical()
         self.msg += " (expected '{}')".format(self.expected_value)
Example No. 37
 def get_version(self):
     log.info('querying %s', self.software)
     url = 'http://{host}:{port}/version'.format(host=self.host,
                                                 port=self.port)
     log.debug('GET %s', url)
     try:
         req = requests.get(url)
     except requests.exceptions.RequestException as _:
         qquit('CRITICAL', _)
     log.debug("response: %s %s", req.status_code, req.reason)
     log.debug("content:\n%s\n%s\n%s", '=' * 80, req.content.strip(),
               '=' * 80)
     # Special handling for Nginx, expecting 404 rather than usual 200
     if req.status_code != 404:
         qquit(
             'CRITICAL',
             '{0} {1} (expecting 404)'.format(req.status_code, req.reason))
     soup = BeautifulSoup(req.content, 'html.parser')
     if log.isEnabledFor(logging.DEBUG):
         log.debug("BeautifulSoup prettified:\n{0}\n{1}".format(
             soup.prettify(), '=' * 80))
     try:
         version = soup.findAll('center')[1].text
     except (AttributeError, TypeError) as _:
          qquit('UNKNOWN', 'failed to parse {0} output. {1}\n{2}'\
                          .format(self.software, support_msg_api(), traceback.format_exc()))
     if '/' in version:
         version = version.split('/')[1]
     return version
Example No. 38
 def get_version(self):
     url = 'http://{host}:{port}/solr/admin/info/system'.format(
         host=self.host, port=self.port)
     log.debug('GET %s', url)
     try:
         req = requests.get(url)
     except requests.exceptions.RequestException as _:
         qquit('CRITICAL', _)
     log.debug('response: %s %s', req.status_code, req.reason)
     log.debug('content:\n%s\n%s\n%s', '=' * 80, req.content.strip(),
               '=' * 80)
     if req.status_code != 200:
         qquit('CRITICAL', '%s %s' % (req.status_code, req.reason))
     # versions 7.0+
     if isJson(req.content):
         json_data = json.loads(req.content)
         version = json_data['lucene']['solr-spec-version']
     else:
         soup = BeautifulSoup(req.content, 'html.parser')
         if log.isEnabledFor(logging.DEBUG):
             log.debug("BeautifulSoup prettified:\n{0}\n{1}".format(
                 soup.prettify(), '=' * 80))
         try:
             version = soup.find('str', {'name': 'solr-spec-version'}).text
         except (AttributeError, TypeError) as _:
              qquit('UNKNOWN', 'failed to parse Solr output. {0}\n{1}'\
                              .format(support_msg_api(), traceback.format_exc()))
     return version
 def run(self):
     url = '{protocol}://{host}:{port}/rest/ingestApi/getSessionCount'.format(host=self.host,
                                                                              port=self.port,
                                                                              protocol=self.protocol)
     log.debug('GET %s', url)
     try:
         req = requests.get(url)
     except requests.exceptions.RequestException as _:
         errhint = ''
         if 'BadStatusLine' in str(_.message):
             errhint = ' (possibly connecting to an SSL secured port without using --ssl?)'
         elif self.protocol == 'https' and 'unknown protocol' in str(_.message):
             errhint = ' (possibly connecting to a plain HTTP port with the -S / --ssl switch enabled?)'
         qquit('CRITICAL', str(_) + errhint)
     log.debug("response: %s %s", req.status_code, req.reason)
     log.debug("content:\n%s\n%s\n%s", '='*80, req.content.strip(), '='*80)
     if req.status_code != 200:
         qquit('CRITICAL', '{0} {1}'.format(req.status_code, req.reason))
     try:
         count = req.content.strip()
         if not isInt(count):
             raise ValueError('non-integer value returned by Attivio AIE')
         count = int(count)
         self.msg = '{software} ingest session count = {count}'.format(software=self.software, count=count)
         self.check_thresholds(count)
      except (KeyError, ValueError) as _:
         qquit('UNKNOWN', 'error parsing output from {software}: {exception}: {error}. {support_msg}'\
                          .format(software=self.software,
                                  exception=type(_).__name__,
                                  error=_,
                                  support_msg=support_msg_api()))
     self.msg += ' | ingest_session_count={0:d}{thresholds}'.format(count, thresholds=self.get_perf_thresholds())
Example No. 40
 def parse_json(self, json_data):
     if not isList(json_data):
         raise UnknownError(
             'non-list returned by Presto for nodes. {0}'.format(
                 support_msg_api()))
     nodes_lagging = []
     max_lag = 0
     re_protocol = re.compile('^https?://')
     for node_item in json_data:
         last_response_time = node_item['lastResponseTime']
         last_response_datetime = datetime.strptime(
             last_response_time, '%Y-%m-%dT%H:%M:%S.%fZ')
         timedelta = datetime.utcnow() - last_response_datetime
         response_age = int(timedelta.total_seconds())
         if response_age > max_lag:
             max_lag = response_age
         if response_age > self.max_age:
             uri = node_item['uri']
             uri = re_protocol.sub('', uri)
             nodes_lagging += [uri]
             log.info(
                 "node '%s' last response age %d secs > max age %s secs",
                 node_item['uri'], response_age, self.max_age)
         else:
             log.info("node '%s' last response age %d secs",
                      node_item['uri'], response_age)
     num_nodes_lagging = len(nodes_lagging)
     self.msg = 'Presto SQL worker nodes with response timestamps older than {0:d} secs = {1:d}'\
                .format(self.max_age, num_nodes_lagging)
     self.check_thresholds(num_nodes_lagging)
     self.msg += ', current max response age = {0:.2f} secs'.format(max_lag)
     if self.verbose and nodes_lagging:
         self.msg += ' [{0}]'.format(', '.join(nodes_lagging))
     self.msg += ' | num_nodes_lagging={0}{1} max_response_age={2:.2f}s'\
                 .format(num_nodes_lagging, self.get_perf_thresholds(), max_lag)
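Presto reports each node's lastResponseTime as a UTC ISO-8601 timestamp with fractional seconds and a trailing 'Z', which is exactly what the strptime format above decodes; the lag is then the difference from datetime.utcnow(). A small hedged sketch with a made-up timestamp:

from datetime import datetime

last_response_time = '2021-01-01T12:00:00.123Z'  # illustrative value only
last_response_datetime = datetime.strptime(last_response_time, '%Y-%m-%dT%H:%M:%S.%fZ')
response_age = int((datetime.utcnow() - last_response_datetime).total_seconds())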
Example No. 41
 def check_version(self, version):
     log.info("checking version '%s'", version)
     if not version:
         qquit('UNKNOWN', '{0} version not found. {1}'.format(self.software, support_msg_api()))
     if not isVersion(version):
         qquit('UNKNOWN', '{0} version unrecognized \'{1}\'. {2}'\
                          .format(self.software, version, support_msg_api()))
     self.msg = '{0} version = {1}'.format(self.software, version)
     if self.expected is not None:
         log.info("verifying version against expected regex '%s'", self.expected)
         if re.match(self.expected, str(version)):
             log.info('version regex matches retrieved version')
         else:
             log.info('version regex does not match retrieved version')
             self.msg += " (expected '{0}')".format(self.expected)
             self.critical()
 def parse_json(self, json_data):
     data = json_data['value']
     try:
         nodes = data['nodes']
     except KeyError:
         raise UnknownError('nodes field not found, are you trying to run this on an old ' +
                            'Selenium Hub <= 3.x or Selenoid? That information is not available in those APIs')
     if not isList(nodes):
         raise UnknownError('nodes field is not a list as expected. {}'.format(support_msg_api()))
     total_nodes = 0
     available_nodes = 0
     for node in nodes:
         if self.browser:
             supports_browser = False
             for slot in node['slots']:
                 if slot['stereotype']['browserName'].lower() == self.browser.lower():
                     supports_browser = True
                     break
             if not supports_browser:
                 continue
         total_nodes += 1
         if node['availability'] == 'UP':
             available_nodes += 1
     self.ok()
     self.msg = 'Selenium Hub '
     if self.browser:
         self.msg += "'{}' ".format(self.browser)
     self.msg += 'nodes available = {}/{}'.format(available_nodes, total_nodes)
     self.check_thresholds(available_nodes)
     self.msg += ' | nodes_available={}{} nodes_total={}'\
                 .format(available_nodes,
                         self.get_perf_thresholds(boundary='lower'),
                         total_nodes)
Example No. 43
 def run(self):
     url = '{protocol}://{host}:{port}/rest/ingestApi/getSessionCount'.format(
         host=self.host, port=self.port, protocol=self.protocol)
     log.debug('GET %s', url)
     try:
         req = requests.get(url)
     except requests.exceptions.RequestException as _:
         errhint = ''
         if 'BadStatusLine' in str(_.message):
             errhint = ' (possibly connecting to an SSL secured port without using --ssl?)'
         elif self.protocol == 'https' and 'unknown protocol' in str(
                 _.message):
             errhint = ' (possibly connecting to a plain HTTP port with the -S / --ssl switch enabled?)'
         qquit('CRITICAL', str(_) + errhint)
     log.debug("response: %s %s", req.status_code, req.reason)
     log.debug("content:\n%s\n%s\n%s", '=' * 80, req.content.strip(),
               '=' * 80)
     if req.status_code != 200:
         qquit('CRITICAL', '{0} {1}'.format(req.status_code, req.reason))
     try:
         count = req.content.strip()
         if not isInt(count):
             raise ValueError('non-integer value returned by Attivio AIE')
         count = int(count)
         self.msg = '{software} ingest session count = {count}'.format(
             software=self.software, count=count)
         self.check_thresholds(count)
      except (KeyError, ValueError) as _:
         qquit('UNKNOWN', 'error parsing output from {software}: {exception}: {error}. {support_msg}'\
                          .format(software=self.software,
                                  exception=type(_).__name__,
                                  error=_,
                                  support_msg=support_msg_api()))
     self.msg += ' | ingest_session_count={0:d}{thresholds}'.format(
         count, thresholds=self.get_perf_thresholds())
Example No. 44
 def parse_json(self, json_data):
     log.info('parsing response')
     try:
         data = json_data['beans'][0]
         name_dir_statuses = data['NameDirStatuses']
         name_dir_data = json.loads(name_dir_statuses)
         active_dirs = name_dir_data['active']
         failed_dirs = name_dir_data['failed']
         num_active_dirs = len(active_dirs)
         num_failed_dirs = len(failed_dirs)
         self.msg = 'NameNode has {0} failed dir{1}'.format(
             num_failed_dirs, plural(num_failed_dirs))
         if num_failed_dirs > 0:
             self.warning()
             if self.verbose:
                 self.msg += ' ({0})'.format(', '.join(failed_dirs))
         self.msg += ', {0} active dir{1}'.format(num_active_dirs,
                                                  plural(num_active_dirs))
         if num_active_dirs < 1:
             self.critical()
         if self.verbose and num_active_dirs > 0:
             self.msg += ' ({0})'.format(', '.join(active_dirs))
         self.msg += ' | num_failed_dirs={0} num_active_dirs={1}'.format(
             num_failed_dirs, num_active_dirs)
     except KeyError as _:
         raise UnknownError("failed to parse json returned by NameNode at '{0}:{1}': {2}. {3}"\
                            .format(self.host, self.port, _, support_msg_api()))
     except ValueError as _:
         raise UnknownError("invalid json returned for NameDirStatuses by Namenode '{0}:{1}': {2}"\
                            .format(self.host, self.port, _))
Exemplo n.º 45
 def parse_json(self, json_data):
     if not isList(json_data):
         raise UnknownError('non-list returned by Atlas metadata server instance at {0}:{1}! {2}'\
                            .format(self.host, self.port, support_msg_api()))
     if len(json_data) < 1:
         raise CriticalError('no entities found!')
     if self.list_entities:
         print('=' * 100)
         print('{0:40} {1:25} {2}'.format('ID', 'Type', 'Name'))
         print('=' * 100)
         for entity in json_data:
             name = self.get_key(entity, 'name')
             _id = self.get_key(entity, 'id')
             _type = self.get_key(entity, 'type')
             print('{0:40} {1:25} {2}'.format(_id, _type, name))
         sys.exit(ERRORS['UNKNOWN'])
     if self.entity_id:
         if len(json_data) > 1:
             raise CriticalError('more than one matching entity returned!')
         json_data = json_data[0]
     elif self.entity_name:
         for entity in json_data:
             if self.entity_name == self.get_key(entity, 'name'):
                 # Recursion - a bit too clever but convenient
                 self.entity_name = None
                 self.entity_id = self.get_key(entity, 'id')
                 self.path += '/{0}'.format(self.entity_id)
                 req = self.query()
                 self.process_json(req.content)
                 # escape recursion
                 return
         raise CriticalError("entity with name '{name}' not found!".format(name=self.entity_name))
     name = self.get_key(json_data, 'name')
     state = self.get_key(json_data, 'state')
     # available for HDFS path but not DB
     #path = self.get_key(json_data, 'path')
     _type = self.get_key(json_data, 'type')
     tags = []
     if 'trait_names' in json_data:
         tags = self.get_key(json_data, 'trait_names')
     #traits = self.get_key(json_data, 'traits')
     version = self.get_key(json_data, 'version')
     modified_date = self.get_key(json_data, 'modified_time')
     self.msg += " '{name}' exists, state='{state}'".format(name=name, state=state)
     if state != 'ACTIVE':
         self.critical()
         self.msg += " (expected 'ACTIVE')"
     self.msg += ", type='{type}'".format(type=_type)
     self.check_type(_type)
     #if self.verbose:
     self.msg += ", tags='{tags}'".format(tags=','.join(tags))
     self.check_missing_tags(tags)
     #if self.verbose:
     #self.msg += ", traits='{traits}'".format(traits=','.join(traits))
     #self.check_missing_traits(traits)
     if self.verbose:
         self.msg += ", modified_date='{modified_date}', version='{version}'".format(
             modified_date=modified_date,
             version=version
         )
 def get_key(self, json_data, key):
     try:
         return json_data[key]
     except KeyError:
         raise UnknownError('\'{0}\' key was not returned in output from '.format(key) +
                            'HiveServer2 Interactive instance at {0}:{1}. {2}'\
                            .format(self.host, self.port, support_msg_api()))
 def parse_json(self, json_data):
     if not isList(json_data):
         raise UnknownError('non-list returned by Presto for nodes. {0}'.format(support_msg_api()))
     num_worker_nodes = len(json_data)
     self.msg = 'Presto SQL worker nodes = {0}'.format(num_worker_nodes)
     self.check_thresholds(num_worker_nodes)
     self.msg += ' | num_worker_nodes={0}{1}'.format(num_worker_nodes, self.get_perf_thresholds('lower'))
Exemplo n.º 48
 def get_key(self, json_data, key):
     try:
         return json_data[key]
     except KeyError:
         raise UnknownError('\'{0}\' key was not returned in output from '.format(key) +
                            'Atlas metadata server instance at {0}:{1}. {2}'\
                            .format(self.host, self.port, support_msg_api()))
Exemplo n.º 49
 def run(self):
     url = 'https://api.travis-ci.org/repos/{repo}/builds'.format(
         repo=self.repo)
      log.debug('GET %s', url)
     try:
         req = requests.get(url)
     except requests.exceptions.RequestException as _:
         raise CriticalError(_)
     log.debug("response: %s %s", req.status_code, req.reason)
     log.debug("content:\n%s\n%s\n%s", '=' * 80, req.content.strip(),
               '=' * 80)
     if req.status_code != 200:
         raise CriticalError("%s %s" % (req.status_code, req.reason))
     if log.isEnabledFor(logging.DEBUG):
         log.debug("\n{0}".format(jsonpp(req.content)))
     try:
         self.parse_results(req.content)
     except (KeyError, ValueError) as _:
         exception = traceback.format_exc().split('\n')[-2]
         # this covers up the traceback info and makes it harder to debug
         #raise UnknownError('failed to parse expected json response from Travis CI API: {0}'.format(exception))
         qquit(
             'UNKNOWN',
             'failed to parse expected json response from Travis CI API: {0}. {1}'
             .format(exception, support_msg_api()))
 def parse_json(self, json_data):
     if not isList(json_data):
         raise UnknownError('non-list returned by Presto for nodes. {0}'.format(support_msg_api()))
     nodes_lagging = []
     max_lag = 0
     re_protocol = re.compile('^https?://')
     num_nodes = len(json_data)
     for node_item in json_data:
         last_response_time = node_item['lastResponseTime']
         last_response_datetime = datetime.strptime(last_response_time, '%Y-%m-%dT%H:%M:%S.%fZ')
         timedelta = datetime.utcnow() - last_response_datetime
         response_age = int(timedelta.total_seconds())
         if response_age > max_lag:
             max_lag = response_age
         if response_age > self.max_age:
             uri = node_item['uri']
             uri = re_protocol.sub('', uri)
             nodes_lagging += [uri]
             log.info("node '%s' last response age %d secs > max age %s secs",
                      node_item['uri'], response_age, self.max_age)
         else:
             log.info("node '%s' last response age %d secs", node_item['uri'], response_age)
     num_nodes_lagging = len(nodes_lagging)
     self.msg = 'Presto SQL - worker nodes with response timestamps older than {0:d} secs = {1:d}'\
                .format(self.max_age, num_nodes_lagging)
     self.check_thresholds(num_nodes_lagging)
     self.msg += ' out of {0:d} nodes'.format(num_nodes)
     if num_nodes < 1:
         self.warning()
         self.msg += ' (< 1 worker found)'
     self.msg += ', current max response age = {0:.2f} secs'.format(max_lag)
     if self.verbose and nodes_lagging:
         self.msg += ' [{0}]'.format(', '.join(nodes_lagging))
     self.msg += ' | num_nodes_lagging={0}{1} max_response_age={2:.2f}s'\
                 .format(num_nodes_lagging, self.get_perf_thresholds(), max_lag)
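For reference, a minimal sketch of the per-node entry this parser consumes, with made-up values; only the 'uri' and 'lastResponseTime' fields are assumed here, since those are all the code above reads:

from datetime import datetime

sample_node = {
    'uri': 'http://10.0.0.5:8080',
    'lastResponseTime': '2021-01-01T00:00:05.000Z',
}
# same age calculation as in parse_json() above
last_response = datetime.strptime(sample_node['lastResponseTime'], '%Y-%m-%dT%H:%M:%S.%fZ')
response_age = int((datetime.utcnow() - last_response).total_seconds())
# a node counts as lagging when response_age exceeds the configured max age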
    def print_results(self, term, limit=None):
        data = self.search(term, limit)
        results = {}
        longest_name = 8
        try:
            # collect in dict to order by stars like normal docker search command
            for item in data['results']:
                star = item['star_count']
                name = item['name']
                if len(name) > longest_name:
                    longest_name = len(name)
                if not isInt(star):
                    die("star count '{0}' for repo '{1}' is not an integer! {2}"
                        .format(star, name, support_msg_api()))
                results[star] = results.get(star, {})
                results[star][name] = results[star].get(name, {})
                result = {}
                result['description'] = item['description']
                result['official'] = '[OK]' if item['is_official'] else ''
                # docker search doesn't output this so neither will I
                #result['trusted'] = result['is_trusted']
                result['automated'] = '[OK]' if item['is_automated'] else ''
                results[star][name] = result
            # mimicking the output spacing of the 'docker search' command
            if not self.quiet:
                print('{0:{5}s}   {1:45s}   {2:7s}   {3:8s}   {4:10s}'.format(
                    'NAME', 'DESCRIPTION', 'STARS', 'OFFICIAL', 'AUTOMATED',
                    longest_name))
        except KeyError as _:
            die('failed to parse results fields from data returned by DockerHub '
                + '(format may have changed?): {0}'.format(_))
        except IOError as _:
            if str(_) == '[Errno 32] Broken pipe':
                pass
            else:
                raise

        def truncate(mystr, length):
            if len(mystr) > length:
                mystr = mystr[0:length - 3] + '...'
            return mystr

        for star in reversed(sorted(results)):
            for name in sorted(results[star]):
                if self.quiet:
                    print(name.encode('utf-8'))
                else:
                    desc = truncate(results[star][name]['description'], 45)
                    print('{0:{5}s}   {1:45s}   {2:<7d}   {3:8s}   {4:10s}'.
                          format(name.encode('utf-8'), desc.encode('utf-8'),
                                 star, results[star][name]['official'],
                                 results[star][name]['automated'],
                                 longest_name))
        if self.verbose and not self.quiet:
            try:
                print('\nResults Shown: {0}\nTotal Results: {1}'.format(
                    len(data['results']), data['num_results']))
            except KeyError as _:
                die('failed to parse total results count from data returned by DockerHub '
                    + '(format may have changed?): {0}'.format(_))
Exemplo n.º 52
 def parse_builds(self, content):
     log.debug('parsing build info')
     build = None
     collected_builds = []
     json_data = json.loads(content)
     if not json_data or \
        'builds' not in json_data or \
        not json_data['builds']:
          qquit('UNKNOWN', "no Travis CI builds returned by the Travis API." +
                " Either the specified repo '{0}' doesn't exist".format(self.repo) +
                " or no builds have happened yet?" +
                " Also remember the repo is case sensitive, for example 'harisekhon/nagios-plugins' returns this" +
                " blank build set whereas 'HariSekhon/nagios-plugins' succeeds in returning latest builds information")
     builds = json_data['builds']
     # get latest finished failed build
     last_build_number = None
     found_newer_passing_build = False
     for _ in builds:
         # API returns most recent build first
         # extra check to make sure we're getting the very latest build number and API hasn't changed
         build_number = _['number']
         if not isInt(build_number):
             raise UnknownError('build number returned is not an integer!')
         build_number = int(build_number)
         if last_build_number is None:
             last_build_number = int(build_number) + 1
         if build_number >= last_build_number:
              raise UnknownError('build number returned is out of sequence, cannot be >= last build returned. ' + \
                                 '{0}'.format(support_msg_api()))
         last_build_number = build_number
         if self.completed:
             if len(collected_builds) < self.num and _['state'] in (
                     'passed', 'finished', 'failed', 'errored'):
                 collected_builds.append(_)
         elif self.failed:
             if _['state'] == 'passed':
                 if not collected_builds and not found_newer_passing_build:
                     log.warning("found more recent successful build #%s with state = '%s'" + \
                                 ", you may not need to debug this build any more", _['number'], _['state'])
                     found_newer_passing_build = True
             elif _['state'] in ('failed', 'errored'):
                 if len(collected_builds) < self.num:
                     collected_builds.append(_)
                     # by continuing to iterate through the rest of the builds we can check
                     # their last_build numbers are descending for extra sanity checking
                     #break
         elif len(collected_builds) < self.num:
             collected_builds.append(_)
             # by continuing to iterate through the rest of the builds we can check
             # their last_build numbers are descending for extra sanity checking
             #break
     if not collected_builds:
         qquit('UNKNOWN', 'no recent builds found')
     if log.isEnabledFor(logging.DEBUG):
         for build in collected_builds:
             log.debug("build:\n%s", jsonpp(build))
     return collected_builds
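As an illustration, a simplified sketch of the descending build-number sanity check performed in parse_builds() above, run against a synthetic (made-up) builds list:

builds = [{'number': '103', 'state': 'failed'},
          {'number': '102', 'state': 'passed'},
          {'number': '101', 'state': 'errored'}]
last_build_number = None
for build in builds:
    build_number = int(build['number'])
    # the API is expected to return the most recent build first, so numbers must strictly descend
    if last_build_number is not None and build_number >= last_build_number:
        raise UnknownError('build number returned is out of sequence. ' + support_msg_api())
    last_build_number = build_number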
 def print_table_regions(self):
     print('=' * self.total_width)
     print('{0:{1}}{2}'.format(self.region_header,
                               self.region_width,
                               self.separator),
           end='')
     print('{0:{1}}{2}'.format(self.start_key_header,
                               self.start_key_width,
                               self.separator),
           end='')
     print('{0:{1}}{2}'.format(self.end_key_header,
                               self.end_key_width,
                               self.separator),
           end='')
     print('{0}'.format(self.server_header))
     print('=' * self.total_width)
     try:
         for region in self._regions:
             print('{0:{1}}{2}'.format(self.bytes_to_str(self.shorten_region_name(region['name'])),
                                       self.region_width,
                                       self.separator),
                   end='')
             print('{0:{1}}{2}'.format(self.bytes_to_str(region['start_key']),
                                       self.start_key_width,
                                       self.separator),
                   end='')
             print('{0:{1}}{2}'.format(self.bytes_to_str(region['end_key']),
                                       self.end_key_width,
                                       self.separator),
                   end='')
             print('{0}:{1}'.format(region['server_name'], region['port']))
     except KeyError as _:
         die('error parsing region info: {0}. '.format(_) + support_msg_api())
     print('\nNumber of Regions: {0:d}'.format(len(self._regions)))
    def check_table_regions(self):
        log.info('checking regions for table \'%s\'', self.table)
        regions = None
        try:
            table = self.conn.table(self.table)
            regions = table.regions()
        except HBaseIOError as _:
            #if 'org.apache.hadoop.hbase.TableNotFoundException' in _.message:
            if 'TableNotFoundException' in _.message:
                qquit('CRITICAL', 'table \'{0}\' does not exist'.format(self.table))
            else:
                qquit('CRITICAL', _)
        except (socket.error, socket.timeout, ThriftException) as _:
            qquit('CRITICAL', _)

        if log.isEnabledFor(logging.DEBUG):
            log.debug('%s', jsonpp(regions))
        if not regions:
            qquit('CRITICAL', 'failed to get regions for table \'{0}\''.format(self.table))
        if not isList(regions):
            qquit('UNKNOWN', 'region info returned is not a list! ' + support_msg_api())
        num_regions = len(regions)
        log.info('num regions: %s', num_regions)

        self.msg = 'HBase table \'{0}\' has {1} region{2}'.format(self.table, num_regions, plural(num_regions))
        self.check_thresholds(num_regions)

        num_unassigned_regions = 0
        for region in regions:
            try:
                if not region['server_name']:
                    #log.debug('region \'%s\' is not assigned to any server', region['name'])
                    num_unassigned_regions += 1
            except KeyError as _:
                qquit('UNKNOWN', 'failed to find server assigned to region. ' + support_msg_api())
        log.info('num unassigned regions: %s', num_unassigned_regions)
        self.msg += ', {0} unassigned region{1}'.format(num_unassigned_regions, plural(num_unassigned_regions))
        if num_unassigned_regions > 0:
            self.warning()
            self.msg += '!'

        self.msg += ' |'
        self.msg += ' num_regions={0}'.format(num_regions) + self.get_perf_thresholds(boundary='lower')
        self.msg += ' num_unassigned_regions={0};1;0'.format(num_unassigned_regions)
        log.info('finished, closing connection')
        self.conn.close()
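check_table_regions() assumes self.conn is an already-opened HBase Thrift connection; a minimal sketch of how such a connection could be set up with happybase (the hostname and table name below are placeholders):

import happybase

# assumes the HBase Thrift server is listening on the default port 9090
conn = happybase.Connection(host='hbase-thrift.example.com', port=9090)
table = conn.table('mytable')
regions = table.regions()  # list of dicts with 'name', 'start_key', 'end_key', 'server_name', 'port', ...
conn.close()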
 def extract_response_message(response_dict):
     try:
          return '{0}: {1}. '.format(response_dict['status']['responseCode'],
                                     response_dict['status']['responseMessage'])
     except KeyError:
         log.warn('failed to extract responseCode/responseMessage for additional error information. ' \
                  + support_msg_api())
         return ''
 def parse_json(self, json_data):
     num_executors = json_data['numExecutors']
     if not isInt(num_executors):
         raise UnknownError('non-integer returned by Jenkins. {0}'.format(support_msg_api()))
     self.msg += '{:d}'.format(num_executors)
     self.check_thresholds(num_executors)
     self.msg += ' | num_executors={0:d}'.format(num_executors)
     self.msg += self.get_perf_thresholds(boundary='lower')
 def parse_json(self, json_data):
     log.info('parsing response')
     try:
         live_nodes = json_data['beans'][0]['LiveNodes']
         live_node_data = json.loads(live_nodes)
         num_datanodes = len(live_node_data)
         if num_datanodes < 1:
             raise CriticalError("no live datanodes returned by JMX API from namenode '{0}:{1}'"\
                                 .format(self.host, self.port))
         max_blocks = 0
         min_blocks = None
         for datanode in live_node_data:
             blocks = live_node_data[datanode]['numBlocks']
             if not isInt(blocks):
                 raise UnknownError('numBlocks is not an integer! {0}'.format(support_msg_api()))
             blocks = int(blocks)
             log.info("datanode '%s' has %s blocks", datanode, blocks)
             if blocks > max_blocks:
                 max_blocks = blocks
             if min_blocks is None or blocks < min_blocks:
                 min_blocks = blocks
         log.info("max blocks on a single datanode = %s", max_blocks)
         log.info("min blocks on a single datanode = %s", min_blocks)
         assert min_blocks is not None
         divisor = min_blocks
         if min_blocks < 1:
             log.info("min blocks < 1, resetting divisor to 1 (% will be very high)")
             divisor = 1
         block_imbalance = float("{0:.2f}".format((max_blocks - min_blocks) / divisor * 100))
         self.msg = '{0}% block imbalance across {1} datanode{2}'\
                    .format(block_imbalance, num_datanodes, plural(num_datanodes))
         self.ok()
         self.check_thresholds(block_imbalance)
         if self.verbose:
             self.msg += ' (min blocks = {0}, max blocks = {1})'.format(min_blocks, max_blocks)
         self.msg += " | block_imbalance={0}%".format(block_imbalance)
         self.msg += self.get_perf_thresholds()
         self.msg += " num_datanodes={0}".format(num_datanodes)
         self.msg += " min_blocks={0}".format(min_blocks)
         self.msg += " max_blocks={0}".format(max_blocks)
     except KeyError as _:
         raise UnknownError("failed to parse json returned by NameNode at '{0}:{1}': {2}. {3}"\
                            .format(self.host, self.port, _, support_msg_api()))
     except ValueError as _:
         raise UnknownError("invalid json returned for LiveNodes by Namenode '{0}:{1}': {2}"\
                            .format(self.host, self.port, _))
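A quick worked example of the imbalance formula used above, with made-up block counts: if the quietest datanode holds 2000 blocks and the busiest holds 3000, the imbalance is (3000 - 2000) / 2000 * 100 = 50%.

# made-up numbers illustrating the calculation in parse_json() above
min_blocks = 2000
max_blocks = 3000
block_imbalance = float('{0:.2f}'.format((max_blocks - min_blocks) / float(min_blocks) * 100))
# => 50.0, reported as '50.0% block imbalance across N datanodes'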
Exemplo n.º 58
 def parse_json(self, json_data):
     if self.list_jobs:
         print('Jenkins Jobs:\n')
         for job in json_data['jobs']:
             print(job['name'])
         sys.exit(ERRORS['UNKNOWN'])
     if 'lastCompletedBuild' in json_data:
         last_completed_build = json_data['lastCompletedBuild']
         if not last_completed_build:
             raise WarningError("job '{job}' not built yet".format(job=self.job))
         self.path = '/job/{job}/{number}/api/json'.format(job=self.job,
                                                           number=last_completed_build['number'])
         req = self.query()
         self.process_json(req.content)
         return
     displayname = json_data['displayName']
     duration = json_data['duration']
     if not isInt(duration):
         raise UnknownError('duration field returned non-integer! {0}'.format(support_msg_api()))
     duration = int(duration) / 1000
     result = json_data['result']
     timestamp = json_data['timestamp']
     if not isInt(timestamp):
         raise UnknownError('timestamp field returned non-integer! {0}'.format(support_msg_api()))
     timestamp = int(timestamp)
     building = json_data['building']
     self.msg += "build {build} status: ".format(build=displayname)
     if building:
         self.unknown()
         self.msg += 'STILL BUILDING!'
         return
     self.msg += result
     if result != 'SUCCESS':
         self.critical()
     self.msg += ', duration={duration} secs'.format(duration=duration)
     self.check_thresholds(duration)
     age = time.time() - (timestamp/1000)
     self.msg += ', age={age} secs'.format(age=sec2human(age))
     if age < 0:
         self.warning()
         self.msg += ' (< 0!)'
     if self.age and age > self.age:
         self.critical()
         self.msg += ' (> {0:d})'.format(self.age)
     self.msg += ' | build_duration={duration}s{perf_thresholds}'.format(duration=duration,
                                                                         perf_thresholds=self.get_perf_thresholds())
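Jenkins returns both 'duration' and 'timestamp' in milliseconds, which is why both are divided by 1000 above; a small illustration with made-up values:

import time

duration_ms = 754000           # 'duration' field from the build's api/json
timestamp_ms = 1500000000000   # 'timestamp' field (epoch milliseconds)
duration_secs = int(duration_ms) / 1000          # 754 secs, checked against the duration thresholds
age_secs = time.time() - (timestamp_ms / 1000)   # seconds since the build's timestamp, checked against --age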
 def parse_json(self, json_data):
     if not isDict(json_data):
         raise UnknownError('non-dict returned for hot threads. {}'.format(support_msg_api()))
     hot_threads = json_data['hot_threads']['threads']
     top_3 = self.get_opt('top_3')
     sum_percent = 0
     last_percent = None
      for thread in hot_threads:
          thread_percent = thread['percent_of_cpu_time']
          if last_percent is None:
              last_percent = thread_percent
          if thread_percent > last_percent:
              raise UnknownError('assertion failure - subsequent thread percent is unexpectedly higher' + \
                                 ', out of expected order. {}'.format(support_msg_api()))
          # track the previous thread's percentage so the descending-order check above compares
          # each thread against its predecessor rather than only against the first thread
          last_percent = thread_percent
          sum_percent += thread_percent
     self.msg = 'Logstash '
     if top_3:
         self.msg += 'top 3 hot threads cpu percentage = {}%'.format(sum_percent)
         self.check_thresholds(sum_percent)
         self.msg += ', '
     # they come sorted with highest at top
     top_thread = hot_threads[0]
     name = top_thread['name']
     percent = top_thread['percent_of_cpu_time']
     state = top_thread['state']
     # not available in 5.0, only later versions such as 6.0
     #thread_id = top_thread['thread_id']
     self.msg += 'top hot thread \'{}\' cpu percentage = {}%'.format(name, percent)
     if not top_3:
         self.check_thresholds(percent)
     self.msg += ', state = \'{}\''.format(state)
     #self.msg += ', id = {}'.format(state, thread_id)
     if self.verbose:
         if not isList(top_thread['traces']):
             raise UnknownError('hot thread\'s trace field is not a list. {}'.format(support_msg_api()))
         traces = '\\n'.join(top_thread['traces'])
         self.msg += ', traces: {}'.format(traces)
     if not top_3:
         self.msg += ', top 3 hot threads cpu percentage = {}%'.format(sum_percent)
     self.msg += ' | top_hot_thread_cpu_percentage={}%'.format(percent)
     if not top_3:
         self.msg += '{}'.format(self.get_perf_thresholds())
     self.msg += ' top_three_hot_thread_cpu_percentage={}%'.format(sum_percent)
     if top_3:
         self.msg += '{}'.format(self.get_perf_thresholds())
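For context, a sketch of the kind of structure this parser expects back from the Logstash hot threads API; only the fields read above ('name', 'percent_of_cpu_time', 'state', 'traces') are shown, and the values are made up:

sample = {
    'hot_threads': {
        'threads': [
            {'name': 'Ruby-0-Thread-7', 'percent_of_cpu_time': 12.5, 'state': 'timed_waiting',
             'traces': ['java.lang.Object.wait(Native Method)']},
            {'name': '[main]>worker1', 'percent_of_cpu_time': 3.2, 'state': 'waiting',
             'traces': ['sun.misc.Unsafe.park(Native Method)']},
        ]
    }
}
# threads are expected sorted by percent_of_cpu_time descending; the top entry drives the single-thread check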
    def parse_json(self, json_data):
        use_security = json_data['useSecurity']
        if not isinstance(use_security, bool):
            raise UnknownError('non-boolean returned by Jenkins. {0}'.format(support_msg_api()))

        self.msg += '{0}'.format(use_security)
        if not use_security:
            self.msg += ' (expected \'True\')'
            self.critical()