Example #1
 def main(self):
     try:
         # Python 2.x
         super(NagiosPlugin, self).main()
         # Python 3.x
         # super().main()
         # redirect_stderr_stdout()
     except CriticalError as _:
         qquit('CRITICAL', _)
     except WarningError as _:
         qquit('WARNING', _)
     except UnknownError as _:
         qquit('UNKNOWN', _)
     except CodingError as _:
         qquit('UNKNOWN', 'Programming Error: {0}. {1}'.format(_, support_msg()))
     except Exception as _:  # pylint: disable=broad-except
         exception_type = type(_).__name__
         if log.isEnabledFor(logging.DEBUG):
             log.debug("exception: '%s'", exception_type)
             log.debug(traceback.format_exc())
         msg = 'Nagios Plugin Exception: {exception_type}: {msg}'.format(exception_type=exception_type, msg=self.exception_msg())
         #msg = ', '.join([x.strip() for x in msg.split('\n')])
         # ', ' doesn't look nice for ':\n ...' => ':, ...' (snakebite OutOfNNException)
         #msg = '\t'.join([x.strip() for x in msg.split('\n')])
         #if self.options.verbose > 2:
         #    msg = type(_).__name__ + ': ' + msg
         msg += '. ' + support_msg()
         qquit('UNKNOWN', msg)
 def parse_table(table):
     """ Take a Beautiful soup table as argument and parse it for compaction information
     return True if compacting or False otherwise """
     log.debug('checking first following table')
     if log.isEnabledFor(logging.DEBUG):
         log.debug('table:\n%s\n%s', table.prettify(), '='*80)
     rows = table.findChildren('tr')
     if len(rows) < 3:
         raise UnknownError('parse error - less than the 3 expected rows in table attributes')
     col_names = rows[0].findChildren('th')
     if len(col_names) < 3:
         raise UnknownError('parse error - less than the 3 expected column headings')
     first_col = col_names[0].get_text().strip()
     if first_col != 'Attribute Name':
         raise UnknownError(
               'parse error - expected first column header to be \'Attribute Name\' '
               'but got \'{0}\' instead. '.format(first_col)
               + support_msg())
     # ===========
     # fix for older versions of HBase < 1.0 that do not populate the table properly
     # if table does not exist
     found_compaction = False
     for row in rows[1:]:
         cols = row.findChildren('td')
         if cols[0].get_text().strip() == 'Compaction':
             found_compaction = True
     if not found_compaction:
         raise CriticalError('Compaction table attribute not found, perhaps table does not exist?')
     # ===========
     for row in rows[1:]:
         cols = row.findChildren('td')
         if len(cols) < 3:
             raise UnknownError('parse error - less than the 3 expected columns in table attributes:  ' + \
                                '{0}. {1}'.format(cols, support_msg()))
         if cols[0].get_text().strip() == 'Compaction':
             compaction_state = cols[1].get_text().strip()
             # NONE when enabled, Unknown when disabled
             log.info('compaction state = %s', compaction_state)
             for _ in ('NONE', 'Unknown'):
                 if _ in compaction_state:
                     return False
             # MAJOR_AND_MINOR shows during major compaction
             if compaction_state == 'MINOR':
                 return False
             if len(compaction_state.split('\n')) > 1:
                 raise UnknownError('parsing error - table data next to Compaction > 1 line' + \
                                    ', old version of HBase < 0.96? Otherwise HBase UI may have changed' + \
                                    '. {0}'.format(support_msg()))
             return True
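
The following is a minimal usage sketch, not part of the original plugin: it feeds parse_table() above a hand-built fragment of HBase table.jsp-style markup. It assumes bs4 is installed and that parse_table() plus the helpers it relies on (log, logging, UnknownError, CriticalError, support_msg) are already in scope.

from bs4 import BeautifulSoup

sample_html = '''
<table>
  <tr><th>Attribute Name</th><th>Value</th><th>Description</th></tr>
  <tr><td>Enabled</td><td>true</td><td>Is the table enabled</td></tr>
  <tr><td>Compaction</td><td>NONE</td><td>Is the table compacting</td></tr>
</table>
'''

table = BeautifulSoup(sample_html, 'html.parser').find('table')
# expect False here: the Compaction state is NONE, ie. not currently compacting
print(parse_table(table))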
Example #3
 def parse_output(self, content):
     soup = BeautifulSoup(content, 'html.parser')
     if log.isEnabledFor(logging.DEBUG):
         log.debug("BeautifulSoup prettified:\n{0}\n{1}".format(soup.prettify(), '='*80))
     # shorter to just catch NoneType attribute error when tag not found and returns None
     try:
         basestats = soup.find('div', {'id': 'tab_baseStats'})
         table = basestats.find('table')
         #for table in basestats:
         rows = table.findAll('tr')
         headers = rows[0].findAll('th')
         header_server = headers[0].get_text()
         header_regions = headers[3].get_text()
         wider_table = len(headers) > 4
         # HBase 1.1 in HDP 2.3: ServerName | Start time | Requests Per Second | Num. Regions
         # HBase 1.2 (Apache):   ServerName | Start time | Version | Requests per Second | Num. Regions
         if wider_table:
             header_regions = headers[4].get_text()
         if header_server != 'ServerName':
             qquit('UNKNOWN', "Table headers in Master UI have changed" +
                   " (got {0}, expected 'ServerName'). ".format(header_server) + support_msg())
         if header_regions != 'Num. Regions':
             qquit('UNKNOWN', "Table headers in Master UI have changed" +
                   " (got {0}, expected 'Num. Regions'). ".format(header_regions) + support_msg())
         log.debug('%-50s\tnum_regions', 'server')
         for row in rows[1:]:
             # this can be something like:
             # 21689588ba40,16201,1473775984259
             # so don't apply isHost() validation because it'll fail FQDN / IP address checks
             cols = row.findAll('td')
             server = cols[0].get_text()
             if self.total_regex.match(server):
                 continue
             num_regions = cols[3].get_text()
             if wider_table:
                 num_regions = cols[4].get_text()
             if not isInt(num_regions):
                 qquit('UNKNOWN', "parsing error - got '{0}' for num regions".format(num_regions) +
                       " for server '{1}', was expecting integer.".format(server) +
                       " UI format must have changed" + support_msg())
             num_regions = int(num_regions)
             log.debug('%-50s\t%s', server, num_regions)
             if self.server_min_regions[1] is None or num_regions < self.server_min_regions[1]:
                 self.server_min_regions = (server, num_regions)
             if self.server_max_regions[1] is None or num_regions > self.server_max_regions[1]:
                 self.server_max_regions = (server, num_regions)
     except (AttributeError, TypeError, IndexError):
         qquit('UNKNOWN', 'failed to parse output. ' + support_msg())
Example #4
    def run(self):
        parquet_file = self.get_opt('parquet')
        avro_dir = self.get_opt('avro_dir')
        # let Spark fail if avro/parquet aren't available
        # can't check paths exist as want to remain generically portable
        # to HDFS, local filesystem or any other uri scheme Spark supports
        log.info("Parquet Source: %s" % parquet_file)
        log.info("Avro Destination: %s" % avro_dir)

        conf = SparkConf().setAppName('HS PySpark Parquet => Avro')
        sc = SparkContext(conf=conf) # pylint: disable=invalid-name
        sqlContext = SQLContext(sc)  # pylint: disable=invalid-name
        spark_version = sc.version
        log.info('Spark version detected as %s' % spark_version)

        if not isVersionLax(spark_version):
            die("Spark version couldn't be determined. " + support_msg('pytools'))

        #  pylint: disable=invalid-name
        if isMinVersion(spark_version, 1.4):
            # this doesn't work in Spark <= 1.3 - github docs don't mention the older .method() for writing avro
            df = sqlContext.read.parquet(parquet_file)
            df.write.format('com.databricks.spark.avro').save(avro_dir)
        else:
            die('Spark <= 1.3 is not supported due to avro dependency, sorry! ' + \
                'I may change this on request but prefer people just upgrade')
    def run(self):
        self.no_args()
        host = self.get_opt('host')
        port = self.get_opt('port')
        validate_host(host)
        validate_port(port)

        url = 'http://%(host)s:%(port)s/status' % locals()
        log.debug('GET %s' % url)
        try:
            req = requests.get(url)
        except requests.exceptions.RequestException as _:
            qquit('CRITICAL', _)
        log.debug("response: %s %s" % (req.status_code, req.reason))
        log.debug("content:\n{0}\n{1}\n{2}".format('='*80, req.content.strip(), '='*80))
        if req.status_code != 200:
            qquit('CRITICAL', "%s %s" % (req.status_code, req.reason))
        soup = BeautifulSoup(req.content, 'html.parser')
        #if log.isEnabledFor(logging.DEBUG):
        #     log.debug("BeautifulSoup prettified:\n{0}\n{1}".format(soup.prettify(), '='*80))
        try:
            status = soup.find('div', { 'class': 'alert alert-success'}).get_text().strip()
        except (AttributeError, TypeError):
            qquit('UNKNOWN', 'failed to parse Apache Drill status page. %s' % support_msg())
        self.msg = "Apache Drill status = '{0}'".format(status)
        if re.match('Running!', status):
            self.ok()
        else:
            self.critical()
 def parse_version(self, soup):
     version = None
     try:
         attributes_table = soup.find('table', {'id':'attributes_table'})
         rows = attributes_table.findAll('tr')
         num_rows = len(rows)
         self.sanity_check(num_rows > 5, 'too few rows ({0})'.format(num_rows))
         headers = rows[0].findAll('th')
         num_headers = len(headers)
         self.sanity_check(num_headers > 2, 'too few header columns ({0})'.format(num_headers))
         self.sanity_check(headers[0].text.strip() == 'Attribute Name',
                           'header first column does not match expected \'Attribute Name\'')
         self.sanity_check(headers[1].text.strip() == 'Value',
                           'header second column does not match expected \'Value\'')
         for row in rows:
             cols = row.findAll('td')
             num_cols = len(cols)
             if num_cols == 0:
                 continue
             self.sanity_check(num_cols > 2, 'too few columns ({0})'.format(num_cols))
             if cols[0].text.strip() == 'HBase Version':
                 version = cols[1].text.split(',')[0]
                 break
     except (AttributeError, TypeError):
          qquit('UNKNOWN', 'failed to parse HBase output. {0}\n{1}'\
                          .format(support_msg(), traceback.format_exc()))
      if version is None:
          qquit('UNKNOWN', 'failed to find HBase Version in table attributes. ' + support_msg())
      # strip things like -hadoop2 at end
      version = version.split('-')[0]
      return version
Example #7
 def parse(self, content):
     # could also collect lines after 'Regions-in-transition' if parsing /dump
     # sample:
     # hbase:meta,,1.1588230740 state=PENDING_OPEN, \
     # ts=Tue Nov 24 08:26:45 UTC 2015 (1098s ago), server=amb2.service.consul,16020,1448353564099
     soup = BeautifulSoup(content, 'html.parser')
     #if log.isEnabledFor(logging.DEBUG):
     #    log.debug("BeautifulSoup prettified:\n%s\n%s", soup.prettify(), '='*80)
     # looks like HMaster UI doesn't print this section if there are no regions in transition, must assume zero
     regions_stuck_in_transition = 0
     try:
         headings = soup.findAll('h2')
         for heading in headings:
             log.debug("checking heading '%s'", heading)
             if heading.get_text() == "Regions in Transition":
                 log.debug('found Regions in Transition section header')
                 table = heading.find_next('table')
                 log.debug('checking first following table')
                 regions_stuck_in_transition = self.parse_table(table)
                 if not isInt(regions_stuck_in_transition):
                     qquit('UNKNOWN', 'parse error - ' +
                           'got non-integer \'{0}\' for regions stuck in transition when parsing HMaster UI'\
                           .format(regions_stuck_in_transition))
         return regions_stuck_in_transition
         #qquit('UNKNOWN', 'parse error - failed to find table data for regions stuck in transition')
     except (AttributeError, TypeError):
         qquit('UNKNOWN', 'failed to parse HBase Master UI status page. ' + support_msg())
Example #8
    def run(self):
        self.no_args()
        json_file = self.options.json
        avro_dir = self.options.avro_dir
        # let Spark fail if json/avro dir aren't available
        # can't check paths exist as want to remain generically portable
        # to HDFS, local filesystem or any other uri scheme Spark supports
        log.info("Json Source: %s" % json_file)
        log.info("Avro Destination: %s" % avro_dir)

        conf = SparkConf().setAppName('HS PySpark Json => Avro')
        sc = SparkContext(conf=conf) # pylint: disable=invalid-name
        sqlContext = SQLContext(sc)  # pylint: disable=invalid-name
        spark_version = sc.version
        log.info('Spark version detected as %s' % spark_version)

        if not isVersionLax(spark_version):
            die("Spark version couldn't be determined. " + support_msg('pytools'))

        #  pylint: disable=invalid-name
        df = None
        if isMinVersion(spark_version, 1.4):
            df = sqlContext.read.json(json_file)
        else:
            die('Spark <= 1.3 is not supported due to avro dependency, sorry! ' + \
                'I may change this on request but prefer people just upgrade')
            # log.warn('running legacy code for Spark <= 1.3')
            #json = sqlContext.jsonFile(json_file)
        # this doesn't work in Spark <= 1.3 and the github docs don't mention the older methods for writing avro using
        # the databricks avro driver
        df.write.format('com.databricks.spark.avro').save(avro_dir)
 def run(self):
     expected = self.get_opt('expected')
     if expected is not None:
         validate_regex(expected)
         log.info('expected version regex: %s', expected)
     cmd = 'nodetool version'
     log.debug('cmd: ' + cmd)
     proc = subprocess.Popen(cmd.split(), stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
     (stdout, _) = proc.communicate()
     log.debug('stdout: ' + str(stdout))
     returncode = proc.wait()
     log.debug('returncode: ' + str(returncode))
      if returncode != 0 or (stdout is not None and 'Error' in str(stdout)):
         raise CriticalError('nodetool returncode: {0}, output: {1}'.format(returncode, stdout))
     version = None
     for line in str(stdout).split('\n'):
         match = self.version_regex.match(line)
         if match:
             version = match.group(1)
     if not version:
         raise UnknownError('Cassandra version not found in output. Nodetool output may have changed. {0}'.
                            format(support_msg()))
     if not isVersion(version):
         raise UnknownError('Cassandra version unrecognized \'{0}\'. {1}'.format(version, support_msg()))
     self.ok()
     self.msg = 'Cassandra version = {0}'.format(version)
     if expected is not None and not re.search(expected, version):
         self.msg += " (expected '{0}')".format(expected)
         self.critical()
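
The check above relies on self.version_regex without showing its definition; the line below is a plausible stand-in (an assumption, not taken from the original snippet), based on 'nodetool version' printing a line of the form 'ReleaseVersion: 3.11.4'.

import re
version_regex = re.compile(r'^ReleaseVersion:\s+(\d+\.\d+\.\d+.*)$')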
Example #10
 def parse(self, content):
     # could also collect lines after 'Regions-in-transition' if parsing /dump
     # sample:
     # hbase:meta,,1.1588230740 state=PENDING_OPEN, \
     # ts=Tue Nov 24 08:26:45 UTC 2015 (1098s ago), server=amb2.service.consul,16020,1448353564099
     soup = BeautifulSoup(content, 'html.parser')
     #if log.isEnabledFor(logging.DEBUG):
     #    log.debug("BeautifulSoup prettified:\n%s\n%s", soup.prettify(), '='*80)
     # looks like HMaster UI doesn't print this section if there are no regions in transition, must assume zero
     longest_rit_time = None
     try:
         headings = soup.findAll('h2')
         for heading in headings:
             log.debug("checking heading '%s'", heading)
             if heading.get_text() == "Regions in Transition":
                 log.debug('found Regions in Transition section header')
                 table = heading.find_next('table')
                 log.debug('checking first following table')
                 rows = table.findChildren('tr')
                 header_cols = rows[0].findChildren('th')
                 self.assert_headers(header_cols)
                 longest_rit_time = self.process_rows(rows)
                 return longest_rit_time
     except (AttributeError, TypeError):
         qquit('UNKNOWN', 'failed to parse HBase Master UI status page. %s' % support_msg())
Example #11
 def process_rows(rows):
     longest_rit_time = None
     # will skip header anyway when it doesn't find td (will contain th instead)
     # this will avoid accidentally skipping a row later if the input changes to rows[1:] instead of rows
     #for row in rows[1:]:
     for row in rows:
          log.debug('row: %s', row)
         cols = row.findChildren('td')
         # Regions in Transition rows only have 2 cols
         # <hex> region rows have Region, State, RIT time (ms)
         num_cols = len(cols)
         if num_cols == 0:
             # header row
             continue
         elif num_cols != 3:
             qquit('UNKNOWN', 'unexpected number of columns ({0}) '.format(num_cols)
                   + 'for regions in transition table. ' + support_msg())
         if 'Regions in Transition' in cols[0].get_text():
             continue
         rit_time = cols[2].get_text().strip()
         if not isInt(rit_time):
             qquit('UNKNOWN', 'parsing failed, got region in transition time of ' +
                   "'{0}', expected integer".format(rit_time))
         rit_time = int(rit_time)
          if longest_rit_time is None or rit_time > longest_rit_time:
             longest_rit_time = rit_time
     return longest_rit_time
    def get_rack_info(self):
        rack_regex = re.compile(r'^Rack:\s+(.+?)\s*$')
        node_regex = re.compile(r'^\s+({ip})(?::\d+)?\s+\(({host})\)\s*$'.format(ip=ip_regex, host=host_regex))
        #node_regex = re.compile(r'^\s+(.*?).*\s+\((.*?)\)\s*'.format(ip=ip_regex))
        start = time.time()
        cmd = 'hdfs dfsadmin -printTopology'
        log.debug('cmd: ' + cmd)
        proc = subprocess.Popen(cmd.split(), stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
        (stdout, _) = proc.communicate()
        self.query_time = time.time() - start
        log.debug('stdout: ' + str(stdout))
        returncode = proc.wait()
        log.debug('returncode: ' + str(returncode))
        if returncode != 0 or (stdout is not None and 'Error' in str(stdout)):
            raise CriticalError('hdfs command returncode: {0}, output: {1}'.format(returncode, stdout))
        lines = str(stdout).split('\n')

        racks = {}
        rack = None
        for line in lines:
            match = rack_regex.match(line)
            if match:
                rack = match.group(1)
                log.info('found rack: %s', rack)
                continue
            # ignore early warning lines sometimes output by JVM
            # only continue from point where we find at least first Rack definition
            if not rack:
                continue
            match = node_regex.match(line)
            if match:
                #ip = match.group(1)
                host = match.group(2)
                log.info('found host: %s', host)
                if not rack:
                    raise UnknownError('node regex matched before rack was detected!! {}'.format(support_msg()))
                if rack not in racks:
                    racks[rack] = []
                racks[rack].append(host)
            elif not line:
                continue
            else:
                raise UnknownError('parsing error. {}'.format(support_msg()))
        if not rack:
            raise UnknownError('no rack information found - parse error. {}'.format(support_msg()))
        return racks
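
As a quick illustration (not from the original), the snippet below shows the shape of 'hdfs dfsadmin -printTopology' output that rack_regex and node_regex above are written to match, using simplified stand-ins for the ip_regex / host_regex fragments imported from the library.

import re

ip_regex = r'\d+\.\d+\.\d+\.\d+'    # simplified stand-in for the library's ip_regex
host_regex = r'[A-Za-z0-9.-]+'      # simplified stand-in for the library's host_regex
rack_regex = re.compile(r'^Rack:\s+(.+?)\s*$')
node_regex = re.compile(r'^\s+({ip})(?::\d+)?\s+\(({host})\)\s*$'.format(ip=ip_regex, host=host_regex))

sample = '''Rack: /default-rack
   10.0.0.1:50010 (datanode1.example.com)
   10.0.0.2:50010 (datanode2.example.com)
'''
for line in sample.split('\n'):
    rack_match = rack_regex.match(line)
    node_match = node_regex.match(line)
    if rack_match:
        print('rack: ' + rack_match.group(1))
    elif node_match:
        print('node: ' + node_match.group(2))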
 def parse(self, req):
     soup = BeautifulSoup(req.content, 'html.parser')
     dead_workers = 0
     try:
         log.info('parsing %s page for number of dead workers', self.path)
         dead_workers = len([_ for _ in soup.find(id='data2').find('tbody').find_all('tr') if _])
     except (AttributeError, TypeError):
         raise UnknownError('failed to parse {0} Master info for dead workers. UI may have changed. {1}'.
                            format(self.software, support_msg()))
     try:
         dead_workers = int(dead_workers)
     except (ValueError, TypeError):
         raise UnknownError('{0} Master dead workers parsing returned non-integer: {1}. UI may have changed. {2}'.
                            format(self.software, dead_workers, support_msg()))
     self.msg = '{0} dead workers = {1}'.format(self.software, dead_workers)
     self.check_thresholds(dead_workers)
     self.msg += ' | '
     self.msg += 'dead_workers={0}{1}'.format(dead_workers, self.get_perf_thresholds())
 def parse_table(table):
     """ Take a Beautiful soup table as argument and parse it for compaction information
     return True if compacting or False otherwise """
     log.debug('checking first following table')
     if log.isEnabledFor(logging.DEBUG):
         log.debug('table:\n%s\n%s', table.prettify(), '='*80)
     rows = table.findChildren('tr')
     if len(rows) < 3:
         qquit('UNKNOWN', 'parse error - less than the 3 expected rows in table attributes')
     col_names = rows[0].findChildren('th')
     if len(col_names) < 3:
         qquit('UNKNOWN', 'parse error - less than the 3 expected column headings')
     first_col = col_names[0].get_text().strip()
     if first_col != 'Attribute Name':
         qquit('UNKNOWN',
              'parse error - expected first column header to be \'Attribute Name\' '
              'but got \'{0}\' instead. '.format(first_col)
              + support_msg())
     # ===========
     # fix for older versions of HBase < 1.0 that do not populate the table properly
     # if table does not exist
     found_compaction = False
     for row in rows[1:]:
         cols = row.findChildren('td')
         if cols[0].get_text().strip() == 'Compaction':
             found_compaction = True
     if not found_compaction:
         qquit('CRITICAL', 'Compaction table attribute not found, perhaps table does not exist?')
     # ===========
     for row in rows[1:]:
         cols = row.findChildren('td')
         if len(cols) < 3:
             qquit('UNKNOWN', 'parse error - less than the 3 expected columns in table attributes:  ' + \
                              '{0}. {1}'.format(cols, support_msg()))
         if cols[0].get_text().strip() == 'Compaction':
             compaction_state = cols[1].get_text().strip()
             # NONE when enabled, Unknown when disabled
             for _ in ('NONE', 'Unknown'):
                 if _ in compaction_state:
                     return False
             return True
Example #15
 def parse_is_table_compacting(content):
     soup = BeautifulSoup(content, 'html.parser')
     if log.isEnabledFor(logging.DEBUG):
         log.debug("BeautifulSoup prettified:\n{0}\n{1}".format(soup.prettify(), '='*80))
     try:
         headings = soup.findAll('h2')
         for heading in headings:
             log.debug("checking heading '%s'", heading)
             if heading.get_text() == 'Table Attributes':
                 log.debug('found Table Attributes section header')
                 table = heading.find_next('table')
                 log.debug('checking first following table')
                 if log.isEnabledFor(logging.DEBUG):
                     log.debug('table:\n%s\n%s', table.prettify(), '='*80)
                 rows = table.findChildren('tr')
                 if len(rows) < 3:
                     qquit('UNKNOWN', 'parse error - less than the 3 expected rows in table attributes')
                 col_names = rows[0].findChildren('th')
                 if len(col_names) < 3:
                     qquit('UNKNOWN', 'parse error - less than the 3 expected column headings')
                 first_col = col_names[0].get_text().strip()
                 if first_col != 'Attribute Name':
                     qquit('UNKNOWN',
                            'parse error - expected first column header to be \'Attribute Name\' '
                            'but got \'{0}\' instead. '.format(first_col)
                            + support_msg())
                 for row in rows[1:]:
                     cols = row.findChildren('td')
                     if len(cols) < 3:
                         qquit('UNKNOWN', 'parse error - less than the 3 expected columns in table attributes. '
                               + support_msg())
                     if cols[0].get_text().strip() == 'Compaction':
                         compaction_state = cols[1].get_text().strip()
                         # NONE when enabled, Unknown when disabled
                         if compaction_state in ('NONE', 'Unknown'):
                             return False
                         else:
                             return True
         qquit('UNKNOWN', 'parse error - failed to find Table Attributes section in JSP. ' + support_msg())
     except (AttributeError, TypeError):
         qquit('UNKNOWN', 'failed to parse output. ' + support_msg())
 def parse(self, req):
     soup = BeautifulSoup(req.content, 'html.parser')
     #if log.isEnabledFor(logging.DEBUG):
     #    log.debug("BeautifulSoup prettified:\n%s\n%s", soup.prettify(), '='*80)
     # this masks underlying exception
     #try:
     tab = soup.find('div', {'id':'tab_baseStats'})
     table = tab.find_next('table')
     rows = table.findChildren('tr')
     if len(rows) < 2:
         raise UnknownError('no regionserver rows found in base stats table! {}'.format(support_msg()))
     # HBase 1.1 in HDP 2.3: ServerName | Start time | Requests Per Second | Num. Regions
     # HBase 1.2 (Apache):   ServerName | Start time | Version | Requests per Second | Num. Regions
     # HBase 1.4 (Apache):   ServerName | Start time | Last Contact | Version | Requests Per Second | Num. Regions
     th_list = rows[0].findChildren('th')
     if len(th_list) < 4:
         raise UnknownError('no table header for base stats table!')
     expected_header = 'Requests Per Second'
     col_index = len(th_list) - 2
     found_header = th_list[col_index].text
     if found_header != expected_header:
         raise UnknownError("wrong table header found for column 4! Expected '{}' but got '{}'. {}"\
                            .format(expected_header, found_header, support_msg()))
     stats = {}
     for row in rows[1:]:
         cols = row.findChildren('td')
          if len(cols) <= col_index:
              raise UnknownError('Requests Per Second column not found in table row! {}'.format(support_msg()))
         regionserver = cols[0].text.strip().split(',')[0]
         if 'Total:' in regionserver:
             break
         reqs_per_sec = cols[col_index].text.strip()
         if not isInt(reqs_per_sec):
             raise UnknownError("non-integer found in Requests Per Second column for regionserver '{}'. {}"\
                                .format(regionserver, support_msg()))
          # cast via float first to avoid: ValueError: invalid literal for int() with base 10: '1.0'
         stats[regionserver] = int(float(reqs_per_sec))
     self.process_stats(stats)
 def parse(self, req):
     soup = BeautifulSoup(req.content, 'html.parser')
     # if log.isEnabledFor(logging.DEBUG):
     #     log.debug("BeautifulSoup prettified:\n%s\n%s", soup.prettify(), '='*80)
     status = None
     try:
         status = soup.find('div', {'class': 'alert alert-success'}).get_text().strip()
     except (AttributeError, TypeError):
         qquit('UNKNOWN', 'failed to parse Apache Drill status page. %s' % support_msg())
     if re.match('Running!?$', status):
         self.ok()
     else:
         self.critical()
     return status
Example #18
 def check_id(self, docker_image_line):
     #_id = output[1][name_len + 10:name_len + 10 + 20].strip()
     _id = docker_image_line.split()[2]
     log.debug('id: %s', _id)
     self.msg += ", id = '{id}'".format(id=_id)
     if self.expected_id:
         log.debug('checking expected --id')
         if not re.match(r'(sha\d+:)?\w+', _id):
             raise UnknownError("{msg} not in sha format as expected! {support}"\
                                .format(msg=self.msg, support=support_msg()))
         if _id != self.expected_id:
             self.critical()
             self.msg += " (expected id = '{0}')".format(self.expected_id)
     return _id
 def parse(self, req):
     soup = BeautifulSoup(req.content, 'html.parser')
     if log.isEnabledFor(logging.DEBUG):
         log.debug("BeautifulSoup prettified:\n{0}\n{1}".format(soup.prettify(), '='*80))
     version = None
     try:
         _ = soup.find('th', {'scope': 'row'})
         if _.text.strip() == 'Version':
             version = _.find_next_sibling('td').text
     except (AttributeError, TypeError):
         raise UnknownError('failed to parse output. {}'.format(support_msg()))
     if not version:
         raise UnknownError('failed to retrieve version')
     return version
 def parse_is_table_compacting(self, content):
     soup = BeautifulSoup(content, 'html.parser')
     if log.isEnabledFor(logging.DEBUG):
         log.debug("BeautifulSoup prettified:\n{0}\n{1}".format(soup.prettify(), '='*80))
     try:
         headings = soup.findAll('h2')
         for heading in headings:
             log.debug("checking heading '%s'", heading)
             if heading.get_text() == 'Table Attributes':
                 log.debug('found Table Attributes section header')
                 table = heading.find_next('table')
                 return self.parse_table(table)
         qquit('UNKNOWN', 'parse error - failed to find Table Attributes section in JSP. ' + support_msg())
     except (AttributeError, TypeError):
         qquit('UNKNOWN', 'failed to parse output. ' + support_msg())
Example #21
 def check_size(self, docker_image_line):
     match = re.search(r'(\d+(?:\.\d+)?)\s*([KMG]B)\s*$', docker_image_line)
     if match:
         size = match.group(1)
         units = match.group(2).strip()
         log.debug("size: %s", size)
         log.debug("units: %s", units)
         size_in_bytes = expand_units(size, units)
         log.debug("size in bytes: %s", size_in_bytes)
     else:
         raise UnknownError('failed to parse size. {0}'.format(support_msg()))
     self.msg += ", size = {size} {units}".format(size=size, units=units)
     log.debug('checking size %s against thresholds', size_in_bytes)
     self.check_thresholds(size_in_bytes)
     return size_in_bytes
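
A quick illustration (not part of the original check) of what the size regex above extracts from a 'docker images' style output line; the image name and ID below are made up for the example.

import re

# made-up 'docker images' output line purely for illustration
line = 'ubuntu              latest              2b7cc08dcdbb        5 weeks ago         211 MB'
match = re.search(r'(\d+(?:\.\d+)?)\s*([KMG]B)\s*$', line)
print(match.group(1), match.group(2))   # -> 211 MB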
 def parse(self, req):
     soup = BeautifulSoup(req.content, 'html.parser')
     # if log.isEnabledFor(logging.DEBUG):
     #     log.debug("BeautifulSoup prettified:\n%s\n%s", soup.prettify(), '='*80)
     status = None
     try:
         status = soup.find('div', {'class': 'alert alert-success'}).get_text().strip()
     except (AttributeError, TypeError):
         qquit('UNKNOWN', 'failed to parse Apache Drill status page. %s' % support_msg())
      # Found a STARTUP status in cluster nodes state, but looking at the code for /status it looks like
      # Running is all there is, or results for this endpoint are not properly documented
      # - see https://issues.apache.org/jira/browse/DRILL-6407
     #if status in ("Startup", "Initializing"):
     #    self.warning()
     if re.match('^Running!?$', status):
         self.ok()
     else:
         self.critical()
     return status
Example #23
 def main(self):
     try:
         # Python 2.x
         super(NagiosPlugin, self).main()
         # Python 3.x
         # super().main()
         # redirect_stderr_stdout()
     except CriticalError as _:
         qquit('CRITICAL', _)
     except WarningError as _:
         qquit('WARNING', _)
     except UnknownError as _:
         qquit('UNKNOWN', _)
     except CodingError as _:
         qquit('UNKNOWN', 'Programming Error: {0}. {1}'.format(_, support_msg()))
     except Exception as _:  # pylint: disable=broad-except
         qquit('UNKNOWN', _)
 def parse(self, req):
     soup = BeautifulSoup(req.content, 'html.parser')
     last_heartbeat = None
     try:
         self.list_workers(soup)
         heartbeat_col_header = soup.find('th', text='Node Name').find_next_sibling().get_text()
         # make sure ordering of columns is as we expect so we're parsing the correct number for heartbeat lag
         assert heartbeat_col_header == 'Last Heartbeat'
         last_heartbeat = soup.find('th', text=self.node).find_next_sibling().get_text()
         if last_heartbeat is None:
             raise AttributeError
     except (AttributeError, TypeError):
         raise CriticalError("{0} worker '{1}' not found among list of live workers!"\
                             .format(self.software, self.node))
     if not isInt(last_heartbeat):
         raise UnknownError("last heartbeat '{0}' for node '{1}' is not an integer, possible parsing error! {2}"\
                            .format(last_heartbeat, self.node, support_msg()))
     self.msg = "{0} worker '{1}' last heartbeat = {2} secs ago".format(self.software, self.node, last_heartbeat)
     self.check_thresholds(last_heartbeat)
     self.msg += ' | last_heartbeat={0}s{1}'.format(last_heartbeat, self.get_perf_thresholds())
Example #25
    def run(self):
        json_file = self.get_opt('json')
        parquet_dir = self.get_opt('parquet_dir')
        # let Spark fail if json/parquet aren't available
        # can't check paths exist as want to remain generically portable
        # to HDFS, local filesystem or any other uri scheme Spark supports
        log.info("Json Source: %s" % json_file)
        log.info("Parquet Destination: %s" % parquet_dir)

        conf = SparkConf().setAppName('HS PySpark JSON => Parquet')
        sc = SparkContext(conf=conf) # pylint: disable=invalid-name
        sqlContext = SQLContext(sc)  # pylint: disable=invalid-name
        spark_version = sc.version
        log.info('Spark version detected as %s' % spark_version)
        if not isVersionLax(spark_version):
            die("Spark version couldn't be determined. " + support_msg('pytools'))
        if isMinVersion(spark_version, 1.4):
            df = sqlContext.read.json(json_file) # pylint: disable=invalid-name
            df.write.parquet(parquet_dir)
        else:
            log.warn('running legacy code for Spark <= 1.3')
            df = sqlContext.jsonFile(json_file) # pylint: disable=invalid-name
            df.saveAsParquetFile(parquet_dir)
 def sanity_check(condition, msg):
     if not condition:
         qquit('UNKNOWN', 'HBase attribute table header ' +
               msg + ', failed sanity check! ' + support_msg())
Example #27
    def run(self):
        csv_file = self.get_opt('csv')
        avro_dir = self.get_opt('avro_dir')
        has_header = self.get_opt('has_header')
        # I don't know why the Spark guys made this a string instead of a bool
        header_str = 'false'
        if has_header:
            header_str = 'true'
        schema = self.get_opt('schema')
        # let Spark fail if csv/avro dir aren't available
        # can't check paths exist as want to remain generically portable
        # to HDFS, local filesystem or any other uri scheme Spark supports
        log.info("CSV Source: %s" % csv_file)
        log.info("Avro Destination: %s" % avro_dir)

        if schema:
            def get_type(arg):
                arg = str(arg).lower()
                if arg not in self.types_mapping:
                    self.usage("invalid type '%s' defined in --schema, must be one of: %s"
                               % (arg, ', '.join(sorted(self.types_mapping.keys()))))
                # return self.types_mapping[arg]
                module = __import__('pyspark.sql.types', globals(), locals(), ['types'], 0)
                class_ = getattr(module, self.types_mapping[arg])
                _ = class_()
                return _

            def create_struct(arg):
                name = str(arg).strip()
                data_type = 'string'
                if ':' in arg:
                    (name, data_type) = arg.split(':', 1)
                data_class = get_type(data_type)
                return StructField(name, data_class, True)
            # see https://github.com/databricks/spark-csv#python-api
            self.schema = StructType([create_struct(_) for _ in schema.split(',')])
            log.info('generated CSV => Spark schema')

        conf = SparkConf().setAppName('HS PySpark CSV => Avro')
        sc = SparkContext(conf=conf) # pylint: disable=invalid-name
        sqlContext = SQLContext(sc)  # pylint: disable=invalid-name
        spark_version = sc.version
        log.info('Spark version detected as %s' % spark_version)

        if not isVersionLax(spark_version):
            die("Spark version couldn't be determined. " + support_msg('pytools'))

        #  pylint: disable=invalid-name
        df = None
        if isMinVersion(spark_version, 1.4):
            if has_header and not schema:
                log.info('inferring schema from CSV headers')
                df = sqlContext.read.format('com.databricks.spark.csv')\
                     .options(header=header_str, inferschema='true')\
                     .load(csv_file)
            else:
                log.info('using explicitly defined schema')
                schema = self.schema
                df = sqlContext.read\
                     .format('com.databricks.spark.csv')\
                     .options(header=header_str)\
                     .load(csv_file, schema=schema)
        else:
            die('Spark <= 1.3 is not supported due to avro dependency, sorry! ' + \
                'I may change this on request but prefer people just upgrade')
            # log.warn('running legacy code for Spark <= 1.3')
            # if has_header and not schema:
            #     log.info('inferring schema from CSV headers')
            #     df = sqlContext.load(source="com.databricks.spark.csv", path=csv_file,
            #                          header=header_str, inferSchema='true')
            # elif self.schema:
            #     log.info('using explicitly defined schema')
            #     df = sqlContext.load(source="com.databricks.spark.csv", path=csv_file,
            #                          header=header_str, schema=self.schema)
            # else:
            #     die('no header and no schema, caught late')
        # this doesn't work in Spark <= 1.3 and the github docs don't mention the older methods for writing avro using
        # the databricks avro driver
        df.write.format('com.databricks.spark.avro').save(avro_dir)
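
For reference, a small sketch (not part of the original script) of the StructType that create_struct() above would build for --schema 'name,age:int,balance:double', assuming self.types_mapping maps 'string', 'int' and 'double' to the corresponding pyspark.sql.types classes.

from pyspark.sql.types import StructType, StructField, StringType, IntegerType, DoubleType

schema = StructType([
    StructField('name', StringType(), True),      # no ':type' suffix, so defaults to string
    StructField('age', IntegerType(), True),
    StructField('balance', DoubleType(), True),
])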
Example #28
 def parse_error(msg):
     qquit('UNKNOWN', 'parse error - ' + msg + '. ' + support_msg())
Example #29
 def sanity_check(condition, msg):
     if not condition:
         qquit(
             'UNKNOWN', 'HBase attribute table header ' + msg +
             ', failed sanity check! ' + support_msg())
Example #30
 def parse_error(msg):
     qquit('UNKNOWN', 'parse error - ' + msg + '. ' + support_msg())
Example #31
 def parse(self, req):
     soup = BeautifulSoup(req.content, 'html.parser')
     if log.isEnabledFor(logging.DEBUG):
         log.debug("BeautifulSoup prettified:\n{0}\n{1}".format(soup.prettify(), '='*80))
     version = None
     try:
         _ = soup.find('span', {'class': 'jenkins_ver'})
         log.debug('found span containing jenkins_ver')
         if _:
             version = _.text.strip()
     except (AttributeError, TypeError):
         raise UnknownError('failed to parse output')
     if not version:
         raise UnknownError('failed to retrieve version')
      log.debug('extracting version from Jenkins version string: %s', version)
     _ = re.match(r'Jenkins ver\. ({0})'.format(version_regex), version)
     if not _:
         raise UnknownError('failed to parse version string, format may have changed. {0}'.format(support_msg()))
     version = _.group(1)
     return version
Example #32
    def run(self):
        csv_file = self.options.csv
        avro_dir = self.options.avro_dir
        has_header = self.options.has_header
        # I don't know why the Spark guys made this a string instead of a bool
        header_str = 'false'
        if has_header:
            header_str = 'true'
        schema = self.options.schema
        # let Spark fail if csv/avro dir aren't available
        # can't check paths exist as want to remain generically portable
        # to HDFS, local filesystem or any other uri scheme Spark supports
        log.info("CSV Source: %s" % csv_file)
        log.info("Avro Destination: %s" % avro_dir)

        if schema:
            def get_type(arg):
                arg = str(arg).lower()
                if arg not in self.types_mapping:
                    self.usage("invalid type '%s' defined in --schema, must be one of: %s"
                               % (arg, ', '.join(sorted(self.types_mapping.keys()))))
                # return self.types_mapping[arg]
                module = __import__('pyspark.sql.types', globals(), locals(), ['types'], 0)
                class_ = getattr(module, self.types_mapping[arg])
                _ = class_()
                return _

            def create_struct(arg):
                name = arg
                data_type = 'string'
                if ':' in arg:
                    (name, data_type) = arg.split(':', 1)
                data_class = get_type(data_type)
                return StructField(name, data_class, True)
            # see https://github.com/databricks/spark-csv#python-api
            self.schema = StructType([create_struct(_) for _ in schema.split(',')])
            log.info('generated CSV => Spark schema')

        conf = SparkConf().setAppName('HS PySpark CSV => Avro')
        sc = SparkContext(conf=conf) # pylint: disable=invalid-name
        sqlContext = SQLContext(sc)  # pylint: disable=invalid-name
        spark_version = sc.version
        log.info('Spark version detected as %s' % spark_version)

        if not isVersionLax(spark_version):
            die("Spark version couldn't be determined. " + support_msg('pytools'))

        #  pylint: disable=invalid-name
        df = None
        if isMinVersion(spark_version, 1.4):
            if has_header and not schema:
                log.info('inferring schema from CSV headers')
                df = sqlContext.read.format('com.databricks.spark.csv')\
                     .options(header=header_str, inferschema='true')\
                     .load(csv_file)
            else:
                log.info('using explicitly defined schema')
                schema = self.schema
                df = sqlContext.read\
                     .format('com.databricks.spark.csv')\
                     .options(header=header_str)\
                     .load(csv_file, schema=schema)
        else:
            die('Spark <= 1.3 is not supported due to avro dependency, sorry! ' + \
                'I may change this on request but prefer people just upgrade')
            # log.warn('running legacy code for Spark <= 1.3')
            # if has_header and not schema:
            #     log.info('inferring schema from CSV headers')
            #     df = sqlContext.load(source="com.databricks.spark.csv", path=csv_file,
            #                          header=header_str, inferSchema='true')
            # elif self.schema:
            #     log.info('using explicitly defined schema')
            #     df = sqlContext.load(source="com.databricks.spark.csv", path=csv_file,
            #                          header=header_str, schema=self.schema)
            # else:
            #     die('no header and no schema, caught late')
        # this doesn't work in Spark <= 1.3 and the github docs don't mention the older methods for writing avro using
        # the databricks avro driver
        df.write.format('com.databricks.spark.avro').save(avro_dir)
 def parse(self, req):
     soup = BeautifulSoup(req.content, 'html.parser')
     if log.isEnabledFor(logging.DEBUG):
         log.debug("BeautifulSoup prettified:\n{0}\n{1}".format(soup.prettify(), '='*80))
     version = None
     try:
         _ = soup.find('span', {'class': 'jenkins_ver'})
         log.debug('found span containing jenkins_ver')
         if _:
             version = _.text.strip()
     except (AttributeError, TypeError):
         raise UnknownError('failed to parse output')
     if not version:
         raise UnknownError('failed to retrieve version')
      log.debug('extracting version from Jenkins version string: %s', version)
     _ = re.match(r'Jenkins ver\. ({0})'.format(version_regex), str(version))
     if not _:
         raise UnknownError('failed to parse version string, format may have changed. {0}'.format(support_msg()))
     version = _.group(1)
     return version
 def run(self):
     expected = self.get_opt("expected")
     if expected is not None:
         validate_regex(expected)
         log.info("expected version regex: %s", expected)
     cmd = "consul version"
     log.debug("cmd: " + cmd)
     proc = subprocess.Popen(cmd.split(), stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
     (stdout, _) = proc.communicate()
     log.debug("stdout: " + str(stdout))
     returncode = proc.wait()
     log.debug("returncode: " + str(returncode))
      if returncode != 0 or (stdout is not None and "Error" in str(stdout)):
         raise CriticalError("consul returncode: {0}, output: {1}".format(returncode, stdout))
     version = None
     for line in str(stdout).split("\n"):
         match = self.version_regex.match(line)
         if match:
             version = match.group(1)
     if not version:
         raise UnknownError(
             "Consul version not found in output. Consul output may have changed. {0}".format(support_msg())
         )
     if not isVersion(version):
         raise UnknownError("Consul version unrecognized '{0}'. {1}".format(version, support_msg()))
     self.ok()
     self.msg = "Consul version = {0}".format(version)
     if expected is not None and not re.search(expected, version):
         self.msg += " (expected '{0}')".format(expected)
         self.critical()