Exemplo n.º 1
0
    def get_ssh_address_attempt(self, job_id):
        """Fetch the Travis CI job log once and scan it for an SSH address.

        Returns the user@host string if an ssh line is found, None when the
        build log is empty or not started yet, and raises CriticalError on
        request failures, bad responses or terminal build states.
        """
        url = 'https://api.travis-ci.org/jobs/{job_id}/log.txt?deansi=true'.format(job_id=job_id)
        log.debug('GET %s' % url)
        try:
            req = requests.get(url)
        except requests.exceptions.RequestException as _:
            raise CriticalError(_)
        log.debug("response: %s %s", req.status_code, req.reason)
        log.debug("content:\n%s\n%s\n%s", '=' * 80, req.content.strip(), '=' * 80)
        # Travis CI behaviour has changed from 200 with no content indicating build log empty, not started yet
        # to now returning "500 Internal Server Error", content: "Sorry, we experienced an error."
        if req.status_code == 500:
            # don't output 500 it will confuse users in to thinking there is a real error which 500 usually indicates
            log.info('build not started yet')
            return None
        if req.status_code != 200:
            error_message = self.parse_travis_error(req)
            raise CriticalError('{0} {1}: {2}'.format(req.status_code, req.reason, error_message))
        content = req.content
        if not content:
            log.info('build log empty, build not started yet')
            return None
        # scan backwards for the last non-blank line - safe from StopIteration
        # because the empty-content case already returned above
        last_line = next(_ for _ in reversed(content.split('\n')) if _)
        # any of these markers on the final line means the build already
        # finished / was stopped, so an SSH session will never appear
        terminal_markers = ('Job Cancelled',
                            'Your build has been stopped',
                            'build exited with',
                            'build has been terminated')
        if any(marker in last_line for marker in terminal_markers):
            raise CriticalError(last_line)
        regex_ssh = re.compile(r'^\s*ssh\s+(\w+\@{host_regex})\s*$'.format(host_regex=host_regex), re.I)
        # first matching ssh line wins
        for line in content.split('\n'):
            match = regex_ssh.match(line)
            if match:
                return match.group(1)
        return None
Exemplo n.º 2
0
    def run(self):
        """Connect to Jenkins, count installed plugins with pending updates
        and append the result plus perfdata to self.msg (warning if any)."""
        server_url = '{proto}://{host}:{port}'.format(proto=self.protocol,
                                                      host=self.host,
                                                      port=self.port)
        try:
            log.debug('setting up Jenkins connection to %s', server_url)
            begin = time.time()
            conn = jenkins.Jenkins(server_url,
                                   username=self.user,
                                   password=self.password,
                                   timeout=self.timeout / 3)
            if log.isEnabledFor(logging.DEBUG):
                log.debug('getting user')
                whoami = conn.get_whoami()
                log.debug('connected as user %s', jsonpp(whoami))
            log.debug('getting plugin info')
            # deprecated but .get_plugins() output is not JSON serializable
            # so must use old deprecated method get_plugins_info() :-/
            plugin_list = conn.get_plugins_info()
            elapsed = time.time() - begin
        except jenkins.JenkinsException as _:
            raise CriticalError(_)

        if log.isEnabledFor(logging.DEBUG):
            log.debug('%s', jsonpp(plugin_list))
        installed = len(plugin_list)
        # tally plugins Jenkins flags as having a newer version available
        updates = sum(1 for plugin in plugin_list if plugin['hasUpdate'])
        self.msg += " {0} plugin update{1} available out of {2} installed plugin{3}".format(
            updates, plural(updates), installed,
            plural(installed))
        if updates:
            self.warning()
        self.msg += ' | updates_available={0};1 plugins_installed={1} query_time={2:.4f}s'.format(
            updates, installed, elapsed)
 def run(self):  # pylint: disable=no-self-use
     """Parse /proc/meminfo and verify kernel HugePages are disabled.

     Appends 'disabled' to self.msg when HugePages_Total is 0, raises
     CriticalError when enabled and UnknownError when the field is absent.
     """
     total_regex = re.compile(r'^HugePages_Total:\s+(\d+)\s*$')
     total = None
     with open('/proc/meminfo') as meminfo:
         for line in meminfo:
             if 'HugePage' in line:
                 log.debug(line)
             match = total_regex.match(line)
             if match:
                 total = int(match.group(1))  # regex guarantees digits only
                 break
     if total is None:
         raise UnknownError(
             'HugePages Total not found in /proc/meminfo. {}'.format(
                 support_msg()))
     if total == 0:
         self.msg += 'disabled'
     else:
         raise CriticalError(
             ' Huge Pages = enabled. This should be disabled for Big Data '
             'systems such as Hadoop / MongoDB for performance reasons etc...')
Exemplo n.º 4
0
    def get_ssh_address_attempt(self, job_id):
        """Fetch the Travis CI job log for job_id and scan it for an SSH
        connection line, returning the user@host address.

        Returns None when the build log is empty (build not started yet).
        Raises CriticalError on request failure, non-200 response, or when
        the log's last line shows the build already finished / was stopped.
        """
        #url = 'https://travis-ci.org/{repo}/jobs/{job_id}'.format(repo=repo, job_id=job_id)
        url = 'https://api.travis-ci.org/jobs/{job_id}/log.txt?deansi=true'.format(
            job_id=job_id)
        log.debug('GET %s' % url)
        try:
            req = requests.get(url)
        except requests.exceptions.RequestException as _:
            raise CriticalError(_)
        log.debug("response: %s %s", req.status_code, req.reason)
        log.debug("content:\n%s\n%s\n%s", '=' * 80, req.content.strip(),
                  '=' * 80)
        if req.status_code != 200:
            error_message = self.parse_travis_error(req)
            raise CriticalError('{0} {1}: {2}'.format(req.status_code,
                                                      req.reason,
                                                      error_message))
        content = req.content
        if not content:
            log.info('build log empty, build not started yet')
            return None
        # find last non-blank line - do this after checking for no content otherwise will hit StopIteration
        last_line = next(_ for _ in reversed(content.split('\n')) if _)
        #log.debug('last line: %s', last_line)
        # 'Done: Job Cancelled'
        if 'Job Cancelled' in last_line:
            raise CriticalError(last_line)
        elif 'Your build has been stopped' in last_line:
            raise CriticalError(last_line)
        # Done. Your build exited with 0
        elif 'build exited with' in last_line:
            raise CriticalError(last_line)
        # The build has been terminated
        elif 'build has been terminated' in last_line:
            raise CriticalError(last_line)

        ssh_address = None
        # NOTE(review): host_regex is presumably a module-level pattern
        # fragment; this variant anchors at column 0 and is case-sensitive,
        # unlike other copies compiled with \s* and re.I - confirm intended
        regex_ssh = re.compile(
            r'^ssh\s+(\w+\@{host_regex})\s*$'.format(host_regex=host_regex))
        for line in content.split('\n'):
            match = regex_ssh.match(line)
            if match:
                ssh_address = match.group(1)
                break
        return ssh_address
Exemplo n.º 5
0
 def end(self):
     """Verify the consumed message matches what was published, then build
     the final status message with timings and perfdata and exit via qquit."""
     consumed = self._consumed_message
     published = self.publish_message
     if consumed is None:
         raise UnknownError('read value is not set!')
     log.info("checking consumed message '%s' == published message '%s'",
              consumed, published)
     if consumed != published:
         raise CriticalError(
             "wrote '{0}' but got back '{1}' instead".format(published, consumed))
     precision = self._precision
     self.msg = '{0} message published and consumed back successfully'.format(self.name)
     self.msg += ', published in {0:.{1}f} secs'.format(self._publish_time, precision)
     self.check_thresholds(self._publish_time)
     self.msg += ', consumed in {0:.{1}f} secs'.format(self._consume_time, precision)
     self.check_thresholds(self._consume_time)
     self.msg += ', total time = {0:.{1}f} secs'.format(self._total_time, precision)
     self.msg += ' | publish_time={0:.{5}f}s{1} consume_time={2:.{5}f}s{3} total_time={4:.{5}f}s'.format(
         self._publish_time, self.get_perf_thresholds(), self._consume_time,
         self.get_perf_thresholds(), self._total_time, precision)
     qquit(self.status, self.msg)
Exemplo n.º 6
0
 def end(self):
     """Compare the value read back against the value written and emit the
     final status message with write/read/delete timings and perfdata."""
     # don't inherit read check's end as we want a different output format
     written = self._write_value
     read_back = self._read_value
     if read_back is None:
         raise UnknownError('read value is not set!')
     log.info("checking read key '%s' == written key '%s'",
              read_back, written)
     if read_back != written:
         raise CriticalError(
             "wrote '{0}' but got back '{1}' instead".format(written, read_back))
     self.msg = '{0} key written and read back successfully'.format(self.name)
     self.msg += ', written in {0:.7f} secs'.format(self._write_timing)
     self.check_thresholds(self._write_timing)
     self.msg += ', read in {0:.7f} secs'.format(self._read_timing)
     self.check_thresholds(self._read_timing)
     self.msg += ', deleted in {0:.7f} secs'.format(self._delete_timing)
     self.check_thresholds(self._delete_timing)
     self.msg += ' | write_time={0:.7f}s{1} read_time={2:.7f}s{3} delete_time={4:.7f}s{5}'.format(
         self._write_timing, self.get_perf_thresholds(), self._read_timing,
         self.get_perf_thresholds(), self._delete_timing,
         self.get_perf_thresholds())
     qquit(self.status, self.msg)
Exemplo n.º 7
0
    def parse_json(self, json_data):
        """Validate the Apache Drill storage plugin list, optionally print
        it (--list) and exit, otherwise check the requested plugin's
        enabled state and type against --type."""
        if not isList(json_data):
            raise UnknownError(
                'non-list returned for storage plugins. {}'.format(
                    support_msg_api()))
        if self.get_opt('list'):
            print('Apache Drill storage plugins:\n')
            print('=' * 50)
            print('%-10s\t%-10s\t%s' % ('Name', 'Type', 'Enabled'))
            print('=' * 50 + '\n')
            for plugin in json_data:
                plugin_config = plugin['config']
                print('%-10s\t%-10s\t%s' % (plugin['name'],
                                            plugin_config['type'],
                                            plugin_config['enabled']))
            sys.exit(ERRORS['UNKNOWN'])

        # locate the requested plugin's config, remembering type + enabled
        found_config = None
        plugin_type = None
        enabled = None
        for plugin in json_data:
            if plugin['name'] == self.storage_plugin:
                found_config = plugin['config']
                plugin_type = found_config['type']
                enabled = found_config['enabled']
                break
        if not found_config:
            raise CriticalError("Apache Drill storage plugin '{}' not found! See --list for available plugins!"\
                                .format(self.storage_plugin))
        self.msg = "Apache Drill storage plugin '{}' enabled = {}, plugin type = '{}'"\
                   .format(self.storage_plugin, enabled, plugin_type)
        if not enabled:
            self.critical()
        expected_type = self.get_opt('type')
        if expected_type and expected_type != plugin_type:
            self.critical()
            self.msg += " (expected '{}')".format(expected_type)
Exemplo n.º 8
0
    def run(self):
        """Connect to Jenkins and check the number of offline nodes against
        thresholds, appending node counts and perfdata to self.msg."""
        server_url = '{proto}://{host}:{port}'.format(proto=self.protocol,
                                                      host=self.host,
                                                      port=self.port)
        try:
            log.debug('setting up Jenkins connection to %s', server_url)
            begin = time.time()
            conn = jenkins.Jenkins(server_url,
                                   username=self.user,
                                   password=self.password,
                                   timeout=self.timeout / 3)
            if log.isEnabledFor(logging.DEBUG):
                log.debug('getting user')
                whoami = conn.get_whoami()
                log.debug('connected as user %s', jsonpp(whoami))
            log.debug('getting Jenkins nodes')
            nodes = conn.get_nodes()
            log.debug('nodes: %s', nodes)
            node_count = len(nodes)
            log.debug('node count: %s', node_count)
            # tally nodes Jenkins reports as offline
            offline_nodes = sum(1 for node in nodes if node['offline'])
            self.msg += '{0} offline node{1}'.format(offline_nodes,
                                                     plural(offline_nodes))
            self.check_thresholds(offline_nodes)
            self.msg += ' out of {0} node{1}'.format(node_count,
                                                     plural(node_count))
        except jenkins.JenkinsException as _:
            raise CriticalError(_)

        query_time = time.time() - begin
        self.msg += ' | offline_nodes={0:d}'.format(offline_nodes)
        self.msg += self.get_perf_thresholds()
        self.msg += ' node_count={0:d}'.format(node_count)
        self.msg += ' query_time={0:.4f}s'.format(query_time)
Exemplo n.º 9
0
 def run(self):
     """Check AWS CloudTrail status (logging enabled, multi-region, log
     file validation) for one named trail, or all trails via check_trails().

     Bug fix: --list-trails previously called sys.exit() inside the for
     loop, printing only the first trail name; the exit now happens after
     the loop so every trail prints (matches the other --list handlers).
     """
     client = boto3.client('cloudtrail')
     log.info('describing cloud trails')
     _ = client.describe_trails()
     log.debug('%s', jsonpp(_))
     trail_list = _['trailList']
     num_trails = len(trail_list)
     log.info('found %s trails', num_trails)
     if self.get_opt('list_trails'):
         print('Cloud Trails:\n')
         for trail in trail_list:
             print(trail['Name'])
         # exit after listing ALL trails, not inside the loop
         sys.exit(ERRORS['UNKNOWN'])
     if self.trail_name:
         trail_info = None
         for trail in trail_list:
             name = trail['Name']
             if self.trail_name and self.trail_name != name:
                 continue
             is_multi_region = trail['IsMultiRegionTrail']
             is_logfile_validation = trail['LogFileValidationEnabled']
             trail_info = client.get_trail_status(Name=name)
             log.debug('%s', jsonpp(trail_info))
         if not trail_info:
             raise CriticalError('info for trail \'{}\' not found'.format(
                 self.trail_name))
         is_logging = trail_info['IsLogging']
         # any of these conditions degrades the check to WARNING
         if not is_logging:
             self.warning()
         elif not is_multi_region and not self.no_multi_region:
             self.warning()
         elif not is_logfile_validation and not self.no_logfile_validation:
             self.warning()
         self.msg = 'AWS cloudtrail \'{}\' logging: {}, multi-region: {}, logfile-validation-enabled: {}'\
                    .format(self.trail_name, is_logging, is_multi_region, is_logfile_validation)
     else:
         self.check_trails(client, trail_list)
 def parse(self, req):
     """Parse the worker-list HTML page, find this node's 'Last Heartbeat'
     cell, and check the heartbeat lag (in seconds) against thresholds.

     Bug fix: the column-order sanity check used a bare `assert`, which is
     silently stripped under `python -O`; it now raises UnknownError
     explicitly so the guard always runs.
     """
     soup = BeautifulSoup(req.content, 'html.parser')
     last_heartbeat = None
     try:
         self.list_workers(soup)
         heartbeat_col_header = soup.find(
             'th', text='Node Name').find_next_sibling().get_text()
         # make sure ordering of columns is as we expect so we're parsing the correct number for heartbeat lag
         # explicit raise instead of assert so the check survives python -O
         # (UnknownError is not in the except tuple below, so it propagates)
         if heartbeat_col_header != 'Last Heartbeat':
             raise UnknownError("heartbeat column header '{0}' != 'Last Heartbeat', possible layout change! {1}"\
                                .format(heartbeat_col_header, support_msg()))
         last_heartbeat = soup.find(
             'th', text=self.node).find_next_sibling().get_text()
         if last_heartbeat is None:
             raise AttributeError
     except (AttributeError, TypeError):
         raise CriticalError("{0} worker '{1}' not found among list of live workers!"\
                             .format(self.software, self.node))
     if not isInt(last_heartbeat):
         raise UnknownError("last heartbeat '{0}' for node '{1}' is not an integer, possible parsing error! {2}"\
                            .format(last_heartbeat, self.node, support_msg()))
     self.msg = "{0} worker '{1}' last heartbeat = {2} secs ago".format(
         self.software, self.node, last_heartbeat)
     self.check_thresholds(last_heartbeat)
     self.msg += ' | last_heartbeat={0}s{1}'.format(
         last_heartbeat, self.get_perf_thresholds())
 def parse_json(self, json_data):
     """Find the newest datapoint timestamp across all returned OpenTSDB
     metrics and alert on its age in seconds.

     NOTE(review): metric_latest_age is re-bound to a *formatted string*
     before being passed to check_thresholds() - presumably the plugin
     framework coerces numeric strings; confirm before changing.
     """
     if not isList(json_data):
         raise UnknownError(
             'json data returned is not list as expected! {}'.format(
                 support_msg_api()))
     if not json_data:
         raise CriticalError('OpenTSDB no metric received in last minute!')
     highest_timestamp = 0
     # timestamps arrive as string keys of each metric's 'dps' dict
     for metric in json_data:
         for timestamp in metric['dps'].keys():
             timestamp = int(timestamp)
             if timestamp > highest_timestamp:
                 highest_timestamp = timestamp
     log.info('highest timestamp = %s', highest_timestamp)
     metric_latest_age = time.time() - highest_timestamp
     # a negative age means the datapoint is in the future => clock skew
     if metric_latest_age < 0:
         raise UnknownError('OpenTSDB latest metric age is {} secs in the future! Mismatch server clocks?'\
                            .format(abs(metric_latest_age)))
     metric_latest_age = '{:.2f}'.format(metric_latest_age)
     self.msg = 'OpenTSDB latest metric age = {} secs'.format(
         metric_latest_age)
     self.check_thresholds(metric_latest_age)
     self.msg += ' | metric_latest_age={}s{}'.format(
         metric_latest_age, self.get_perf_thresholds())
 def run(self):
     """Check AWS Config recorder status (recording + lastStatus) for one
     named recorder, or all recorders via check_recorders().

     Bug fix: --list-recorders previously called sys.exit() inside the for
     loop, printing only the first recorder name; the exit now happens
     after the loop so every recorder prints.
     """
     client = boto3.client('config')
     log.info('describing config recorders')
     _ = client.describe_configuration_recorder_status()
     log.debug('%s', jsonpp(_))
     recorders = _['ConfigurationRecordersStatus']
     num_recorders = len(recorders)
     log.info('found %s recorders', num_recorders)
     if self.get_opt('list_recorders'):
         print('Config Recorders:\n')
         for recorder in recorders:
             print(recorder['name'])
         # exit after listing ALL recorders, not inside the loop
         sys.exit(ERRORS['UNKNOWN'])
     if self.recorder_name:
         recorder_info = None
         for recorder in recorders:
             name = recorder['name']
             if self.recorder_name and self.recorder_name != name:
                 continue
             recorder_info = recorder
         if not recorder_info:
             raise CriticalError(
                 'info for aws config recorder \'{}\' not found'.format(
                     self.recorder_name))
         recording = recorder_info['recording']
         last_status = recorder_info['lastStatus']
         # not recording at all is critical
         if not recording:
             self.critical()
         # PENDING is transient -> warning; anything else non-SUCCESS -> critical
         if last_status.upper() == 'PENDING':
             self.warning()
         elif last_status.upper() != 'SUCCESS':
             self.critical()
         self.msg = 'AWS config recorder \'{}\' recording: {}, lastStatus: {}'\
                    .format(self.recorder_name, recording, last_status)
     else:
         self.check_recorders(recorders)
 def run(self):
     """Query ZooKeeper's 'envi' four-letter command over a raw socket and
     check the reported version, optionally against an --expected regex.

     Bug fix: the socket was only closed on the happy path; if sendall() or
     recv() raised, the connection leaked. It is now closed in a finally
     block before the socket.error handler converts the exception.
     """
     self.no_args()
     host = self.get_opt('host')
     port = self.get_opt('port')
     validate_host(host)
     validate_port(port)
     expected = self.get_opt('expected')
     if expected is not None:
         validate_regex(expected)
         log.info('expected version regex: %s', expected)
     data = None
     try:
         conn = socket.create_connection((host, port), timeout=self.timeout/2)
         try:
             conn.sendall('envi')
             data = conn.recv(1024)
         finally:
             # always release the socket, even if send/recv raised
             conn.close()
     except socket.error as _:
         raise CriticalError('Failed to connect to ZooKeeper: ' + str(_))
     version = None
     log.debug(data.strip())
     for line in data.split('\n'):
         _ = self.version_line_regex.match(line)
         if _:
             version = _.group(1)
             break
     if not version:
         raise UnknownError('ZooKeeper version not found in output. {0}'.format(support_msg_api()))
     if not isVersion(version):
         raise UnknownError('ZooKeeper version unrecognized \'{0}\'. {1}'.format(version, support_msg_api()))
     self.ok()
     self.msg = 'ZooKeeper version = {0}'.format(version)
     if expected is not None and not re.match(expected, version):
         self.msg += " (expected '{0}')".format(expected)
         self.critical()
Exemplo n.º 14
0
 def connection_cancel_callback(self):
     """Raise CriticalError when the broker cancels our channel consumer."""
     err = '{name} broker {host}:{port} sent channel cancel notification'.format(
         name=self.name, host=self.host, port=self.port)
     raise CriticalError(err)
Exemplo n.º 15
0
 def connection_timeout_handler(self):
     """Raise CriticalError when communication with the broker times out."""
     err = "connection timed out while communicating with {name} broker '{host}:{port}'".format(
         name=self.name, host=self.host, port=self.port)
     raise CriticalError(err)
Exemplo n.º 16
0
 def connection_blocked_callback(method):
     """Raise CriticalError when the broker reports the connection blocked
     (could really be a warning).

     Bug fix: the concatenated message was missing a space, producing
     '...<reason>(is the RabbitMQ...' - a space now separates the reason
     from the parenthesised hint.
     """
     raise CriticalError('connection blocked: {0} '.format(method.reason) + \
                         '(is the RabbitMQ broker low on resources eg. RAM / disk?)')
Exemplo n.º 17
0
 def check_channel(self):
     """Verify the AMQP channel is still open, raising CriticalError if not."""
     log.debug('checking channel is still open')
     if self.channel.is_open:
         return
     raise CriticalError('channel closed')
Exemplo n.º 18
0
 def check_connection(self):
     """Verify the AMQP connection is still open, raising CriticalError if not."""
     log.debug('checking connection is still open')
     if self.conn.is_open:
         return
     raise CriticalError('connection closed')
Exemplo n.º 19
0
 def run(self):
     """Delegate to the parent check, converting pika errors to CriticalError."""
     try:
         super(CheckRabbitMQ, self).run()
     except (pika.exceptions.AMQPError, pika.exceptions.ChannelError, pika.exceptions.RecursionError):
         raise CriticalError(self.exception_msg())
Exemplo n.º 20
0
 def connection_timeout_handler(self):
     """Raise CriticalError when the broker connection times out."""
     err = 'connection timed out to {name} broker'.format(name=self.name)
     raise CriticalError(err)
 def tmp(req):
     # nested helper (closure): 'msg' is a free variable that must come from
     # the enclosing scope - TODO confirm against the caller
     # raises CriticalError for any non-200 response, appending the response
     # body only when it is a short single-line string (avoids dumping HTML)
     if req.status_code != 200:
         err = ''
         if req.content and isStr(req.content) and len(req.content.split('\n')) < 2:
             err += ': ' + req.content
         raise CriticalError("{0}: '{1}' {2}{3}".format(msg, req.status_code, req.reason, err))
Exemplo n.º 22
0
    def run(self):
        """Connect to Jenkins and check the last completed build of --job,
        or print all job names with --list and exit UNKNOWN.

        Raises CriticalError on Jenkins API errors and WarningError when
        the job exists but has never been built. Appends query_time
        perfdata to self.msg on success.
        """
        server_url = '{proto}://{host}:{port}'.format(proto=self.protocol, host=self.host, port=self.port)
        try:
            log.debug('setting up Jenkins connection to %s', server_url)
            start_time = time.time()
            server = jenkins.Jenkins(server_url, username=self.user, password=self.password, timeout=self.timeout / 3)
            if log.isEnabledFor(logging.DEBUG):
                log.debug('getting user')
                user = server.get_whoami()
                log.debug('connected as user %s', jsonpp(user))
                #log.debug('getting version')
                # bug - https://bugs.launchpad.net/python-jenkins/+bug/1578626
                #version = server.get_version()
                #log.debug('Jenkins server version is %s', version)
            if self.list_jobs:
                log.debug('getting jobs')
                #jobs = server.get_jobs()
                # recursively get all jobs
                jobs = server.get_all_jobs()
                # more efficient with many folders
#                jobs = server.run_script("""
#                    import groovy.json.JsonBuilder;
#
#                    // get all projects excluding matrix configuration
#                    // as they are simply part of a matrix project.
#                    // there may be better ways to get just jobs
#                    items = Jenkins.instance.getAllItems(AbstractProject);
#                    items.removeAll {
#                      it instanceof hudson.matrix.MatrixConfiguration
#                    };
#
#                    def json = new JsonBuilder()
#                    def root = json {
#                      jobs items.collect {
#                        [
#                          name: it.name,
#                          url: Jenkins.instance.getRootUrl() + it.getUrl(),
#                          color: it.getIconColor().toString(),
#                          fullname: it.getFullName()
#                        ]
#                      }
#                    }
#
#                    // use json.toPrettyString() if viewing
#                    println json.toString()
#                    """)
                print('Jenkins Jobs:\n')
                for job in jobs:
                    print(job['fullname'])
                sys.exit(ERRORS['UNKNOWN'])

            log.debug('checking job exists')
            # less informative error message
            #assert server.job_exists(self.job) # True
            # this will give an intuitive error that a job doesn't exist
            # rather than letting it fail later with 'request object not found'
            server.assert_job_exists(self.job)

            log.debug('getting last build num for job %s', self.job)
            last_completed_build = server.get_job_info(self.job)['lastCompletedBuild']
            if not last_completed_build:
                raise WarningError("job '{job}' not built yet".format(job=self.job))
            latest_build = last_completed_build['number']
            log.debug('getting build info for job %s, latest build num %s', self.job, latest_build)
            build_info = server.get_build_info(self.job, latest_build)
            log.debug('build info: %s', build_info)
            self.process_build_info(build_info)
        except jenkins.JenkinsException as _:
            raise CriticalError(_)

        query_time = time.time() - start_time
        self.msg += ' query_time={0:.4f}s'.format(query_time)
Exemplo n.º 23
0
    def run(self):
        """Trigger a Travis CI debug job, wait for its SSH address, then
        exec ssh into the debug session (replaces this process).

        Bug fix: when --job-id was not given, the original assigned
        os.getenv('JOB_ID') to travis_token instead of job_id, clobbering
        any --travis-token value and leaving job_id unset; it now falls
        back correctly to the $JOB_ID environment variable.
        """
        job_id = self.get_opt('job_id')
        travis_token = self.get_opt('travis_token')
        if job_id is None:
            # fall back to the environment for the job id (was mistakenly
            # assigned to travis_token before)
            job_id = os.getenv('JOB_ID')
        if travis_token is None:
            travis_token = os.getenv('TRAVIS_TOKEN')
        validate_chars(job_id, 'job id', '0-9')
        validate_alnum(travis_token, 'travis token')

        headers = {
            'Content-Type': 'application/json',
            'Accept': 'application/json',
            'Travis-API-Version': '3',
            'Authorization': 'token {0}'.format(travis_token)
        }
        log.info('triggering debug job {job_id}'.format(job_id=job_id))
        url = 'https://api.travis-ci.org/job/{job_id}/debug'.format(
            job_id=job_id)
        log.debug('POST %s' % url)
        try:
            req = requests.post(url, headers=headers)
        except requests.exceptions.RequestException as _:
            raise CriticalError(_)
        log.debug("response: %s %s", req.status_code, req.reason)
        log.debug("content:\n%s\n%s\n%s", '=' * 80, req.content.strip(),
                  '=' * 80)
        # 409 Conflict => a debug job is already running for this job id
        if req.status_code == 409:
            error_message = ''
            try:
                _ = json.loads(req.content)
                error_message = _['error_message']
            except ValueError:
                pass
            error_message += (
                " (if you've just retriggered this you can avoid this error " +
                "using the --ignore-running switch)")
            if self.get_opt('ignore_running'):
                log.info('job already running (ignoring)')
            else:
                log.info('job already running')
                raise CriticalError('{0} {1}: {2}'.format(
                    req.status_code, req.reason, error_message))
        elif req.status_code != 202:
            raise CriticalError("%s %s" % (req.status_code, req.reason))

        # don't need to query this if using the API address rather than the web UI address
        # as we don't need to figure out the repo name, just use the job id by itself

        ssh_address = self.get_ssh_address(job_id=job_id)
        log.info('Executing: ssh -- {0}'.format(ssh_address))
        # flush both streams before exec replaces this process image
        sys.stdout.flush()
        sys.stderr.flush()
        self.disable_timeout()
        os.execvp('ssh', ['--', ssh_address])
Exemplo n.º 24
0
 def connection_timeout_handler():
     # nested callback (closure): takes no self parameter, so self.queue /
     # self.timeout / self.name etc. must come from the enclosing method's
     # scope - TODO confirm this is defined inside a method of the check class
     # raises CriticalError when the unique test message is not consumed back
     # within a third of the overall plugin timeout
     raise CriticalError("unique message not returned on queue '{queue}' within {secs:.2f} secs"\
                         .format(queue=self.queue, secs=self.timeout / 3) + \
                         ", consumer timed out while consuming messages from {name} broker '{host}:{port}'"\
                         .format(name=self.name, host=self.host, port=self.port))
Exemplo n.º 25
0
 def run(self):
     """Run the parent Kafka check, converting KafkaError to CriticalError."""
     try:
         super(CheckKafka, self).run()
     except KafkaError as exc:
         raise CriticalError(exc)
Exemplo n.º 26
0
 def connection_cancel_callback():
     """Raise CriticalError when the broker cancels the consumer channel."""
     err = 'broker sent channel cancel notification'
     raise CriticalError(err)
Exemplo n.º 27
0
 def exception_handler(self, arg):  # pylint: disable=no-self-use
     """Convert any exception / error argument into a CriticalError."""
     # TODO: improve this to extract connection refused for more concise errors
     raise CriticalError(arg)
Exemplo n.º 28
0
 def check_response_code(self, req):  # pylint: disable=no-self-use
     """Raise CriticalError unless the HTTP response status code is 200."""
     if req.status_code == 200:
         return
     raise CriticalError("%s %s" % (req.status_code, req.reason))
 def parse_json(self, json_data):
     """Parse NameNode JMX output and report the worst imbalance (%) of
     used space across live datanodes, with perfdata.

     Bug fixes:
     - the verbose imbalanced-nodes listing used '{1:.2f%}', an invalid
       format spec that raised ValueError whenever a node exceeded the
       warning threshold; it is now '{1:.2f}%' and formats the node's
       used-space percentage of max (which the condition tests) rather
       than its raw byte count
     - the divisor log message said 'min used space' while testing max
     """
     log.info('parsing response')
     try:
         live_nodes = json_data['beans'][0]['LiveNodes']
         # LiveNodes is itself a JSON string embedded in the JMX response
         live_node_data = json.loads(live_nodes)
         num_datanodes = len(live_node_data)
         if num_datanodes < 1:
             raise CriticalError("no live datanodes returned by JMX API from namenode '{0}:{1}'"\
                                 .format(self.host, self.port))
         min_space = None
         max_space = 0
         for datanode in live_node_data:
             used_space = live_node_data[datanode]['usedSpace']
             if not isInt(used_space):
                 raise UnknownError(
                     'usedSpace {} is not an integer! {}'.format(
                         used_space, support_msg_api()))
             used_space = int(used_space)
             log.info("datanode '%s' used space = %s", datanode, used_space)
             if min_space is None or used_space < min_space:
                 min_space = used_space
             if used_space > max_space:
                 max_space = used_space
         divisor = max_space
         if divisor < 1:
             log.info(
                 'max used space < 1, resetting divisor to 1 (% will likely be very high)'
             )
             divisor = 1
         if max_space < min_space:
             raise UnknownError('max_space < min_space')
         largest_imbalance_pc = float('{0:.2f}'.format(
             ((max_space - min_space) / divisor) * 100))
         if largest_imbalance_pc < 0:
             raise UnknownError('largest_imbalance_pc < 0')
         self.ok()
         self.msg = '{0}% HDFS imbalance on space used'.format(
             largest_imbalance_pc)
         self.check_thresholds(largest_imbalance_pc)
         self.msg += ' across {0:d} datanode{1}'.format(
             num_datanodes, plural(num_datanodes))
         if self.verbose:
             self.msg += ', min used space = {0}, max used space = {1}'.format(
                 min_space, max_space)
         if self.verbose and (self.is_warning() or self.is_critical()):
             self.msg += ' [imbalanced nodes: '
             for datanode in live_node_data:
                 used_space = live_node_data[datanode]['usedSpace']
                 # 100.0 forces float division (avoids int truncation on py2)
                 used_pc = used_space * 100.0 / max_space
                 if used_pc > self.thresholds['warning']['upper']:
                     self.msg += '{0}({1:.2f}%),'.format(datanode, used_pc)
             self.msg = self.msg.rstrip(',') + ']'
         self.msg += " | 'HDFS imbalance on space used %'={0}".format(
             largest_imbalance_pc)
         self.msg += self.get_perf_thresholds()
         self.msg += " num_datanodes={0}".format(num_datanodes)
         self.msg += " min_used_space={0}".format(min_space)
         self.msg += " max_used_space={0}".format(max_space)
     except KeyError as _:
         raise UnknownError("failed to parse json returned by NameNode at '{0}:{1}': {2}. {3}"\
                            .format(self.host, self.port, _, support_msg_api()))
Exemplo n.º 30
0
 def connection_blocked_callback(method):
     """Raise CriticalError when the broker reports the connection blocked."""
     # could really be a warning
     err = 'connection blocked: {0}'.format(method.reason)
     raise CriticalError(err)