Example #1
 def process_file(self, filename):
     if self.is_excluded(filename):
         return
     if filename == '-':
         self.iostream = sys.stdin
         self.process_json(sys.stdin.read(), '<STDIN>')
     else:
         # reset this flag, used to print single quote detection only once per file
         self.single_quotes_detected = False
         try:
             with open(filename) as self.iostream:
             # check if it's a Big Data format file with a json doc per line - peek at the first few lines;
             # this is more efficient than slurping a large file only to fail with out of memory
                 for _ in range(1, 10):
                     line = self.iostream.readline()
                     if line:
                         if isJson(line) or \
                            isJson(self.convert_single_quoted(line)) or \
                            isJson(self.convert_single_quoted_escaped(line)):
                             log.debug("header line of '{0}' detected as a valid JSON document".format(filename) +
                                       ", assuming Big Data format multi-line json")
                             self.process_multirecord_json(filename)
                             break
                 else:
                     try:
                         self.iostream.seek(0)
                         content = self.iostream.read()
                         self.process_json(content, filename)
                     except MemoryError:
                         # may be a big data format after all and perhaps the first record was broken
                         log.warning("memory error validating contents from file '{0}', ".format(filename) +
                                     "assuming Big Data multi-record json and re-trying validation line-by-line")
                         self.process_multirecord_json(filename)
         except IOError as _:
             die("ERROR: %s" % _)
Example #2
 def parse_builds(self, content):
     log.debug('parsing build info')
     build = None
     collected_builds = []
     json_data = json.loads(content)
     if not json_data or \
        'builds' not in json_data or \
        not json_data['builds']:
         qquit('UNKNOWN', "no Travis CI builds returned by the Travis API." +
               " Either the specified repo '{0}' doesn't exist".format(self.repo) +
               " or no builds have happened yet?" +
               " Also remember the repo is case sensitive, for example 'harisekhon/nagios-plugins' returns this" +
               " blank build set whereas 'HariSekhon/Nagios-Plugins' succeeds in returning latest builds information")
     builds = json_data['builds']
     # get latest finished failed build
     last_build_number = None
     found_newer_passing_build = False
     for _ in builds:
         # API returns most recent build first
         # extra check to make sure we're getting the very latest build number and API hasn't changed
         build_number = _['number']
         if not isInt(build_number):
             raise UnknownError('build number returned is not an integer!')
         build_number = int(build_number)
         if last_build_number is None:
             last_build_number = build_number + 1
         if build_number >= last_build_number:
             raise UnknownError('build number returned is out of sequence, cannot be >= last build returned. ' +
                                '{0}'.format(support_msg_api()))
         last_build_number = build_number
         if self.completed:
             if len(collected_builds) < self.num and _['state'] in (
                     'passed', 'finished', 'failed', 'errored'):
                 collected_builds.append(_)
         elif self.failed:
             if _['state'] == 'passed':
                 if not collected_builds and not found_newer_passing_build:
                     log.warning("found more recent successful build #%s with state = '%s'"
                                 ", you may not need to debug this build any more", _['number'], _['state'])
                     found_newer_passing_build = True
             elif _['state'] in ('failed', 'errored'):
                 if len(collected_builds) < self.num:
                     collected_builds.append(_)
                     # by continuing to iterate through the rest of the builds we can check
                     # their last_build numbers are descending for extra sanity checking
                     #break
         elif len(collected_builds) < self.num:
             collected_builds.append(_)
             # by continuing to iterate through the rest of the builds we can check
             # their last_build numbers are descending for extra sanity checking
             #break
     if not collected_builds:
         qquit('UNKNOWN', 'no recent builds found')
     if log.isEnabledFor(logging.DEBUG):
         for build in collected_builds:
             log.debug("build:\n%s", jsonpp(build))
     return collected_builds
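isInt(), qquit() and support_msg_api() also come from pylib. A rough sketch of isInt(), assuming it accepts any value whose string form is an (optionally negative) run of digits:

 import re

 def isInt(arg, allow_negative=False):
     # True for integer-looking values such as 42 or '42'; None fails
     if arg is None:
         return False
     pattern = r'-?\d+$' if allow_negative else r'\d+$'
     return re.match(pattern, str(arg)) is not None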
Example #3
File: cli.py  Project: HariSekhon/pylib
 def __parse_args__(self):
     try:
         (self.options, self.args) = self.__parser.parse_args()
     # I don't agree with zero exit code from OptionParser for help/usage,
     # and want UNKNOWN not CRITICAL(2) for switch mis-usage...
     except SystemExit:  # pragma: no cover
         sys.exit(ERRORS['UNKNOWN'])
     if self.options.help:  # pragma: no cover
         self.usage()
     if self.options.version:  # pragma: no cover
         print('%(version)s' % self.__dict__)
         sys.exit(ERRORS['UNKNOWN'])
     if 'timeout' in dir(self.options):
         self.timeout = self.get_opt('timeout')
     env_verbose = os.getenv('VERBOSE')
     if isInt(env_verbose):
         if int(env_verbose) > self.verbose:
             log.debug('environment variable $VERBOSE = %s, increasing verbosity', env_verbose)
             self.verbose = int(env_verbose)
     elif env_verbose is None:
         pass
     else:
         log.warning("$VERBOSE environment variable is not an integer ('%s')", env_verbose)
     self.parse_args()
     return self.options, self.args
Example #4
 def validate_csvreader(csvreader, filename):
     count = 0
     try:
         # csvreader doesn't seem to generate any errors ever :-(
         # csv module allows entire lines of json/xml/yaml to go in as a single field
         # Adding some invalidations manually
         for field_list in csvreader:
             # list of fields with no separator information
             log.debug("line: %s", field_list)
             # make it fail if there is only a single field on any line
             if len(field_list) < 3:
                 log.error("less than 3 fields detected, aborting conversion of file '%s'", filename)
                 return None
             # extra protection along the same lines as anti-json:
             # the first char of field should be alphanumeric, not syntax
             # however instead of isAlnum allow quotes for quoted CSVs to pass validation
             if field_list[0] != "" and not isChars(field_list[0][0], 'A-Za-z0-9"'):
                 log.error('non-alphanumeric / quote opening character detected in CSV')
                 return None
             count += 1
     except csv.Error as _:
         log.warning('file %s, line %s: %s', filename, csvreader.line_num, _)
         return None
     if count == 0:
         log.error('zero lines detected, blank input is not valid CSV')
         return None
     return csvreader
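A hypothetical invocation; note that validate_csvreader() consumes the reader while validating, so callers wanting to re-read the rows must seek the underlying file handle back to 0 afterwards, as Example #19 does:

 import csv

 with open('statement.csv') as filehandle:  # hypothetical file
     reader = csv.reader(filehandle)
     if validate_csvreader(reader, 'statement.csv') is None:
         print('not valid CSV')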
Example #5
 def load_file(filename, boundary=False):
     log.info('loading custom regex patterns from %s', filename)
     regex_list = []
     re_ending_pipe = re.compile(r'\|\s*$')
     re_leading_space = re.compile(r'^\s*')
     with open(filename) as filehandle:
         for line in filehandle:
             line = line.rstrip('\n')
             line = line.rstrip('\r')
             line = line.split('#')[0]
             line = re_ending_pipe.sub('', line)
             line = re_leading_space.sub('', line)
             if not line:
                 continue
             if not isRegex(line):
                 log.warning('ignoring invalid regex from %s: %s',
                             os.path.basename(filename), line)
                 continue
             if boundary:
                 line = r'(\b|[^A-Za-z])' + line + r'(\b|[^A-Za-z])'
             regex_list.append(line)
     raw = '|'.join(regex_list)
     #log.debug('custom_raw: %s', raw)
     regex_list = [re.compile(_, re.I) for _ in regex_list]
     return (regex_list, raw)
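A usage sketch against hypothetical files, matching each input line against the compiled patterns:

 regex_list, raw = load_file('patterns.txt', boundary=True)
 with open('input.log') as filehandle:
     for line in filehandle:
         if any(regex.search(line) for regex in regex_list):
             print(line, end='')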
Example #6
File: cli.py  Project: FavioVazquez/pylib
 def __parse_timeout__(self):
     # reset this to none otherwise unit tests fail to take setting from timeout_default
     # use __timeout to bypass the property setter checks
     self.__timeout = None
     if 'timeout' in dir(self.options):
         timeout = self.get_opt('timeout')
         if timeout is not None:
             log.debug('getting --timeout value %s', timeout)
             self.timeout = timeout
     if self.timeout is None:
         env_timeout = os.getenv('TIMEOUT')
         log.debug('getting $TIMEOUT value %s', env_timeout)
         if env_timeout is not None:
             log.debug('env_timeout is not None')
             if isInt(env_timeout):
                 log.debug(
                     "environment variable $TIMEOUT = '%s' and timeout not already set, setting timeout = %s",
                     env_timeout, env_timeout)
                 self.timeout = int(env_timeout)
             else:
                 log.warning(
                     "$TIMEOUT environment variable is not an integer ('%s')",
                     env_timeout)
     if self.timeout is None:
         log.debug('timeout not set, using default timeout %s',
                   self.timeout_default)
         self.timeout = self.timeout_default
Example #7
 def process_json(self, content, filename):
     log.debug('process_json()')
     if not content:
         log.warning("blank content passed to process_json for contents of file '%s'", filename)
     if isJson(content):
         print(json.dumps(json.loads(content)))
         return True
     elif self.permit_single_quotes:
         log.debug('checking for single quoted JSON')
         # check if it's regular single quoted JSON a la MongoDB
         json_single_quoted = self.convert_single_quoted(content)
         if self.process_json_single_quoted(json_single_quoted, filename):
             return True
         log.debug('single quoted JSON check failed, trying with pre-escaping double quotes')
         # check if it's single quoted JSON with double quotes that aren't escaped,
         # by pre-escaping them before converting single quotes to doubles for processing
         json_single_quoted_escaped = self.convert_single_quoted_escaped(content)
         if self.process_json_single_quoted(json_single_quoted_escaped, filename):
             log.debug("processed single quoted json with non-escaped double quotes in '%s'", filename)
             return True
         log.debug('single quoted JSON check failed even with pre-escaping any double quotes')
     self.failed = True
     log.error("invalid json detected in '%s':", filename)
     printerr(content)
     if not self.continue_on_error:
         sys.exit(ERRORS['CRITICAL'])
     return False
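convert_single_quoted() and convert_single_quoted_escaped() aren't shown; a naive sketch, assuming they simply swap quote styles (real-world MongoDB-style output with embedded apostrophes needs more care):

 def convert_single_quoted(self, content):
     # naive: swap all single quotes for double quotes
     return content.replace("'", '"')

 def convert_single_quoted_escaped(self, content):
     # pre-escape existing double quotes, then convert single quotes
     return self.convert_single_quoted(content.replace('"', r'\"'))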
Example #8
File: cli.py  Project: FavioVazquez/pylib
    def __parse_verbose__(self):
        self.verbose += int(self.get_opt('verbose'))
        env_verbose = os.getenv('VERBOSE')
        if isInt(env_verbose):
            if int(env_verbose) > self.verbose:
                log.debug(
                    'environment variable $VERBOSE = %s, increasing verbosity',
                    env_verbose)
                self.verbose = int(env_verbose)
        elif env_verbose is None:
            pass
        else:
            log.warning(
                "$VERBOSE environment variable is not an integer ('%s')",
                env_verbose)

        if self.is_option_defined('quiet') and self.get_opt('quiet'):
            self.verbose = 0
        elif self.verbose > 2:
            log.setLevel(logging.DEBUG)
        elif self.verbose > 1:
            log.setLevel(logging.INFO)
        elif self.verbose > 0 and self._prog[0:6] != 'check_':
            log.setLevel(logging.WARN)
        if self.options.debug:
            log.setLevel(logging.DEBUG)  # pragma: no cover
            log.debug('enabling debug logging')
            if self.verbose < 3:
                self.verbose = 3
Example #9
    def process_result(self, result):
        _id = result['id']
        log.info('latest build id: %s', _id)

        status = result['status']
        log.info('status: %s', status)
        if not isInt(status, allow_negative=True):
            raise UnknownError(
                'non-integer status returned by DockerHub API. {0}'.format(
                    support_msg_api()))

        tag = result['dockertag_name']
        log.info('tag: %s', tag)

        trigger = result['cause']
        log.info('trigger: %s', trigger)

        created_date = result['created_date']
        log.info('created date: %s', created_date)

        last_updated = result['last_updated']
        log.info('last updated: %s', last_updated)

        created_datetime = datetime.datetime.strptime(
            created_date.split('.')[0], '%Y-%m-%dT%H:%M:%S')
        updated_datetime = datetime.datetime.strptime(
            last_updated.split('.')[0], '%Y-%m-%dT%H:%M:%S')
        build_latency_timedelta = updated_datetime - created_datetime
        build_latency = build_latency_timedelta.total_seconds()
        log.info('build latency (creation to last updated): %s', build_latency)
        # results in .0 floats anyway
        build_latency = int(build_latency)

        build_code = result['build_code']
        build_url = 'https://hub.docker.com/r/{0}/builds/{1}'.format(
            self.repo, build_code)
        log.info('latest build URL: %s', build_url)

        if str(status) in self.statuses:
            status = self.statuses[str(status)]
        else:
            log.warning("status code '%s' not recognized! %s", status,
                        support_msg_api())
            log.warning('defaulting to assume status is an Error')
            status = 'Error'
        if status != 'Success':
            self.critical()
        self.msg += "'{repo}' last completed build status: '{status}', tag: '{tag}', build code: {build_code}"\
                    .format(repo=self.repo, status=status, tag=tag, build_code=build_code)
        if self.verbose:
            self.msg += ', id: {0}'.format(_id)
            self.msg += ', trigger: {0}'.format(trigger)
            self.msg += ', created date: {0}'.format(created_date)
            self.msg += ', last updated: {0}'.format(last_updated)
            self.msg += ', build_latency: {0}'.format(sec2human(build_latency))
            self.msg += ', build URL: {0}'.format(build_url)
        self.msg += ' | build_latency={0:d}s'.format(build_latency)
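The trailing ' | build_latency={0:d}s' follows the Nagios plugin perfdata convention: human-readable message, then a pipe, then machine-readable metric=value pairs. A representative final message (repo, tag and build code values hypothetical) might read:

 'harisekhon/centos' last completed build status: 'Success', tag: 'latest', build code: 12345 | build_latency=42s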
Example #11
 def parse_table(self, row):
     #log.debug(row)
     user = row[self.indicies['user_index']]
     # 'hari.sekhon' in '*****@*****.**' in kerberos
     if self.re_ignored_users and self.re_ignored_users.match(user):
         log.debug('skipping row for ignored user %s: %s', user, row)
         return (None, None)
     database = row[self.indicies['database_index']].strip()
     table = row[self.indicies['table_index']].strip()
     if not database or not table or not self.re_table.match('{}.{}'.format(database, table)):
         #log.info('table not found in fields for row: %s', row)
         operation = row[self.indicies['operation_index']]
         if operation in self.operations_to_ignore:
             return (None, None)
         elif operation == 'QUERY':
             query = row[self.indicies['sql_index']]
             # cheaper than re_ignore to pre-filter
             if query in ('GET_TABLES', 'GET_SCHEMAS', 'INVALIDATE METADATA'):
                 return (None, None)
             (database, table) = self.get_db_table_from_resource(row)
             if not (database and table):
                 log.debug('database/table not found in row: %s', row)
                 log.debug('trying to parse: %s', query)
                 match = self.re_select_from_table.search(query)
                 if match:
                     table = match.group(1)
                     if '.' in table:
                         (database, table) = table.split('.', 1)
                 # could use .search but all these seem to be at beginning
                 elif self.re_ignore.match(query):
                     return (None, None)
                 else:
                     log.warning('failed to parse database/table from query: %s', query)
                     return (None, None)
         else:
             log.debug('database/table not found in row and operation is not a query to parse: %s', row)
             return (None, None)
     if not table and not database:
         return (None, None)
     table = table.lower().strip('`')
     database = database.lower().strip('`')
     if ' ' in table:
         raise CriticalError('table \'{}\' has spaces - parsing error for row: {}'.format(table, row))
     if ' ' in database:
         raise CriticalError('database \'{}\' has spaces - parsing error for row: {}'.format(database, row))
     if table == 'null':
         raise CriticalError('table == null - parsing error for row: {}'.format(row))
     return (database, table)
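re_select_from_table, re_ignore and the indicies mapping are initialized elsewhere in the class and aren't shown; a plausible shape for the FROM-clause pattern, assuming it just captures the first `database.table` or bare table name after FROM:

 import re

 # hypothetical pattern - group(1) yields 'db.table' or 'table'
 re_select_from_table = re.compile(r'\bFROM\s+`?([A-Za-z0-9_.]+)`?', re.IGNORECASE)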
Example #12
 def process_csv(self, filehandle):
     csvreader = None
     try:
         if self.delimiter is not None:
             try:
                 csvreader = csv.reader(filehandle,
                                        delimiter=self.delimiter,
                                        quotechar=self.quotechar)
             except TypeError as _:
                 self.usage(_)
         else:
             # dialect = csv.excel
             dialect = csv.Sniffer().sniff(filehandle.read(1024))
             # this will raise an Error if invalid
             dialect.strict = True
             filehandle.seek(0)
             csvreader = csv.reader(filehandle, dialect)
     except csv.Error as _:
         log.warning('file %s: %s', self.filename, _)
         return False
     count = 0
     try:
         # csvreader doesn't seem to generate any errors ever :-(
         # csv module allows entire lines of json/xml/yaml to go in as a single field
         # Adding some invalidations manually
         for field_list in csvreader:
             # list of fields with no separator information
             # log.debug("line: %s", field_list)
             # make it fail if there is only a single field on any line
             if len(field_list) < 2:
                 return False
             # it's letting JSON through :-/
             if field_list[0] == '{':
                 return False
             # extra protection along the same lines as anti-json:
             # the first char of field should be alphanumeric, not syntax
             # however instead of isAlnum allow quotes for quoted CSVs to pass validation
             if not isChars(field_list[0][0], 'A-Za-z0-9\'"'):
                 return False
             count += 1
     except csv.Error as _:
         log.warning('file %s, line %s: %s', self.filename,
                     csvreader.line_num, _)
         return False
     if count == 0:
         log.debug('zero lines detected, blank input is not valid CSV')
         return False
     log.debug('%s CSV lines passed', count)
     return True
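csv.Sniffer().sniff() infers the dialect (delimiter, quote character) from a text sample, which is why only the first 1024 bytes are read before seeking back to the start; for instance:

 import csv

 dialect = csv.Sniffer().sniff('a;b;c\n1;2;3\n')
 print(dialect.delimiter)  # ';'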
Example #13
 def output(self, row, database, table):
     if not self.re_table.match('{}.{}'.format(database, table)):
         log.warning('%s.%s does not match table regex', database, table)
         return
     #self.data[database] = self.data.get(database, {})
     #self.data[database][table] = 1
     if table and not database:
         log.info('got table but not database for row: %s', row)
     if database and not table:
         log.info('got database but not table for row: %s', row)
     if not table and not database:
         return
     self.csv_writer.writerow({'database': database, 'table': table})
     if log.isEnabledFor(logging.DEBUG):
         sys.stdout.flush()
Example #14
 def parse_json(self, json_data):
     drillbits = json_data['drillbits']
     online_nodes = 0
     for drillbit in drillbits:
         if 'state' not in drillbit:
             raise UnknownError('state field not found, is this Apache Drill < 1.12?')
         if drillbit['state'] == 'ONLINE':
             online_nodes += 1
         else:
             log.warning("node '%s' state = '%s'", drillbit['address'], drillbit['state'])
     total_nodes = len(drillbits)
     offline_nodes = total_nodes - online_nodes
     self.msg = 'Apache Drill cluster: drillbits offline = {}'.format(offline_nodes)
     self.check_thresholds(offline_nodes)
     self.msg += ', drillbits online = {}'.format(online_nodes)
     self.msg += ', total drillbits = {}'.format(total_nodes)
     self.msg += ' | drillbits_offline={}{} drillbits_online={} drillbits_total={}'\
                 .format(offline_nodes, self.get_perf_thresholds(), online_nodes, total_nodes)
Example #15
 def output(self, row, database, table):
     if not self.re_table.match('{}.{}'.format(database, table)):
         log.warning('%s.%s does not match table regex', database, table)
         return
     # instead of collecting in ram, now just post-process through sort -u
     # this way it is easier to see live extractions, --debug and correlate
     #self.data[database] = self.data.get(database, {})
     #self.data[database][table] = 1
     if table and not database:
         log.info('got table but not database for row: %s', row)
     if database and not table:
         log.info('got database but not table for row: %s', row)
     if not table and not database:
         return
     #self.csv_writer.writerow({'database': database, 'table': table, 'user': row[self.indicies['user_index']]})
     self.csv_writer.writerow({'database': database, 'table': table})
     if log.isEnabledFor(logging.DEBUG):
         sys.stdout.flush()
Example #16
 def parse_json(self, json_data):
     drillbits = json_data['drillbits']
     online_nodes = 0
     for drillbit in drillbits:
         if 'state' not in drillbit:
             raise UnknownError('state field not found, is this Apache Drill < 1.12?')
         if drillbit['state'] == 'ONLINE':
             online_nodes += 1
         else:
             log.warning("node '%s' state = '%s'", drillbit['address'], drillbit['state'])
     total_nodes = len(drillbits)
     offline_nodes = total_nodes - online_nodes
     self.msg = 'Apache Drill cluster: drillbits offline = {}'.format(offline_nodes)
     self.check_thresholds(offline_nodes)
     self.msg += ', drillbits online = {}'.format(online_nodes)
     self.msg += ', total drillbits = {}'.format(total_nodes)
     self.msg += ' | drillbits_offline={}{} drillbits_online={} drillbits_total={}'\
                 .format(offline_nodes, self.get_perf_thresholds(), online_nodes, total_nodes)
Example #17
 def parse_json(self, json_data):
     drillbits = json_data['drillbits']
     online_nodes = 0
     for drillbit in drillbits:
         if 'state' in drillbit:
             if drillbit['state'] == 'ONLINE':
                 online_nodes += 1
             else:
                 log.warning("node '%s' state = '%s'", drillbit['address'], drillbit['state'])
         else:
             # no state field (Apache Drill < 1.12?) - counted as online
             online_nodes += 1
     self.msg = 'Apache Drill cluster: drillbits online = {}'.format(online_nodes)
     self.check_thresholds(online_nodes)
     total_nodes = len(drillbits)
     offline_nodes = total_nodes - online_nodes
     self.msg += ', drillbits offline = {}'.format(offline_nodes)
     self.msg += ', total drillbits = {}'.format(total_nodes)
     self.msg += ' | drillbits_online={}{} drillbits_offline={} drillbits_total={}'\
                 .format(online_nodes, self.get_perf_thresholds(), offline_nodes, total_nodes)
Example #18
 def check_multirecord_json(self):
     log.debug('check_multirecord_json()')
     normal_json = False
     single_quoted = False
     count = 0
     for line in self.iostream:
         if isJson(line):
             normal_json = True
             # can't use self.print() here, don't want to print valid for every line of a file / stdin
             if self.passthru:
                 print(line, end='')
             count += 1
             continue
         elif self.permit_single_quotes and self.check_json_line_single_quoted(line):
             single_quoted = True
             if self.passthru:
                 print(line, end='')
             count += 1
             continue
         else:
             log.debug('invalid multirecord json')
             self.failed = True
             if not self.passthru:
                 die(self.invalid_json_msg)
             return False
     if count == 0:
         log.debug(
             'blank input, detected zero lines while multirecord checking')
         self.failed = True
         return False
     # self.multi_record_detected = True
     log.debug('multirecord json (all %s lines passed)', count)
     extra_info = ''
     if single_quoted:
         extra_info = ' single quoted'
         if normal_json:
             extra_info += ' mixed with normal json!'
             log.warning('mixture of normal and single quoted json detected, ' +
                         'may cause issues for data processing engines')
     if not self.passthru:
         print('{0} (multi-record format{1}, {2} records)'.format(
             self.valid_json_msg, extra_info, count))
     return True
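The "Big Data" / multirecord format referred to in Examples #1 and #18 is one self-contained JSON document per line (i.e. JSON Lines), for example:

 {"name": "a", "value": 1}
 {"name": "b", "value": 2}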
Example #19
 def get_csvreader(filename):
     try:
         filehandle = open(filename)
     except IOError as _:
         log.error(_)
         return None
     filename = os.path.basename(filename)
     try:
         dialect = csv.Sniffer().sniff(filehandle.read(1024))
         # this will raise an Error if invalid
         dialect.strict = True
         filehandle.seek(0)
         csvreader = csv.reader(filehandle, dialect)
     except csv.Error as _:
         log.warning('file %s: %s', filename, _)
         return None
     csvreader = CrunchAccountingCsvStatementConverter.validate_csvreader(csvreader, filename)
     filehandle.seek(0)
     return csvreader
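A short usage sketch with a hypothetical statement file; the returned reader yields rows from the top of the file again thanks to the final seek(0):

 csvreader = get_csvreader('statement.csv')
 if csvreader is not None:
     for row in csvreader:
         print(row)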