Exemplo n.º 1
0
 def run(self, never_match_re=None):
     """
     Collect and report on errors for all cloud-custodian Lambda functions.

     Finds every Lambda function whose name starts with ``custodian-`` or
     ``cloud-custodian-``, loads the failed invocations via
     ``_get_sqs_dlq()``, checks each function in turn (sleeping
     ``INTER_FUNC_SLEEP`` seconds after each check), and finally calls
     ``_ack_sqs()``.

     :param never_match_re: passed through unchanged to ``_check_function``
     :raises SystemExit: with code 1 if any function was reported unhealthy
     """
     print(
         'Searching cloud-custodian Lambda functions for failed invocations'
     )
     name_pattern = re.compile(r'^(custodian-|cloud-custodian-).*')
     lambda_names = LambdaHealthChecker.find_matching_func_names(
         name_pattern, self._region_name)
     logger.debug('Custodian Lambda functions: %s', lambda_names)
     self._get_sqs_dlq()
     logger.debug('%d failed Lambda invocations: %s',
                  len(self._failed_request_ids),
                  self._failed_request_ids.keys())
     errors = False
     for fname in lambda_names:
         healthy = self._check_function(fname, never_match_re=never_match_re)
         if not healthy:
             logger.info(
                 '_check_function returned False (NOT HEALTHY) for: %s',
                 fname)
             errors = True
         logger.debug('Sleeping %s seconds before checking next function',
                      self.INTER_FUNC_SLEEP)
         sleep(self.INTER_FUNC_SLEEP)
     self._ack_sqs()
     # RequestIDs still mapped to None were never matched to a function by
     # any of the _check_function calls above.
     unmatched = []
     for request_id in self._failed_request_ids:
         if self._failed_request_ids[request_id] is None:
             unmatched.append(request_id)
     if unmatched:
         detail = ('ERROR: %d failed Lambda RequestIDs could not be tied '
                   'to their function names: %s' %
                   (len(unmatched), unmatched))
         print("\n\n" + red(detail) + "\n\n")
     if not errors:
         print('No Lambda functions had errors in the last ' + self.INVL_DESC)
         return
     print('Some lambda functions had errors in the last %s' % self.INVL_DESC)
     raise SystemExit(1)
Exemplo n.º 2
0
    def _check_function(self, func_name, never_match_re=None):
        """
        Check health of one Lambda function. Print information on it to STDOUT.
        Return True for healthy, False if errors/failures.

        A function is reported as unhealthy when more than 50% of its
        invocations were errors or throttles (according to
        ``get_cloudwatch_metric_sums()``), or when ``get_filtered_logs()``
        returns any logs for it. Every RequestID found in the returned logs
        is recorded in ``self._failed_request_ids`` as belonging to
        ``func_name``.

        :param func_name: Lambda function name to check
        :type func_name: str
        :param never_match_re: Regex for logs to NEVER return, even if they
          match ``always_match_re``. Only used for functions whose name
          matches ``ALL_ERROR_FUNCTIONS``.
        :type never_match_re: ``re``
        :return: True if the function is healthy, False if it had
          errors/failures
        :rtype: bool
        """

        def print_events(events):
            # Print every non-blank line of each event's message, indented
            # by two tabs, with embedded tabs replaced by single spaces.
            for e in events:
                print("\n".join([
                    "\t\t%s" % line.replace("\t", ' ')
                    for line in e['message'].split("\n") if line.strip() != ''
                ]))

        c = LambdaHealthChecker(func_name,
                                self._region_name,
                                logs=self._logs,
                                cw=self._cw)
        # Only search for RequestIDs not yet attributed to a function.
        req_ids = [
            i for i in self._failed_request_ids
            if self._failed_request_ids[i] is None
        ]
        if self.ALL_ERROR_FUNCTIONS.match(func_name):
            logs = c.get_filtered_logs(req_ids,
                                       always_match_re=self.ALL_ERROR_LOG_RE,
                                       never_match_re=never_match_re)
        else:
            logs = c.get_filtered_logs(req_ids)
        metrics = c.get_cloudwatch_metric_sums()
        msg = []
        if metrics['Invocations'] > 0:
            throttle_pct = (metrics['Throttles'] /
                            metrics['Invocations']) * 100
            error_pct = (metrics['Errors'] / metrics['Invocations']) * 100
        else:
            # No invocations: avoid dividing by zero.
            throttle_pct = 0
            error_pct = 0
        if error_pct > 50:
            msg.append('Lambda Function Errors: %s%% (%d of %d invocations)' %
                       (error_pct, metrics['Errors'], metrics['Invocations']))
        if throttle_pct > 50:
            msg.append(
                'Lambda Function Throttles: %s%% (%d of %d invocations)' %
                (throttle_pct, metrics['Throttles'], metrics['Invocations']))
        if not logs and not msg:
            print(green('%s: OK\n' % func_name))
            return True
        print(red('%s: ERRORS' % func_name))
        for m in msg:
            print("\t%s" % red(m))
        if not logs:
            # Reaching here without logs means a metric threshold was
            # exceeded; the function was just reported as ERRORS, so it must
            # not be returned as healthy.
            return False
        print("\n\tLogs For Failed Invocations:\n")
        for req_id, events in logs.items():
            if req_id == 'always_match':
                # Not a RequestID; printed separately below.
                continue
            self._failed_request_ids[req_id] = func_name
            print("\t" + red('RequestID=%s logGroupName=%s logStreamName=%s' %
                             (req_id, events[0]['logGroupName'],
                              events[0]['logStreamName'])))
            print_events(events)
        if 'always_match' in logs:
            print(
                "\t" +
                red('Always-Match Logs (RequestID not in DLQ, but log matches '
                    'regex that we want to always alarm on)'))
            print_events(logs['always_match'])
        print('')
        return False
Exemplo n.º 3
0
 def test_red(self):
     """``red()`` must wrap its argument in the ANSI red and reset escapes."""
     wrapped = red('foo')
     assert wrapped == "\033[0;31mfoo\033[0m"