def processLine(self, line, logger=None):
    """Inspect a single log line and report the error it contains, if any.

    The line (with trailing whitespace removed) is first used to follow the
    execution stage ('initialise' -> 'execute' -> 'finalise') and, while in
    the 'execute' stage, the current event number. Lines consumed by that
    bookkeeping produce None. Every other line is tested, in this order,
    against the ignore patterns, the known error patterns and finally the
    generic matcher in extractError().

    Arguments:
    line   -- the log line to analyse
    logger -- optional logger that receives progress and debug messages

    Returns the ErrorInfo object of the first error found, or None when the
    line is a stage/event marker, matches an ignore pattern or holds no error.
    """
    text = line.rstrip()

    # Stage and event bookkeeping: marker lines are consumed here.
    if self.stage == 'execute':
        if AthenaLogChecker._startOfFinaliseRE.match(text):
            if logger:
                logger.info("Athena finalise()...")
            self.stage = 'finalise'
            self.event = None
            return None

        eventMatch = AthenaLogChecker._eventNumberRE.match(text)
        if eventMatch:
            self.event = eventMatch.group('event')
            if logger:
                logger.debug("Athena event %s", self.event)
            return None

        if AthenaLogChecker._noMoreEventNumberRE.match(text):
            previousEvent = self.event
            self.event = None
            # only report the transition when an event number was known
            if logger and previousEvent is not None:
                logger.debug("No more event numbers available")
            return None
    elif self.stage == 'initialise' and AthenaLogChecker._startOfExecuteRE.match(text):
        if logger:
            logger.info("Athena execute()...")
        self.stage = 'execute'
        return None

    # Lines matching an ignore pattern never produce an error.
    ignored = AtlasErrorCodes.matchIgnorePattern(text, self.release)
    if ignored:
        ignoredPattern = ignored.re.pattern
        # lines ignored by this particular INFO pattern are dropped silently:
        # they are neither counted nor logged
        if ignoredPattern != r'.*?\s+?INFO .+':
            self.ignoreCount += 1
            if logger:
                logger.debug('ignoring error in line: "%s"', text)
                logger.debug(' because it matched: "%s"', ignoredPattern)
        return None

    # Known error patterns take precedence over the generic match.
    patternMatch, knownError = AtlasErrorCodes.matchErrorPattern(text, self.release)
    if knownError:
        self.processError(knownError)
        if logger:
            logger.debug("matched error category %s in line: %s",
                         knownError.category.acronym, text)
            logger.debug(' because it matched: "%s"', patternMatch.re.pattern)
        return knownError

    # Last resort: generic ERROR/WARNING/FATAL extraction.
    genericError = self.extractError(text)
    if not genericError:
        return None
    self.processError(genericError)
    if logger:
        logger.verbose("non-matched error in line: %s", text)
    return genericError
def handleSystemExit(self, e):
    """Translate a SystemExit exception into an ErrorInfo object.

    Arguments:
    e -- the SystemExit exception; its first argument is the exit status

    Returns an ErrorInfo with acronym 'OK' when the exit status denotes
    success (no argument, None or 0), and a FATAL 'ATH_FAILURE' ErrorInfo
    carrying the status in its message otherwise.
    """
    try:
        status = e.args[0]
    except Exception:
        # no exit argument available: same as a plain sys.exit()
        status = 0
    # sys.exit(None) is a successful exit (status zero), exactly like
    # sys.exit() without argument, so it must not be reported as a failure.
    if status is None or status == 0:
        return AtlasErrorCodes.ErrorInfo(acronym='OK')
    # Wrap the status in a 1-tuple so that a tuple-valued status is printed
    # as a whole instead of being unpacked as formatting arguments.
    return AtlasErrorCodes.ErrorInfo(acronym='ATH_FAILURE',
                                     severity=FATAL,
                                     message='SystemExit %s' % (status,))
def __init__(self, release):
    """Set up the checker for the given release.

    Stores the release, resets the checker state via reset(), makes sure the
    error categories, error patterns and ignore patterns are loaded through
    AtlasErrorCodes, and creates the 'AthenaLogChecker' timer.

    Arguments:
    release -- release identifier, kept in self.release
    """
    self.release = release
    self.reset()

    # Each table is read only when its count is still zero.
    tableLoaders = (
        (AtlasErrorCodes.categoriesCount, AtlasErrorCodes.readCategories),
        (AtlasErrorCodes.errorPatternsCount, AtlasErrorCodes.readErrorPatterns),
        (AtlasErrorCodes.ignorePatternsCount, AtlasErrorCodes.readIgnorePatterns),
    )
    for countEntries, readEntries in tableLoaders:
        if not countEntries():
            readEntries()

    self.vTimer = VTimer.VTimer('AthenaLogChecker')
def handleDllLoadError(self, e):
    """Build a FATAL 'ATH_DLL_LOAD' ErrorInfo for a failed library load.

    The traceback entry matching a ``theApp.Dlls =`` / ``theApp.Dlls +=``
    statement is looked up with find_in_stack() (presumably a search of the
    current traceback -- confirm against trfutil). When one is found, the
    module name is taken from it and the library ``lib<module>.so`` is
    searched with find_library(). If the library exists, ``ldd`` is run on
    it and every dependency reported as "not found" is examined.

    Arguments:
    e -- the exception being handled (not inspected here)

    Returns an ErrorInfo whose message names the module and whose diagnosis
    lists the missing or unreadable libraries followed by the library search
    path; message and diagnosis are None when nothing could be determined.
    """
    # try to find the guilty one
    import subprocess
    from PyJobTransformsCore.trfutil import TRACEBACK_TEXT, find_in_stack
    from PyJobTransformsCore.envutil import find_library

    mess = None
    diag = None
    dllRE = r"^theApp.Dlls\s*[+]?="
    stack = find_in_stack(dllRE)
    if stack:
        text = stack[TRACEBACK_TEXT]
        dllNameRE = r"([\w\.\-]+)"
        # reduce: theApp.Dlls += [ "Name" ]  ->  Name
        subRE = "%s%s%s%s" % (dllRE, r"\s*\[\s*\"", dllNameRE, r"\"\s*\]")
        dll = re.sub(subRE, r"\1", text)
        lib = 'lib%s.so' % (dll)
        full_lib = find_library(lib)
        mess = 'module %s can not be loaded' % (dll)
        diag = ''
        if not full_lib:
            diag += '%s not found.' % (lib)
        else:
            self.logger().debug("Found %s. Checking dependencies...", full_lib)
            lddOut = subprocess.getoutput('ldd %s' % (full_lib))
            missLibs = []
            # reduce an ldd line "  libX.so => not found" to "libX.so"
            subRE = "%s%s%s" % (r"^\s*", dllNameRE, r"\s+.*not found\s*.*$")
            # getoutput() returns a single string: it has to be split into
            # lines, iterating over it directly would yield single characters
            for lddLine in lddOut.splitlines():
                if "not found" not in lddLine:
                    continue
                misLib = re.sub(subRE, r"\1", lddLine)
                missLibs.append(misLib)
                fullMissLib = find_library(misLib)
                if not fullMissLib:
                    continue
                libDir = os.path.dirname(fullMissLib)
                if not fileutil.access(fullMissLib, os.R_OK):
                    diag += "%s is found in but can not be read from %s." % (
                        fullMissLib, libDir)
                    diag += os.linesep
                else:
                    diag += "%s is found but ldd can not load it from %s." % (
                        misLib, libDir)
                    diag += os.linesep
                if libDir.startswith('/afs/'):
                    diag += "Check your afs cache size, it may be too small."
            if missLibs:
                diag += '%s can not be found' % (', '.join(missLibs))
        if diag:
            # an unset search path must not raise from inside the error
            # handler and hide the original problem
            diag += ' %s=%s' % (trfconsts.LD_LIBRARY_PATH,
                                os.environ.get(trfconsts.LD_LIBRARY_PATH, ''))
        else:
            diag = None
    return AtlasErrorCodes.ErrorInfo(acronym='ATH_DLL_LOAD',
                                     severity=FATAL,
                                     message=mess,
                                     diagnosis=diag)
def handleTransformError(self, e):
    """Convert a transform error exception into a FATAL ErrorInfo object.

    The acronym and message are taken from e.error and e.message. Each entry
    of e.extras is then transferred: when the ErrorInfo has an attribute of
    that name it is overwritten, otherwise ``name=value`` is appended to the
    message on a new line.

    Arguments:
    e -- exception providing the attributes error, message and extras

    Returns the populated ErrorInfo object.
    """
    info = AtlasErrorCodes.ErrorInfo(acronym=e.error,
                                     severity=FATAL,
                                     message=e.message)
    for name, value in e.extras.items():
        if not hasattr(info, name):
            # no matching field: keep the information in the message
            info.message += os.linesep + '%s=%s' % (name, value)
            continue
        setattr(info, name, value)
    return info
def handleIncludeError(self, e):
    """Build a FATAL 'ATH_JOP_NOTFOUND' ErrorInfo for an include error.

    The diagnosis states either that the environment variable named by
    trfconsts.JOBOPTIONSPATH is not defined, or shows its current value as
    the place where the lookup failed.

    Arguments:
    e -- the exception being handled; its args become the message

    Returns the ErrorInfo object.
    """
    variable = trfconsts.JOBOPTIONSPATH
    searchPath = os.environ.get(variable)
    if searchPath is None:
        diagnosis = 'Environment variable %s not defined' % (variable)
    else:
        diagnosis = 'Not found in %s=%s' % (variable, searchPath)
    return AtlasErrorCodes.ErrorInfo(acronym='ATH_JOP_NOTFOUND',
                                     severity=FATAL,
                                     message=e.args,
                                     diagnosis=diagnosis)
def handleRuntimeError(self, e):
    """Recognise runtime errors that wrap a C++ exception.

    The text inspected is e.args itself when that is a string, or its first
    element when e.args is a non-empty list/tuple starting with a string.

    Arguments:
    e -- the exception being handled

    Returns a FATAL 'ATH_EXC_CXX' ErrorInfo when the text contains
    'C++ exception', and None otherwise (including for exceptions without
    any arguments).
    """
    mess = ''
    args = getattr(e, 'args', None)
    if isinstance(args, str):
        mess = args
    # the emptiness test guards against exceptions raised without arguments,
    # for which args is an empty tuple and args[0] would raise IndexError
    elif isinstance(args, (list, tuple)) and args and isinstance(args[0], str):
        mess = args[0]
    if 'C++ exception' in mess:
        return AtlasErrorCodes.ErrorInfo(acronym='ATH_EXC_CXX',
                                         severity=FATAL,
                                         message='%s: %s' % (e.__class__.__name__, e.args))
    return None
def extractError(self, line):
    """Extract a generic WARNING, ERROR or FATAL message from a log line.

    A matching line consists of an optional source name, the severity
    keyword (optionally followed by a colon), whitespace and the message.

    Arguments:
    line -- the log line to analyse (trailing whitespace is ignored)

    Returns an ErrorInfo with the who, severity and message fields filled,
    or None for every line that does not match. The acronym is
    'TRF_UNKNOWN' for ERROR and FATAL messages and None for WARNING.
    """
    found = re.search(
        r"(^\S*\s*(?=WARNING|ERROR|FATAL))(WARNING|ERROR|FATAL)\:?\s+(.+$)",
        line.rstrip())
    if not found:
        return None

    who = found.group(1).strip()
    # no more than 2 words in algorithm name
    if len(who.split()) > 2:
        return None

    # translate the keyword into the corresponding severity constant
    severity = found.group(2).strip()
    if severity == 'FATAL':
        severity = FATAL
    elif severity == 'ERROR':
        severity = ERROR
    elif severity == 'WARNING':
        severity = WARNING

    message = found.group(3)
    who = who or "(Unknown)"
    if message.startswith(':'):
        # drop a leading colon left over in front of the message
        message = message[1:]
    message = message or None

    acronym = 'TRF_UNKNOWN' if (severity == ERROR or severity == FATAL) else None
    return AtlasErrorCodes.ErrorInfo(acronym=acronym,
                                     severity=severity,
                                     who=who,
                                     message=message)
def handlePythonSyntaxError(self, e):
    """Build a FATAL 'TRF_PYT_SYNTAX' ErrorInfo from the given exception.

    Arguments:
    e -- the exception being handled

    Returns an ErrorInfo whose message is '<exception class name>: <args>'.
    """
    description = '%s: %s' % (e.__class__.__name__, e.args)
    return AtlasErrorCodes.ErrorInfo(acronym='TRF_PYT_SYNTAX',
                                     severity=FATAL,
                                     message=description)
def handleKeyboardInterrupt(self, e):
    """Build a FATAL 'TRF_KEY_INTERRUPT' ErrorInfo for a keyboard interrupt.

    Arguments:
    e -- the exception being handled (not inspected)

    Returns an ErrorInfo whose diagnosis explains the interruption.
    """
    explanation = "You pressed the interrupt key on the keyboard (usually CRTL-C)"
    return AtlasErrorCodes.ErrorInfo(acronym='TRF_KEY_INTERRUPT',
                                     severity=FATAL,
                                     diagnosis=explanation)
sys.exit(0) else: # unsuccessful athena job ## Create a blank JobReport instance and populate it with the error detected. jobReport = JobReport() jobReport.setProducer('athena') jobReport.addError(err) jobReport.write() print '%s - exit code %s' % (' '.join(sys.argv), rc) sys.exit(rc) # Exceptions derived from exceptions.Exception except Exception, e: err = trferr.errorHandler.handleException(e) if err is None: err = AtlasErrorCodes.ErrorInfo(acronym='ATH_EXC_PYT', severity=AtlasErrorCodes.FATAL, message='%s: %s' % (e.__class__.__name__, e.args)) # Some throw a string except str, e: err = AtlasErrorCodes.ErrorInfo(acronym='ATH_EXC_PYT', severity=AtlasErrorCodes.FATAL, message=e) # Catch all other exceptions except: err = AtlasErrorCodes.ErrorInfo(acronym='ATH_EXC_PYT', severity=AtlasErrorCodes.FATAL) if err is not None: jobReport = JobReport() jobReport.setProducer('athena')