def test_tail(self):
    """
    _tail_

    Can we tail a file?

    Writes a 16-line fixture file, checks that BasicAlgos.tail returns
    the expected trailing lines, and always removes the fixture.
    """
    content = "a\nb\nc\nd\ne\nf\ng\nh\ni\nj\nk\nl\nm\nn\no\np\n"
    # Use a context manager so the handle is closed even if the write fails.
    with open('tmpfile.tmp', 'w') as f:
        f.write(content)
    try:
        self.assertEqual(BasicAlgos.tail('tmpfile.tmp', 10),
                         ['g\n', 'h\n', 'i\n', 'j\n', 'k\n',
                          'l\n', 'm\n', 'n\n', 'o\n', 'p\n'])
        self.assertEqual(BasicAlgos.tail('tmpfile.tmp', 2),
                         ['o\n', 'p\n'])
    finally:
        # Clean up the fixture even when an assertion above fails,
        # so the file does not leak into later tests.
        os.remove('tmpfile.tmp')
    return
def __call__(self, errCode, executor, **args):
    """
    Build a diagnostic error report for a generic CMSSW failure.

    Parses the framework job report if present, appends the tails of the
    CMSSW stderr/stdout and SCRAM logs to the error message, and records
    the error on the step report.

    :param errCode: exit/error code reported for the cmsRun step
    :param executor: step executor holding the report and step config
    :param args: optional keywords; 'ExceptionInstance' carries the
        original exception, if any
    """
    logging.critical("%s Diagnostic Handler invoked", self.__class__.__name__)
    msg = "Error in CMSSW: %s\n" % (errCode)
    jobRepXml = os.path.join(executor.step.builder.workingDir,
                             executor.step.output.jobReport)

    excepInst = args.get('ExceptionInstance', None)

    description = "Misc. CMSSW error"
    if excepInst:
        if hasattr(excepInst, 'detail'):
            description = excepInst.detail
        msg += str(excepInst)

    if os.path.exists(jobRepXml):
        # job report XML exists, load the exception information from it
        try:
            executor.report.parse(jobRepXml)
        except FwkJobReportException:
            # Job report is bad, the parse already puts a 50115 in the file
            pass
        reportStep = executor.report.retrieveStep(executor.step._internal_name)
        reportStep.status = errCode

    # Grab stderr log from CMSSW
    errLog = os.path.join(os.path.dirname(jobRepXml),
                          '%s-stderr.log' % (executor.step._internal_name))
    outLog = os.path.join(os.path.dirname(jobRepXml),
                          '%s-stdout.log' % (executor.step._internal_name))

    if os.path.exists(errLog):
        logTail = BasicAlgos.tail(errLog, DEFAULT_TAIL_LINES_FROM_LOG)
        msg += '\n Adding last %s lines of CMSSW stderr:\n' % DEFAULT_TAIL_LINES_FROM_LOG
        msg += logTail
    if os.path.exists(outLog):
        logTail = BasicAlgos.tail(outLog, DEFAULT_TAIL_LINES_FROM_LOG)
        msg += '\n Adding last %s lines of CMSSW stdout:\n' % DEFAULT_TAIL_LINES_FROM_LOG
        msg += logTail

    # If it exists, grab the SCRAM log
    errLog = os.path.join(os.path.dirname(jobRepXml), 'scramOutput.log')
    if os.path.exists(errLog):
        logTail = BasicAlgos.tail(errLog, 25)
        # BUGFIX: 25 lines are tailed above; the message used to say "ten".
        msg += '\n Adding last 25 lines of SCRAM error log:\n'
        msg += logTail

    # make sure the report has the error in it
    # (dropped the unused `dummy = getattr(...)` no-op)
    executor.report.addError(executor.step._internal_name,
                             errCode, description, msg)
    return
def __call__(self, errCode, executor, **args):
    """
    Build a diagnostic error report for a generic CMSSW failure.

    Parses the framework job report if present, appends the tails of the
    CMSSW stderr/stdout and SCRAM logs to the error message, and records
    the error on the step report.

    :param errCode: exit/error code reported for the cmsRun step
    :param executor: step executor holding the report and step config
    :param args: optional keywords; 'ExceptionInstance' carries the
        original exception, if any
    """
    logging.critical("%s Diagnostic Handler invoked", self.__class__.__name__)
    msg = "Error in CMSSW: %s\n" % (errCode)
    jobRepXml = os.path.join(executor.step.builder.workingDir,
                             executor.step.output.jobReport)

    excepInst = args.get('ExceptionInstance', None)

    description = "Misc. CMSSW error"
    if excepInst:
        if hasattr(excepInst, 'detail'):
            description = excepInst.detail
        msg += str(excepInst)

    if os.path.exists(jobRepXml):
        # job report XML exists, load the exception information from it
        try:
            executor.report.parse(jobRepXml)
        except FwkJobReportException:
            # Job report is bad, the parse already puts a 50115 in the file
            pass
        reportStep = executor.report.retrieveStep(executor.step._internal_name)
        reportStep.status = errCode

    # Grab stderr log from CMSSW
    errLog = os.path.join(os.path.dirname(jobRepXml),
                          '%s-stderr.log' % (executor.step._internal_name))
    outLog = os.path.join(os.path.dirname(jobRepXml),
                          '%s-stdout.log' % (executor.step._internal_name))

    if os.path.exists(errLog):
        logTail = BasicAlgos.tail(errLog, DEFAULT_TAIL_LINES_FROM_LOG)
        msg += '\n Adding last %s lines of CMSSW stderr:\n' % DEFAULT_TAIL_LINES_FROM_LOG
        msg += logTail
    if os.path.exists(outLog):
        logTail = BasicAlgos.tail(outLog, DEFAULT_TAIL_LINES_FROM_LOG)
        msg += '\n Adding last %s lines of CMSSW stdout:\n' % DEFAULT_TAIL_LINES_FROM_LOG
        msg += logTail

    # If it exists, grab the SCRAM log
    errLog = os.path.join(os.path.dirname(jobRepXml), 'scramOutput.log')
    if os.path.exists(errLog):
        logTail = BasicAlgos.tail(errLog, 25)
        # BUGFIX: 25 lines are tailed above; the message used to say "ten".
        msg += '\n Adding last 25 lines of SCRAM error log:\n'
        msg += logTail

    # make sure the report has the error in it
    # (dropped the unused `dummy = getattr(...)` no-op)
    executor.report.addError(executor.step._internal_name,
                             errCode, description, msg)
    return
def __call__(self, errCode, executor, **args):
    """
    Build a diagnostic error report for a generic CMSSW failure.

    Parses the framework job report if present, appends the tails of the
    CMSSW stderr/stdout and SCRAM logs to the error message, and records
    the error on the step report.

    :param errCode: exit/error code reported for the cmsRun step
    :param executor: step executor holding the report and step config
    :param args: optional keywords; 'ExceptionInstance' carries the
        original exception, if any
    """
    # Use lazy %-args so the logging module formats only when emitted.
    logging.critical("%s Diagnostic Handler invoked", self.__class__.__name__)
    msg = "Error in CMSSW: %s\n" % (errCode)
    jobRepXml = os.path.join(executor.step.builder.workingDir,
                             executor.step.output.jobReport)

    excepInst = args.get('ExceptionInstance', None)

    description = "Misc. CMSSW error"
    if excepInst:
        if hasattr(excepInst, 'detail'):
            description = excepInst.detail
        msg += str(excepInst)

    if os.path.exists(jobRepXml):
        # job report XML exists, load the exception information from it
        executor.report.parse(jobRepXml)
        reportStep = executor.report.retrieveStep(executor.step._internal_name)
        reportStep.status = errCode

    # Grab stderr log from CMSSW
    errLog = os.path.join(os.path.dirname(jobRepXml),
                          '%s-stderr.log' % (executor.step._internal_name))
    outLog = os.path.join(os.path.dirname(jobRepXml),
                          '%s-stdout.log' % (executor.step._internal_name))

    if os.path.exists(errLog):
        logTail = BasicAlgos.tail(errLog, 10)
        msg += '\n Adding last ten lines of CMSSW stderr:\n'
        msg += "".join(logTail)
    if os.path.exists(outLog):
        # BUGFIX: tail the stdout log here, not the stderr log again.
        logTail = BasicAlgos.tail(outLog, 10)
        msg += '\n Adding last ten lines of CMSSW stdout:\n'
        msg += "".join(logTail)

    # If it exists, grab the SCRAM log
    errLog = os.path.join(os.path.dirname(jobRepXml), 'scramOutput.log')
    if os.path.exists(errLog):
        logTail = BasicAlgos.tail(errLog, 25)
        # BUGFIX: 25 lines are tailed above; the message used to say "ten".
        msg += '\n Adding last 25 lines of SCRAM error log:\n'
        msg += "".join(logTail)

    # make sure the report has the error in it
    # (dropped the unused `errSection = getattr(...)` local)
    executor.report.addError(executor.step._internal_name,
                             errCode, description, msg)
    return
def __call__(self, errCode, executor, **args):
    """
    Build a diagnostic error report for a generic CMSSW failure.

    Parses the framework job report if present, appends the tails of the
    CMSSW stderr/stdout and SCRAM logs to the error message, and records
    the error on the step report.

    :param errCode: exit/error code reported for the cmsRun step
    :param executor: step executor holding the report and step config
    :param args: optional keywords; 'ExceptionInstance' carries the
        original exception, if any
    """
    # Consistency fix: sibling handlers use logging, not a bare print.
    logging.critical("%s Diagnostic Handler invoked", self.__class__.__name__)
    msg = "Error in CMSSW: %s\n" % (errCode)
    jobRepXml = os.path.join(executor.step.builder.workingDir,
                             executor.step.output.jobReport)

    excepInst = args.get('ExceptionInstance', None)

    description = "Misc. CMSSW error"
    if excepInst:
        if hasattr(excepInst, 'detail'):
            description = excepInst.detail
        msg += str(excepInst)

    if os.path.exists(jobRepXml):
        # job report XML exists, load the exception information from it
        executor.report.parse(jobRepXml)
        reportStep = executor.report.retrieveStep(executor.step._internal_name)
        reportStep.status = errCode

    # Grab stderr log from CMSSW
    errLog = os.path.join(os.path.dirname(jobRepXml),
                          '%s-stderr.log' % (executor.step._internal_name))
    outLog = os.path.join(os.path.dirname(jobRepXml),
                          '%s-stdout.log' % (executor.step._internal_name))

    if os.path.exists(errLog):
        logTail = BasicAlgos.tail(errLog, 10)
        msg += '\n Adding last ten lines of CMSSW stderr:\n'
        msg += "".join(logTail)
    if os.path.exists(outLog):
        # BUGFIX: tail the stdout log here, not the stderr log again.
        logTail = BasicAlgos.tail(outLog, 10)
        msg += '\n Adding last ten lines of CMSSW stdout:\n'
        msg += "".join(logTail)

    # If it exists, grab the SCRAM log
    errLog = os.path.join(os.path.dirname(jobRepXml), 'scramOutput.log')
    if os.path.exists(errLog):
        logTail = BasicAlgos.tail(errLog, 25)
        # BUGFIX: 25 lines are tailed above; the message used to say "ten".
        msg += '\n Adding last 25 lines of SCRAM error log:\n'
        msg += "".join(logTail)

    # make sure the report has the error in it
    # (dropped the unused `errSection = getattr(...)` local)
    executor.report.addError(executor.step._internal_name,
                             errCode, description, msg)
    return
def __call__(self, errCode, executor, **args):
    """
    Added for Steve to handle SCRAM script failure

    Must fail job (since SCRAM didn't run)

    Appends the tail of the SCRAM output log (if present) to the error
    message, records error 50513 on the step report and forces the
    report status to failed.

    :param errCode: error code passed by the dispatcher (unused here;
        50513 is always recorded for SCRAM script failures)
    :param executor: step executor holding the report and step config
    :param args: optional keywords; 'ExceptionInstance' carries the
        original exception, if any
    """
    msg = "SCRAM scripts failed to run!\n"
    if args.get('ExceptionInstance', False):
        msg += str(args.get('ExceptionInstance'))

    jobReport = os.path.join(executor.step.builder.workingDir,
                             executor.step.output.jobReport)
    errLog = os.path.join(os.path.dirname(jobReport), 'scramOutput.log')

    if os.path.exists(errLog):
        logTail = BasicAlgos.tail(errLog, 25)
        # BUGFIX: 25 lines are tailed above; the message used to say "ten".
        msg += '\n Adding last 25 lines of SCRAM error log:\n'
        msg += "".join(logTail)

    executor.report.addError(executor.step._internal_name,
                             50513, "SCRAMScriptFailure", msg)

    # Then mark the job as failed
    if executor.report.report.status == 0:
        executor.report.report.status = 1
def complete(self, jobs):
    """
    Do any completion work required

    In this case, look for a returned logfile

    For every job, check that a non-empty pickled framework job report
    exists in the job cache directory; if one is missing, build a fallback
    report embedding the tails of the condor out/err/log files and save it
    under the expected report name.

    :param jobs: iterable of job dictionaries; each is expected to carry
        'cache_dir', 'retry_count', 'id' and 'gridid' keys
    """
    for job in jobs:
        if job.get('cache_dir', None) is None or job.get('retry_count', None) is None:
            # Then we can't do anything
            logging.error("Can't find this job's cache_dir or retry count: %s", job)
            continue
        reportName = os.path.join(job['cache_dir'],
                                  'Report.%i.pkl' % job['retry_count'])
        if os.path.isfile(reportName) and os.path.getsize(reportName) > 0:
            # everything in order, move on
            continue
        elif os.path.isdir(reportName):
            # Then something weird has happened. Report error, do nothing
            logging.error("The job report for job with id %s and gridid %s is a directory",
                          job['id'], job['gridid'])
            logging.error("Ignoring this, but this is very strange")
        else:
            logging.error("No job report for job with id %s and gridid %s",
                          job['id'], job['gridid'])
            # Drop any zero-size leftover so the fallback report can be saved.
            if os.path.isfile(reportName):
                os.remove(reportName)
            # create a report from scratch
            condorReport = Report()
            logOutput = 'Could not find jobReport\n'
            if os.path.isdir(job['cache_dir']):
                condorOut = "condor.%s.out" % job['gridid']
                condorErr = "condor.%s.err" % job['gridid']
                condorLog = "condor.%s.log" % job['gridid']
                for condorFile in [condorOut, condorErr, condorLog]:
                    condorFilePath = os.path.join(job['cache_dir'], condorFile)
                    if os.path.isfile(condorFilePath):
                        # Append the last 50 lines of each condor file to the
                        # fallback error message.
                        logTail = BasicAlgos.tail(condorFilePath, 50)
                        logOutput += 'Adding end of %s to error message:\n' % condorFile
                        logOutput += '\n'.join(logTail)
                condorReport.addError("NoJobReport", 99303, "NoJobReport", logOutput)
            else:
                # Cache directory vanished: recreate it so the fallback
                # report has somewhere to live.
                msg = "Serious Error in Completing condor job with id %s!\n" % job['id']
                msg += "Could not find jobCache directory %s\n" % job['cache_dir']
                msg += "Creating a new cache_dir for failed job report\n"
                logging.error(msg)
                os.makedirs(job['cache_dir'])
                condorReport.addError("NoJobReport", 99304, "NoCacheDir", logOutput)
            condorReport.save(filename=reportName)
            logging.debug("Created failed job report for job with id %s and gridid %s",
                          job['id'], job['gridid'])
    return
def __call__(self, errCode, executor, **args):
    """
    Diagnostic handler for a specific cmsRun exception exit code.

    Parses the framework job report if present, marks the step with this
    handler's exit code (self.code), appends the tails of the CMSSW logs
    to the message, and records the error on the step report unless an
    identically-described error is already there.

    :param errCode: error code passed by the dispatcher (self.code is
        what actually gets recorded)
    :param executor: step executor holding the report and step config
    :param args: optional keywords (unused here)
    """
    # Consistency fix: sibling handlers use logging, not a bare print.
    logging.critical("%s Diagnostic Handler invoked", self.__class__.__name__)
    msg = "Exit %s: %s Exception from cmsRun" % (self.code, self.desc)
    jobRepXml = os.path.join(executor.step.builder.workingDir,
                             executor.step.output.jobReport)

    if os.path.exists(jobRepXml):
        # job report XML exists, load the exception information from it
        try:
            executor.report.parse(jobRepXml)
        except FwkJobReportException:
            # Job report is bad, the parse already puts a 50115 in the file
            pass
        reportStep = executor.report.retrieveStep(executor.step._internal_name)
        reportStep.status = self.code

    errLog = os.path.join(os.path.dirname(jobRepXml),
                          '%s-stderr.log' % (executor.step._internal_name))
    outLog = os.path.join(os.path.dirname(jobRepXml),
                          '%s-stdout.log' % (executor.step._internal_name))

    if os.path.exists(errLog):
        logTail = BasicAlgos.tail(errLog, 10)
        msg += '\n Adding last ten lines of CMSSW stderr:\n'
        msg += "".join(logTail)
    if os.path.exists(outLog):
        # BUGFIX: tail the stdout log here, not the stderr log again.
        logTail = BasicAlgos.tail(outLog, 10)
        msg += '\n Adding last ten lines of CMSSW stdout:\n'
        msg += "".join(logTail)

    # make sure the report has the error in it
    errSection = getattr(executor.report.report, "errors", None)
    if errSection is None:
        executor.report.addError(executor.step._internal_name,
                                 self.code, self.desc, msg)
    else:
        # Only add if this description has not already been recorded.
        if not hasattr(errSection, self.desc):
            executor.report.addError(executor.step._internal_name,
                                     self.code, self.desc, msg)

    # Debug aid: show what the report now holds (was a bare print).
    logging.debug(executor.report.report.errors)
    return
def __call__(self, errCode, executor, **args):
    """
    Diagnostic handler for a specific cmsRun exception exit code.

    Parses the framework job report if present, marks the step with this
    handler's exit code (self.code), appends the tails of the CMSSW logs
    to the message, and records the error on the step report unless an
    identically-described error is already there.

    :param errCode: error code passed by the dispatcher (self.code is
        what actually gets recorded)
    :param executor: step executor holding the report and step config
    :param args: optional keywords (unused here)
    """
    # Consistency fix: sibling handlers use logging, not a bare print.
    logging.critical("%s Diagnostic Handler invoked", self.__class__.__name__)
    msg = "Exit %s: %s Exception from cmsRun" % (self.code, self.desc)
    jobRepXml = os.path.join(executor.step.builder.workingDir,
                             executor.step.output.jobReport)

    if os.path.exists(jobRepXml):
        # job report XML exists, load the exception information from it
        try:
            executor.report.parse(jobRepXml)
        except FwkJobReportException:
            # Job report is bad, the parse already puts a 50115 in the file
            pass
        reportStep = executor.report.retrieveStep(executor.step._internal_name)
        reportStep.status = self.code

    errLog = os.path.join(os.path.dirname(jobRepXml),
                          '%s-stderr.log' % (executor.step._internal_name))
    outLog = os.path.join(os.path.dirname(jobRepXml),
                          '%s-stdout.log' % (executor.step._internal_name))

    if os.path.exists(errLog):
        logTail = BasicAlgos.tail(errLog, 10)
        msg += '\n Adding last ten lines of CMSSW stderr:\n'
        msg += "".join(logTail)
    if os.path.exists(outLog):
        # BUGFIX: tail the stdout log here, not the stderr log again.
        logTail = BasicAlgos.tail(outLog, 10)
        msg += '\n Adding last ten lines of CMSSW stdout:\n'
        msg += "".join(logTail)

    # make sure the report has the error in it
    errSection = getattr(executor.report.report, "errors", None)
    if errSection is None:
        executor.report.addError(executor.step._internal_name,
                                 self.code, self.desc, msg)
    else:
        # Only add if this description has not already been recorded.
        if not hasattr(errSection, self.desc):
            executor.report.addError(executor.step._internal_name,
                                     self.code, self.desc, msg)

    # Debug aid: show what the report now holds (was a bare print).
    logging.debug(executor.report.report.errors)
    return
def parseCondorLogs(logfile, extension):
    """
    Retrieve the last X lines of the log file

    Expands `logfile` as a glob pattern, picks the most recently modified
    match, and returns its last 50 lines wrapped in a short header; an
    empty string when nothing matches.

    :param logfile: glob pattern pointing at candidate condor log files
    :param extension: label used in the header line of the returned text
    :returns: formatted tail text, or '' when no log file is found
    """
    collected = ''
    candidates = glob.glob(logfile)
    # Most recently modified match wins; None when the glob is empty.
    newestLog = max(candidates, key=lambda path: os.stat(path).st_mtime) if candidates else None
    if newestLog is not None and os.path.isfile(newestLog):
        tailText = BasicAlgos.tail(newestLog, 50)
        collected += 'Adding end of condor.%s to error message:\n' % extension
        collected += tailText
        collected += '\n\n'
    return collected
def complete(self, jobs):
    """
    Do any completion work required

    In this case, look for a returned logfile

    For every job, check that a non-empty pickled framework job report
    exists; if not, save a fallback report that embeds the tail of the
    job's condor.log.

    :param jobs: iterable of job dictionaries; each is expected to carry
        'cache_dir', 'retry_count' and 'id' keys
    """
    for job in jobs:
        if job.get("cache_dir", None) is None or job.get("retry_count", None) is None:
            # Then we can't do anything
            logging.error("Can't find this job's cache_dir in CondorPlugin.complete")
            logging.error("cache_dir: %s", job.get("cache_dir", "Missing"))
            logging.error("retry_count: %s", job.get("retry_count", "Missing"))
            continue
        reportName = os.path.join(job["cache_dir"],
                                  "Report.%i.pkl" % job["retry_count"])
        if os.path.isfile(reportName) and os.path.getsize(reportName) > 0:
            # Then we have a real report.
            # Do nothing
            continue
        if os.path.isdir(reportName):
            # Then something weird has happened.
            # File error, do nothing
            logging.error("Went to check on error report for job %i. Found a directory instead.\n" % job["id"])
            logging.error("Ignoring this, but this is very strange.\n")

        # If we're still here, we must not have a real error report
        logOutput = "Could not find jobReport"
        logPath = os.path.join(job["cache_dir"], "condor.log")
        if os.path.isfile(logPath):
            # BUGFIX: the original tailed `errLog`, which is never defined
            # in this function and raised NameError; tail `logPath`.
            logTail = BasicAlgos.tail(logPath, 50)
            logOutput += "Adding end of condor.log to error message:\n"
            logOutput += logTail
        condorReport = Report()
        condorReport.addError("NoJobReport", 61303, "NoJobReport", logOutput)
        condorReport.save(filename=reportName)
        logging.debug("No returning job report for job %i", job["id"])
    return
def complete(self, jobs):
    """
    Do any completion work required

    In this case, look for a returned logfile

    For every job, check that a non-empty pickled framework job report
    exists; if not, build a fallback report from the tails of the condor
    err/out/log files, classify the failure by scanning the condor log
    (removed by GLIDEIN / condor_rm vs. other removal) and the out/err
    files for WMException/ERROR messages, and save the fallback report.

    :param jobs: iterable of job dictionaries; each is expected to carry
        'cache_dir', 'retry_count', 'id' and 'gridid' keys
    """
    for job in jobs:
        if job.get('cache_dir', None) is None or job.get(
                'retry_count', None) is None:
            # Then we can't do anything
            logging.error(
                "Can't find this job's cache_dir or retry count: %s", job)
            continue
        reportName = os.path.join(job['cache_dir'],
                                  'Report.%i.pkl' % job['retry_count'])
        if os.path.isfile(reportName) and os.path.getsize(reportName) > 0:
            # everything in order, move on
            continue
        elif os.path.isdir(reportName):
            # Then something weird has happened. Report error, do nothing
            logging.error(
                "The job report for job with id %s and gridid %s is a directory",
                job['id'], job['gridid'])
            logging.error("Ignoring this, but this is very strange")
        else:
            logging.error("No job report for job with id %s and gridid %s",
                          job['id'], job['gridid'])
            # Drop any zero-size leftover so the fallback report can be saved.
            if os.path.isfile(reportName):
                os.remove(reportName)
            # create a report from scratch
            condorReport = Report()
            logOutput = 'Could not find jobReport\n'
            if os.path.isdir(job['cache_dir']):
                condorErr = "condor.%s.err" % job['gridid']
                condorOut = "condor.%s.out" % job['gridid']
                condorLog = "condor.%s.log" % job['gridid']
                # Defaults; overwritten when the condor log names a cause.
                exitCode = 99303
                exitType = "NoJobReport"
                for condorFile in [condorErr, condorOut, condorLog]:
                    condorFilePath = os.path.join(job['cache_dir'], condorFile)
                    logOutput += "\n========== %s ==========\n" % condorFile
                    if os.path.isfile(condorFilePath):
                        logTail = BasicAlgos.tail(condorFilePath, 50)
                        logOutput += 'Adding end of %s to error message:\n\n' % condorFile
                        logOutput += logTail
                        logOutput += '\n\n'
                        if condorFile == condorLog:
                            # for condor log, search for the information
                            for matchObj in getIterMatchObjectOnRegexp(
                                    condorFilePath, CONDOR_LOG_FILTER_REGEXP):
                                condorReason = matchObj.group("Reason")
                                if condorReason:
                                    logOutput += condorReason
                                    if "SYSTEM_PERIODIC_REMOVE" in condorReason or "via condor_rm" in condorReason:
                                        exitCode = 99400
                                        exitType = "RemovedByGLIDEIN"
                                    else:
                                        exitCode = 99401
                                siteName = matchObj.group("Site")
                                if siteName:
                                    condorReport.data.siteName = siteName
                                else:
                                    condorReport.data.siteName = "NoReportedSite"
                        else:
                            # err/out files: pull out WMException / ERROR text.
                            for matchObj in getIterMatchObjectOnRegexp(
                                    condorFilePath, WMEXCEPTION_REGEXP):
                                errMsg = matchObj.group('WMException')
                                if errMsg:
                                    logOutput += "\n\n%s\n" % errMsg
                                errMsg = matchObj.group('ERROR')
                                if errMsg:
                                    logOutput += "\n\n%s\n" % errMsg
                logOutput += '\n\n'
                condorReport.addError(exitType, exitCode, exitType, logOutput)
            else:
                # Cache directory vanished: recreate it so the fallback
                # report has somewhere to live.
                msg = "Serious Error in Completing condor job with id %s!\n" % job['id']
                msg += "Could not find jobCache directory %s\n" % job['cache_dir']
                msg += "Creating a new cache_dir for failed job report\n"
                logging.error(msg)
                os.makedirs(job['cache_dir'])
                condorReport.addError("NoJobReport", 99304, "NoCacheDir",
                                      logOutput)
            condorReport.save(filename=reportName)
            logging.debug(
                "Created failed job report for job with id %s and gridid %s",
                job['id'], job['gridid'])
    return
def complete(self, jobs):
    """
    Do any completion work required

    In this case, look for a returned logfile

    For every job, check that a non-empty pickled framework job report
    exists; if not, build a fallback "NoJobReport" report containing the
    tail of the newest condor log found in the cache directory, replacing
    any empty leftover report file along the way.

    :param jobs: iterable of job dictionaries; each is expected to carry
        'cache_dir', 'retry_count' and 'id' keys
    """
    for job in jobs:
        if job.get('cache_dir', None) == None or job.get('retry_count', None) == None:
            # Then we can't do anything
            logging.error("Can't find this job's cache_dir in CondorPlugin.complete")
            logging.error("cache_dir: %s" % job.get('cache_dir', 'Missing'))
            logging.error("retry_count: %s" % job.get('retry_count', 'Missing'))
            continue
        reportName = os.path.join(job['cache_dir'],
                                  'Report.%i.pkl' % job['retry_count'])
        if os.path.isfile(reportName) and os.path.getsize(reportName) > 0:
            # Then we have a real report.
            # Do nothing
            continue
        if os.path.isdir(reportName):
            # Then something weird has happened.
            # File error, do nothing
            logging.error("Went to check on error report for job %i. Found a directory instead.\n" % job['id'])
            logging.error("Ignoring this, but this is very strange.\n")

        # If we're still here, we must not have a real error report
        logOutput = 'Could not find jobReport\n'
        #But we don't know exactly the condor id, so it will append
        #the last lines of the latest condor log in cache_dir
        genLogPath = os.path.join(job['cache_dir'], 'condor.*.*.log')
        logPaths = glob.glob(genLogPath)
        errLog = None
        if len(logPaths):
            # Newest condor log by modification time.
            errLog = max(logPaths, key = lambda path : os.stat(path).st_mtime)
        if errLog != None and os.path.isfile(errLog):
            logTail = BasicAlgos.tail(errLog, 50)
            logOutput += 'Adding end of condor.log to error message:\n'
            logOutput += '\n'.join(logTail)

        if not os.path.isdir(job['cache_dir']):
            # Cache directory vanished: recreate it and record a
            # NoCacheDir error instead of the usual NoJobReport.
            msg =  "Serious Error in Completing condor job with id %s!\n" % job.get('id', 'unknown')
            msg += "Could not find jobCache directory - directory deleted under job: %s\n" % job['cache_dir']
            msg += "Creating artificial cache_dir for failed job report\n"
            logging.error(msg)
            os.makedirs(job['cache_dir'])
            logOutput += msg
            condorReport = Report()
            condorReport.addError("NoJobReport", 99304, "NoCacheDir", logOutput)
            condorReport.save(filename = reportName)
            continue

        condorReport = Report()
        condorReport.addError("NoJobReport", 99303, "NoJobReport", logOutput)
        if os.path.isfile(reportName):
            # Then we have a file already there.  It should be zero size due
            # to the if statements above, but we should remove it.
            if os.path.getsize(reportName) > 0:
                # This should never happen.  If it does, ignore it
                msg = "Critical strange problem.  FWJR changed size while being processed."
                logging.error(msg)
            else:
                try:
                    os.remove(reportName)
                    condorReport.save(filename = reportName)
                except Exception as ex:
                    # Best effort: losing the fallback report is preferable
                    # to crashing the completion loop here.
                    logging.error("Cannot remove and replace empty report %s" % reportName)
                    logging.error("Report continuing without error!")
        else:
            condorReport.save(filename = reportName)

        # Debug message to end loop
        logging.debug("No returning job report for job %i" % job['id'])
    return
def complete(self, jobs):
    """
    Do any completion work required

    In this case, look for a returned logfile

    For every job, check that a non-empty pickled framework job report
    exists; if not, build a fallback report from the tails of the condor
    err/out/log files, classify the failure by scanning the condor log
    (removed by GLIDEIN / condor_rm vs. other removal) and the out/err
    files for WMException/ERROR messages, and save the fallback report.

    :param jobs: iterable of job dictionaries; each is expected to carry
        'cache_dir', 'retry_count', 'id' and 'gridid' keys
    """
    for job in jobs:
        if job.get('cache_dir', None) is None or job.get('retry_count', None) is None:
            # Then we can't do anything
            logging.error("Can't find this job's cache_dir or retry count: %s", job)
            continue
        reportName = os.path.join(job['cache_dir'],
                                  'Report.%i.pkl' % job['retry_count'])
        if os.path.isfile(reportName) and os.path.getsize(reportName) > 0:
            # everything in order, move on
            continue
        elif os.path.isdir(reportName):
            # Then something weird has happened. Report error, do nothing
            logging.error("The job report for job with id %s and gridid %s is a directory",
                          job['id'], job['gridid'])
            logging.error("Ignoring this, but this is very strange")
        else:
            logging.error("No job report for job with id %s and gridid %s",
                          job['id'], job['gridid'])
            # Drop any zero-size leftover so the fallback report can be saved.
            if os.path.isfile(reportName):
                os.remove(reportName)
            # create a report from scratch
            condorReport = Report()
            logOutput = 'Could not find jobReport\n'
            if os.path.isdir(job['cache_dir']):
                condorErr = "condor.%s.err" % job['gridid']
                condorOut = "condor.%s.out" % job['gridid']
                condorLog = "condor.%s.log" % job['gridid']
                # Defaults; overwritten when the condor log names a cause.
                exitCode = 99303
                exitType = "NoJobReport"
                for condorFile in [condorErr, condorOut, condorLog]:
                    condorFilePath = os.path.join(job['cache_dir'], condorFile)
                    logOutput += "\n========== %s ==========\n" % condorFile
                    if os.path.isfile(condorFilePath):
                        logTail = BasicAlgos.tail(condorFilePath, 50)
                        logOutput += 'Adding end of %s to error message:\n\n' % condorFile
                        logOutput += logTail
                        logOutput += '\n\n'
                        if condorFile == condorLog:
                            # for condor log, search for the information
                            for matchObj in getIterMatchObjectOnRegexp(condorFilePath, CONDOR_LOG_FILTER_REGEXP):
                                condorReason = matchObj.group("Reason")
                                if condorReason:
                                    logOutput += condorReason
                                    if "SYSTEM_PERIODIC_REMOVE" in condorReason or "via condor_rm" in condorReason:
                                        exitCode = 99400
                                        exitType = "RemovedByGLIDEIN"
                                    else:
                                        exitCode = 99401
                                siteName = matchObj.group("Site")
                                if siteName:
                                    condorReport.data.siteName = siteName
                                else:
                                    condorReport.data.siteName = "NoReportedSite"
                        else:
                            # err/out files: pull out WMException / ERROR text.
                            for matchObj in getIterMatchObjectOnRegexp(condorFilePath, WMEXCEPTION_REGEXP):
                                errMsg = matchObj.group('WMException')
                                if errMsg:
                                    logOutput += "\n\n%s\n" % errMsg
                                errMsg = matchObj.group('ERROR')
                                if errMsg:
                                    logOutput += "\n\n%s\n" % errMsg
                logOutput += '\n\n'
                condorReport.addError(exitType, exitCode, exitType, logOutput)
            else:
                # Cache directory vanished: recreate it so the fallback
                # report has somewhere to live.
                msg = "Serious Error in Completing condor job with id %s!\n" % job['id']
                msg += "Could not find jobCache directory %s\n" % job['cache_dir']
                msg += "Creating a new cache_dir for failed job report\n"
                logging.error(msg)
                os.makedirs(job['cache_dir'])
                condorReport.addError("NoJobReport", 99304, "NoCacheDir", logOutput)
            condorReport.save(filename=reportName)
            logging.debug("Created failed job report for job with id %s and gridid %s",
                          job['id'], job['gridid'])
    return
def __call__(self, errCode, executor, **args):
    """
    _operator()_

    Look for the XML job report, try and read it and extract the error
    information from it

    Records a CMSSWStepFailure with the tails of the CMSSW logs, then
    ensures the parsed report carries an error entry (adding a
    MissingJobReport / MissingErrorReport / ErrorLoggingAddition entry
    as appropriate).

    :param errCode: non-zero exit code returned by cmsRun
    :param executor: step executor holding the report and step config
    :param args: optional keywords; 'ExceptionInstance' carries the
        original exception, if any
    """
    jobRepXml = os.path.join(executor.step.builder.workingDir,
                             executor.step.output.jobReport)

    errLog = os.path.join(os.path.dirname(jobRepXml),
                          '%s-stderr.log' % (executor.step._internal_name))
    outLog = os.path.join(os.path.dirname(jobRepXml),
                          '%s-stdout.log' % (executor.step._internal_name))

    addOn = '\n'
    # BUGFIX: msg must exist even when the stdout log is missing,
    # otherwise the addError() call below raises NameError.
    msg = ''
    if os.path.exists(errLog):
        logTail = BasicAlgos.tail(errLog, 10)
        addOn += '\nAdding last ten lines of CMSSW stderr:\n'
        addOn += "".join(logTail)
    else:
        logging.error("No stderr from CMSSW")
        # BUGFIX: list the report's directory (dirname), not its file
        # name (basename), which is not a directory.
        logging.error(os.listdir(os.path.dirname(jobRepXml)))

    if os.path.exists(outLog):
        # BUGFIX: tail the stdout log here, not the stderr log again.
        logTail = BasicAlgos.tail(outLog, 10)
        msg = '\n Adding last ten lines of CMSSW stdout:\n'
        msg += "".join(logTail)

    # Add the error we were sent
    ex = args.get('ExceptionInstance', None)
    executor.report.addError(executor.step._internal_name,
                             errCode, "CMSSWStepFailure", msg + str(ex))

    if not os.path.exists(jobRepXml):
        # no report => Error
        msg = "No Job Report Found: %s" % jobRepXml
        executor.report.addError(executor.step._internal_name,
                                 50115, "MissingJobReport", msg)
        return

    # job report XML exists, load the exception information from it
    try:
        executor.report.parse(jobRepXml)
    except FwkJobReportException:
        # Job report is bad, the parse already puts a 50115 in the file
        # just go on
        pass

    # make sure the report has the error in it
    errSection = getattr(executor.report.report, "errors", None)
    if errSection is None:
        msg = "Job Report contains no error report, but cmsRun exited non-zero: %s" % errCode
        msg += addOn
        executor.report.addError(executor.step._internal_name,
                                 50116, "MissingErrorReport", msg)
        return
    else:
        # check exit code in report is non zero
        if executor.report.report.status == 0:
            msg = "Job Report contains no error report, but cmsRun exited non-zero: %s" % errCode
            msg += addOn
            executor.report.addError(executor.step._internal_name,
                                     50116, "MissingErrorReport", msg)
        else:
            msg = "Adding extra error in order to hold error report"
            msg += addOn
            executor.report.addError(executor.step._internal_name,
                                     99999, "ErrorLoggingAddition", msg)
    return
def complete(self, jobs):
    """
    Do any completion work required

    In this case, look for a returned logfile

    For every job, check that a non-empty pickled framework job report
    exists in the job cache directory; if one is missing, build a fallback
    report embedding the tails of the condor out/err/log files and save it
    under the expected report name.

    :param jobs: iterable of job dictionaries; each is expected to carry
        'cache_dir', 'retry_count', 'id' and 'gridid' keys
    """
    for job in jobs:
        if job.get('cache_dir', None) is None or job.get(
                'retry_count', None) is None:
            # Then we can't do anything
            logging.error(
                "Can't find this job's cache_dir or retry count: %s", job)
            continue
        reportName = os.path.join(job['cache_dir'],
                                  'Report.%i.pkl' % job['retry_count'])
        if os.path.isfile(reportName) and os.path.getsize(reportName) > 0:
            # everything in order, move on
            continue
        elif os.path.isdir(reportName):
            # Then something weird has happened. Report error, do nothing
            logging.error(
                "The job report for job with id %s and gridid %s is a directory",
                job['id'], job['gridid'])
            logging.error("Ignoring this, but this is very strange")
        else:
            logging.error("No job report for job with id %s and gridid %s",
                          job['id'], job['gridid'])
            # Drop any zero-size leftover so the fallback report can be saved.
            if os.path.isfile(reportName):
                os.remove(reportName)
            # create a report from scratch
            condorReport = Report()
            logOutput = 'Could not find jobReport\n'
            if os.path.isdir(job['cache_dir']):
                condorOut = "condor.%s.out" % job['gridid']
                condorErr = "condor.%s.err" % job['gridid']
                condorLog = "condor.%s.log" % job['gridid']
                for condorFile in [condorOut, condorErr, condorLog]:
                    condorFilePath = os.path.join(job['cache_dir'], condorFile)
                    if os.path.isfile(condorFilePath):
                        # Append the last 50 lines of each condor file to
                        # the fallback error message.
                        logTail = BasicAlgos.tail(condorFilePath, 50)
                        logOutput += 'Adding end of %s to error message:\n' % condorFile
                        logOutput += '\n'.join(logTail)
                condorReport.addError("NoJobReport", 99303, "NoJobReport",
                                      logOutput)
            else:
                # Cache directory vanished: recreate it so the fallback
                # report has somewhere to live.
                msg = "Serious Error in Completing condor job with id %s!\n" % job['id']
                msg += "Could not find jobCache directory %s\n" % job['cache_dir']
                msg += "Creating a new cache_dir for failed job report\n"
                logging.error(msg)
                os.makedirs(job['cache_dir'])
                condorReport.addError("NoJobReport", 99304, "NoCacheDir",
                                      logOutput)
            condorReport.save(filename=reportName)
            logging.debug(
                "Created failed job report for job with id %s and gridid %s",
                job['id'], job['gridid'])
    return