Exemplo n.º 1
0
def processFinishedJob(jobID, resultStatus, updatedJobFiles, jobBatcher, childJobFileToParentJob, childCounts, config):
    """Processes a job that has finished running and updates its state.

    jobID is given to jobBatcher.removeJobID, which returns the path of the
    job file. resultStatus is the status reported for the job; any non-zero
    value is treated as a failure, as is the presence of a new or updating
    file for the job. Jobs that still have follow-on commands or children are
    added to updatedJobFiles; otherwise the job is considered done and
    updateParentStatus is called with childJobFileToParentJob and childCounts.
    config is only passed on to setupJobAfterFailure.
    """
    jobFile = jobBatcher.removeJobID(jobID)
    updatingFilePresent = processAnyUpdatingFile(jobFile)
    newFilePresent = processAnyNewFile(jobFile)

    #A log file left in the job's directory is treated as a sign of failure, so report it.
    jobDir = os.path.dirname(jobFile)
    if os.path.exists(getJobLogFileName(jobDir)):
        logger.critical("The job seems to have left a log file, indicating failure: %s", jobFile)
        logFile(getJobLogFileName(jobDir), logger.critical)

    if not os.path.isfile(jobFile):
        #The job file is gone, so the job is done
        if resultStatus != 0:
            logger.critical("Despite the batch system claiming failure the job %s seems to have finished and been removed" % jobFile)
        updateParentStatus(jobFile, updatedJobFiles, childJobFileToParentJob, childCounts)
        return

    job = Job.read(jobFile)
    assert job not in updatedJobFiles

    jobFailed = resultStatus != 0 or newFilePresent or updatingFilePresent
    if jobFailed:
        if not os.path.exists(job.getLogFileName()):
            logger.critical("No log file is present, despite job failing: %s", jobFile)
        setupJobAfterFailure(job, config)

    #Checked only after any failure handling above, which is given the job object.
    if len(job.followOnCommands) == 0 and len(job.children) == 0:
        #Jobs with no children or follow ons may log to master, so relay their messages here.
        for message in job.messages:
            logger.critical("Got message from job at time: %s : %s" % (time.time(), message))
        logger.debug("Job has no follow-ons or children despite job file being present so we'll consider it done: %s" % jobFile)
        updateParentStatus(jobFile, updatedJobFiles, childJobFileToParentJob, childCounts)
        return

    #Now we know the job is done we can add it to the list of updated job files
    updatedJobFiles.add(job)
    logger.debug("Added job: %s to active jobs" % jobFile)
Exemplo n.º 2
0
def processFinishedJob(jobID, resultStatus, updatedJobFiles, jobBatcher,
                       childJobFileToParentJob, childCounts, config):
    """Reads the job file of a finished job and updates the job's state.

    The job file path is obtained from jobBatcher.removeJobID(jobID). A
    non-zero resultStatus, or the presence of a new or updating file for the
    job, is treated as failure and handled by setupJobAfterFailure(job,
    config). A job with remaining follow-on commands or children is added to
    updatedJobFiles; a job with neither, or whose job file no longer exists,
    is reported to updateParentStatus together with childJobFileToParentJob
    and childCounts.
    """
    jobFile = jobBatcher.removeJobID(jobID)
    updatingFilePresent = processAnyUpdatingFile(jobFile)
    newFilePresent = processAnyNewFile(jobFile)

    #A log file in the job's directory is reported as an indication of failure.
    jobDir = os.path.dirname(jobFile)
    if os.path.exists(getJobLogFileName(jobDir)):
        logger.critical(
            "The job seems to have left a log file, indicating failure: %s",
            jobFile)
        logFile(getJobLogFileName(jobDir), logger.critical)

    jobFileStillPresent = os.path.isfile(jobFile)
    if not jobFileStillPresent:
        #The job is done
        if resultStatus != 0:
            logger.critical(
                "Despite the batch system claiming failure the job %s seems to have finished and been removed"
                % jobFile)
        updateParentStatus(jobFile, updatedJobFiles, childJobFileToParentJob,
                           childCounts)
        return

    job = Job.read(jobFile)
    assert job not in updatedJobFiles

    if resultStatus != 0 or newFilePresent or updatingFilePresent:
        logFilePresent = os.path.exists(job.getLogFileName())
        if not logFilePresent:
            logger.critical(
                "No log file is present, despite job failing: %s", jobFile)
        setupJobAfterFailure(job, config)

    if len(job.followOnCommands) > 0 or len(job.children) > 0:
        #Now we know the job is done we can add it to the list of updated job files
        updatedJobFiles.add(job)
        logger.debug("Added job: %s to active jobs" % jobFile)
        return

    #This is here because jobs with no children or follow ons may log to master.
    for message in job.messages:
        logger.critical("Got message from job at time: %s : %s" %
                        (time.time(), message))
    logger.debug(
        "Job has no follow-ons or children despite job file being present so we'll consider it done: %s"
        % jobFile)
    updateParentStatus(jobFile, updatedJobFiles, childJobFileToParentJob,
                       childCounts)
Exemplo n.º 3
0
def main():
    """Reports the state of the job tree.

    Parses the command line (the job tree directory may be given either with
    --jobTree or as the single positional argument), checks that the job tree
    directory contains a config file and a directory of jobs, surveys the job
    files with parseJobFiles and prints a one line summary.

    With --verbose the log file of every job whose remainingRetryCount is 0
    is reported. With --failIfNotComplete the script exits with status 1 if
    there are any active jobs or parent jobs with children left.

    The print calls use the single-argument parenthesised form so that they
    behave identically as Python 2 print statements and Python 3 print calls.
    """

    ##########################################
    #Construct the arguments.
    ##########################################

    parser = getBasicOptionParser(
        "usage: %prog [--jobTree] JOB_TREE_DIR [options]", "%prog 0.1")

    parser.add_option(
        "--jobTree",
        dest="jobTree",
        help=
        "Directory containing the job tree. The jobTree location can also be specified as the argument to the script. default=%default",
        default='./jobTree')

    parser.add_option(
        "--verbose",
        dest="verbose",
        action="store_true",
        help=
        "Print loads of information, particularly all the log files of jobs that failed. default=%default",
        default=False)

    parser.add_option(
        "--failIfNotComplete",
        dest="failIfNotComplete",
        action="store_true",
        help=
        "Return exit value of 1 if job tree jobs not all completed. default=%default",
        default=False)

    options, args = parseBasicOptions(parser)
    logger.info("Parsed arguments")

    #Invoked with no arguments at all: show the help and stop.
    if len(sys.argv) == 1:
        parser.print_help()
        sys.exit(0)

    assert len(args) <= 1  #Only jobtree may be specified as argument
    if len(args) == 1:  #Allow jobTree directory as arg
        options.jobTree = args[0]

    ##########################################
    #Do some checks.
    ##########################################

    logger.info("Checking if we have files for job tree")
    assert options.jobTree is not None
    assert os.path.isdir(options.jobTree)  #The given job dir tree must exist.
    assert os.path.isfile(getConfigFileName(
        options.jobTree))  #A valid job tree must contain the config file
    assert os.path.isdir(getJobFileDirName(
        options.jobTree))  #A job tree must have a directory of jobs.

    ##########################################
    #Survey the status of the job and report.
    ##########################################

    #These containers are filled in by parseJobFiles.
    childJobFileToParentJob, childCounts = {}, {}
    updatedJobFiles, shellJobs = set(), set()
    parseJobFiles(getJobFileDirName(options.jobTree), updatedJobFiles,
                  childJobFileToParentJob, childCounts, shellJobs)

    #A job with no retries left is counted as totally failed.
    failedJobs = [
        job for job in updatedJobFiles | set(childCounts)
        if job.remainingRetryCount == 0
    ]

    print("There are %i active jobs, %i parent jobs with children, %i totally failed jobs and %i empty jobs (i.e. finished but not cleaned up) currently in job tree: %s" %
          (len(updatedJobFiles), len(childCounts), len(failedJobs),
           len(shellJobs), options.jobTree))

    if options.verbose:  #Verbose currently means outputting the files that have failed.
        for job in failedJobs:
            logFileName = job.getLogFileName()
            if os.path.isfile(logFileName):
                print("Log file of failed job: %s" % logFileName)
                logFile(logFileName, logger.critical)
            else:
                print("Log file for job %s is not present" %
                      job.getJobFileName())
        if not failedJobs:
            print("There are no failed jobs to report")

    if (len(updatedJobFiles) +
            len(childCounts)) != 0 and options.failIfNotComplete:
        sys.exit(1)
Exemplo n.º 4
0
def reportJobLogFiles(job):
    """Logs, at critical level, the job's log file and then its slave log file.

    The file locations are read from job.attrib["log_file"] and
    job.attrib["slave_log_file"]; each is preceded by a heading line.
    """
    logFilesToReport = (
        ("The log file of the job", "log_file"),
        ("The log file of the slave for the job", "slave_log_file"),
    )
    #Original author's note on the slave log: "We log the job log file in the main loop"
    for heading, attribName in logFilesToReport:
        logger.critical(heading)
        logFile(job.attrib[attribName], logger.critical)
Exemplo n.º 5
0
def main():
    """Reports the state of the job tree.

    Parses the command line, checks that the directory given by --jobTree has
    the expected layout (config.xml plus the jobs, tempDirDir, logFileDir and
    slaveLogFileDir directories), then counts the job files by the "colour"
    attribute of each parsed job and prints the totals.

    With --verbose the log file of every job coloured "red" is printed. With
    --failIfNotComplete the script exits with status 1 if any job files
    remain in the job tree.

    Each job file is parsed once only, so the verbose report describes the
    same jobs that were counted. The print calls use the single-argument
    parenthesised form so that they behave identically as Python 2 print
    statements and Python 3 print calls.
    """

    ##########################################
    # Construct the arguments.
    ##########################################

    parser = getBasicOptionParser(
        "usage: %prog [options] \n"
        "The colours returned indicate the state of the job.\n"
        "\twhite: job has not been started yet\n"
        "\tgrey: job is issued to batch system\n"
        "\tred: job failed\n"
        "\tblue: job has children currently being processed\n"
        "\tblack: job has finished and will be processed (transient state)\n"
        "\tdead: job is totally finished and is awaiting deletion (transient state)",
        "%prog 0.1",
    )

    parser.add_option("--jobTree", dest="jobTree", help="Directory containing the job tree")

    parser.add_option(
        "--verbose",
        dest="verbose",
        action="store_true",
        help="Print loads of information, particularly all the log files of errors. default=%default",
        default=False,
    )

    parser.add_option(
        "--failIfNotComplete",
        dest="failIfNotComplete",
        action="store_true",
        help="Return exit value of 1 if job tree jobs not all completed. default=%default",
        default=False,
    )

    options, args = parseBasicOptions(parser)
    logger.info("Parsed arguments")
    assert len(args) == 0

    # Invoked with no arguments at all: show the help and stop.
    if len(sys.argv) == 1:
        parser.print_help()
        sys.exit(0)

    ##########################################
    # Do some checks.
    ##########################################

    logger.info("Checking if we have files for job tree")
    assert options.jobTree is not None
    assert os.path.isdir(options.jobTree)  # The given job dir tree must exist.
    assert os.path.isfile(os.path.join(options.jobTree, "config.xml"))  # A valid job tree must contain the config file
    assert os.path.isdir(os.path.join(options.jobTree, "jobs"))  # A job tree must have a directory of jobs.
    assert os.path.isdir(
        os.path.join(options.jobTree, "tempDirDir")
    )  # A job tree must have a directory of temporary directories (for jobs to make temp files in).
    assert os.path.isdir(os.path.join(options.jobTree, "logFileDir"))  # A job tree must have a directory of log files.
    assert os.path.isdir(
        os.path.join(options.jobTree, "slaveLogFileDir")
    )  # A job tree must have a directory of slave log files.

    ##########################################
    # Read the config
    ##########################################

    config = ET.parse(os.path.join(options.jobTree, "config.xml")).getroot()

    ##########################################
    # Survey the status of the job and report.
    ##########################################

    colours = {}  # Maps each colour to the number of jobs with that colour.
    redJobs = []  # Jobs coloured "red", kept only when --verbose needs them.
    jobFiles = TempFileTree(config.attrib["job_file_dir"]).listFiles()
    if len(jobFiles) > 0:
        logger.info("Collating the colours of the job tree")
        for absFileName in jobFiles:
            job = parseJobFile(absFileName)
            # Compare with None by identity rather than relying on the object's
            # own comparison or truthiness.
            if job is None:
                continue
            colour = job.attrib["colour"]
            colours[colour] = colours.get(colour, 0) + 1
            if options.verbose and colour == "red":
                redJobs.append(job)
    else:
        logger.info("There are no jobs to collate")

    print("There are %i jobs currently in job tree: %s" % (len(jobFiles), options.jobTree))

    for colour in colours:
        print("\tColour: %s, number of jobs: %s" % (colour, colours[colour]))

    if options.verbose:  # Verbose currently means outputting the files that have failed.

        def printLine(string):
            # Callback handed to logFile so the log contents go to stdout.
            print(string)

        for job in redJobs:
            if os.path.isfile(job.attrib["log_file"]):
                logFile(job.attrib["log_file"], printLine)
            else:
                logger.info("Log file for job %s is not present" % job.attrib["file"])

    if len(jobFiles) != 0 and options.failIfNotComplete:
        sys.exit(1)
Exemplo n.º 6
0
def main():
    """Reports the state of the job tree.

    Parses the command line (the job tree directory may be given either with
    --jobTree or as the single positional argument), checks that the job tree
    directory contains a config file and a directory of jobs, surveys the job
    files with parseJobFiles and prints a one line summary.

    With --verbose the log file of every job whose remainingRetryCount is 0
    is reported. With --failIfNotComplete the script exits with status 1 if
    there are any active jobs or parent jobs with children left.

    The print calls use the single-argument parenthesised form so that they
    behave identically as Python 2 print statements and Python 3 print calls.
    """

    ##########################################
    #Construct the arguments.
    ##########################################

    parser = getBasicOptionParser("usage: %prog [--jobTree] JOB_TREE_DIR [options]", "%prog 0.1")

    parser.add_option("--jobTree", dest="jobTree",
                      help="Directory containing the job tree")

    parser.add_option("--verbose", dest="verbose", action="store_true",
                      help="Print loads of information, particularly all the log files of errors. default=%default",
                      default=False)

    parser.add_option("--failIfNotComplete", dest="failIfNotComplete", action="store_true",
                      help="Return exit value of 1 if job tree jobs not all completed. default=%default",
                      default=False)

    options, args = parseBasicOptions(parser)
    logger.info("Parsed arguments")

    #Invoked with no arguments at all: show the help and stop.
    if len(sys.argv) == 1:
        parser.print_help()
        sys.exit(0)

    assert len(args) <= 1 #Only jobtree may be specified as argument
    if len(args) == 1: #Allow jobTree directory as arg
        options.jobTree = args[0]

    ##########################################
    #Do some checks.
    ##########################################

    logger.info("Checking if we have files for job tree")
    assert options.jobTree is not None
    assert os.path.isdir(options.jobTree) #The given job dir tree must exist.
    assert os.path.isfile(getConfigFileName(options.jobTree)) #A valid job tree must contain the config file
    assert os.path.isdir(getJobFileDirName(options.jobTree)) #A job tree must have a directory of jobs.

    ##########################################
    #Survey the status of the job and report.
    ##########################################

    #These containers are filled in by parseJobFiles.
    childJobFileToParentJob, childCounts, updatedJobFiles, shellJobs = {}, {}, set(), set()
    parseJobFiles(getJobFileDirName(options.jobTree), updatedJobFiles, childJobFileToParentJob, childCounts, shellJobs)

    #A job with no retries left is counted as totally failed.
    failedJobs = [ job for job in updatedJobFiles | set(childCounts) if job.remainingRetryCount == 0 ]

    print("There are %i active jobs, %i parent jobs with children, %i totally failed jobs and %i empty jobs (i.e. finished but not cleaned up) currently in job tree: %s" %
          (len(updatedJobFiles), len(childCounts), len(failedJobs), len(shellJobs), options.jobTree))

    if options.verbose: #Verbose currently means outputting the files that have failed.
        for job in failedJobs:
            logFileName = job.getLogFileName()
            if os.path.isfile(logFileName):
                print("Log file of failed job: %s" % logFileName)
                logFile(logFileName, logger.critical)
            else:
                print("Log file for job %s is not present" % job.getJobFileName())
        if not failedJobs:
            print("There are no failed jobs to report")

    if (len(updatedJobFiles) + len(childCounts)) != 0 and options.failIfNotComplete:
        sys.exit(1)