예제 #1
0
파일: jobTree.py 프로젝트: sbenz/jobTree
def createFirstJob(command, config, memory=None, cpu=None, time=sys.maxint):
    """Adds the first job to to the jobtree.
    """
    logger.info("Adding the first job")
    if memory == None:
        memory = config.attrib["default_memory"]
    if cpu == None:
        cpu = config.attrib["default_cpu"]
    job = createJob({ "command":command, "memory":str(int(memory)), "cpu":str(int(cpu)), "time":str(float(time)) }, None, config)
    writeJobs([job])
    logger.info("Added the first job")
예제 #2
0
def main():
    sys.path +=  [ sys.argv[1] ]
    sys.argv.remove(sys.argv[1])
    
    #Now we can import all the stuff..
    from workflow.jobTree.lib.bioio import getBasicOptionParser
    from workflow.jobTree.lib.bioio import parseBasicOptions
    from workflow.jobTree.lib.bioio import logger
    from workflow.jobTree.lib.bioio import addLoggingFileHandler
    from workflow.jobTree.lib.bioio import setLogLevel
    from workflow.jobTree.lib.bioio import getTotalCpuTime
    
    from workflow.jobTree.lib.master import writeJobs
    
    ##########################################
    #Construct the arguments.
    ##########################################
    
    parser = getBasicOptionParser("usage: %prog [options]", "%prog 0.1")
    
    parser.add_option("--job", dest="jobFile", 
                      help="Job file containing command to run",
                      default="None")
    
    options, args = parseBasicOptions(parser)
    assert len(args) == 0

    ##########################################
    #Parse the job.
    ##########################################
    
    job = ET.parse(options.jobFile).getroot()
    
    ##########################################
    #Setup the logging
    ##########################################
    
    #Setup the logging
    setLogLevel(job.attrib["log_level"])
    addLoggingFileHandler(job.attrib["slave_log_file"], rotatingLogging=False)
    logger.info("Parsed arguments and set up logging")
    
    ##########################################
    #Setup the stats, if requested
    ##########################################
    
    if job.attrib.has_key("stats"):
        startTime = time.time()
        startClock = time.clock()
        stats = ET.Element("slave")
    else:
        stats = None
    
    ##########################################
    #Run the script.
    ##########################################
    
    maxTime = float(job.attrib["job_time"])
    assert maxTime > 0.0
    assert maxTime < sys.maxint
    jobToRun = job.find("followOns").findall("followOn")[-1]
    memoryAvailable = int(jobToRun.attrib["memory"])
    cpuAvailable = int(jobToRun.attrib["cpu"])
    while True:
        processJob(job, jobToRun, memoryAvailable, cpuAvailable, stats)
        
        if job.attrib["colour"] != "black":
            logger.info("Exiting the slave because of a failed job")
            break
   
        totalRuntime = float(job.attrib["total_time"])  #This is the estimate runtime of the jobs on the followon stack
        
        childrenNode = job.find("children")
        childrenList = childrenNode.findall("child")
        #childRuntime = sum([ float(child.attrib["time"]) for child in childrenList ])
            
        if len(childrenList) >= 2: # or totalRuntime + childRuntime > maxTime: #We are going to have to return to the parent
            logger.info("No more jobs can run in series by this slave, its got %i children" % len(childrenList))
            break
        
        followOns = job.find("followOns")
        while len(childrenList) > 0:
            child = childrenList.pop()
            childrenNode.remove(child)
            totalRuntime += float(child.attrib["time"])
            ET.SubElement(followOns, "followOn", child.attrib.copy())
        #assert totalRuntime <= maxTime + 1 #The plus one second to avoid unimportant rounding errors
        job.attrib["total_time"] = str(totalRuntime)
        assert len(childrenNode.findall("child")) == 0
        
        if len(followOns.findall("followOn")) == 0:
            logger.info("No more jobs can run by this slave as we have exhausted the follow ons")
            break
        
        #Get the next job and see if we have enough cpu and memory to run it..
        jobToRun = job.find("followOns").findall("followOn")[-1]
        if int(jobToRun.attrib["memory"]) > memoryAvailable:
            logger.info("We need more memory for the next job, so finishing")
            break
        if int(jobToRun.attrib["cpu"]) > cpuAvailable:
            logger.info("We need more cpus for the next job, so finishing")
            break
        
        ##Updated the job so we can start the next loop cycle
        job.attrib["colour"] = "grey"
        writeJobs([ job ])
        logger.info("Updated the status of the job to grey and starting the next job")
    
    #Write back the job file with the updated jobs, using the checkpoint method.
    writeJobs([ job ])
    logger.info("Written out an updated job file")
    
    logger.info("Finished running the chain of jobs on this node")
    
    ##########################################
    #Finish up the stats
    ##########################################
    
    if stats != None:
        stats.attrib["time"] = str(time.time() - startTime)
        stats.attrib["clock"] = str(getTotalCpuTime() - startClock)
        fileHandle = open(job.attrib["stats"], 'w')
        ET.ElementTree(stats).write(fileHandle)
        fileHandle.close()