def createFirstJob(command, config, memory=None, cpu=None, time=sys.maxint):
    """Adds the first job to the jobtree.
    """
    logger.info("Adding the first job")
    if memory is None:
        memory = config.attrib["default_memory"]
    if cpu is None:
        cpu = config.attrib["default_cpu"]
    job = createJob({ "command":command,
                      "memory":str(int(memory)),
                      "cpu":str(int(cpu)),
                      "time":str(float(time)) }, None, config)
    writeJobs([job])
    logger.info("Added the first job")
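
#A minimal usage sketch (not from the original module; the config file name,
#command string, and resource figures are illustrative assumptions): the
#master-side setup would typically parse the jobtree config element and then
#seed the tree with a single root job via createFirstJob.
#
#    config = ET.parse("config.xml").getroot()          #hypothetical config file
#    createFirstJob("echo 'hello world'", config,
#                   memory=2**30, cpu=1)                #1 GiB of memory, one cpu
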
def main():
    sys.path.append(sys.argv[1])
    sys.argv.remove(sys.argv[1])
    
    #Now we can import all the stuff..
    from sonLib.bioio import getBasicOptionParser
    from sonLib.bioio import parseBasicOptions
    from sonLib.bioio import logger
    from sonLib.bioio import addLoggingFileHandler
    from sonLib.bioio import setLogLevel
    from sonLib.bioio import getTotalCpuTime
    from sonLib.bioio import getTempDirectory
    from jobTree.src.master import writeJobs
    from jobTree.src.master import readJob
    from sonLib.bioio import system
    
    ##########################################
    #Construct the arguments.
    ##########################################
    
    parser = getBasicOptionParser("usage: %prog [options]", "%prog 0.1")
    
    parser.add_option("--job", dest="jobFile",
                      help="Job file containing command to run",
                      default="None")
    
    options, args = parseBasicOptions(parser)
    assert len(args) == 0
    
    ##########################################
    #Parse the job.
    ##########################################
    
    job = readJob(options.jobFile)
    
    ##########################################
    #Setup the logging
    ##########################################
    
    #Setup the logging
    setLogLevel(job.attrib["log_level"])
    addLoggingFileHandler(job.attrib["slave_log_file"], rotatingLogging=False)
    logger.info("Parsed arguments and set up logging")
    
    ##########################################
    #Setup the stats, if requested
    ##########################################
    
    if job.attrib.has_key("stats"):
        startTime = time.time()
        startClock = getTotalCpuTime()
        stats = ET.Element("slave")
    else:
        stats = None
    
    ##########################################
    #Load the environment for the job
    ##########################################
    
    #First load the environment for the job.
    fileHandle = open(job.attrib["environment_file"], 'r')
    environment = cPickle.load(fileHandle)
    fileHandle.close()
    for i in environment:
        if i not in ("TMPDIR", "TMP", "HOSTNAME", "HOSTTYPE"):
            os.environ[i] = environment[i]
    # sys.path is used by __import__ to find modules
    if "PYTHONPATH" in environment:
        for e in environment["PYTHONPATH"].split(':'):
            if e != '':
                sys.path.append(e)
    #os.environ = environment
    #os.putenv(key, value)
    logger.info("Loaded the environment for the process")
    
    ##########################################
    #Setup the temporary directories.
    ##########################################
    
    #Dir to put all the temp files in.
    localSlaveTempDir = getTempDirectory()
    localTempDir = os.path.join(localSlaveTempDir, "localTempDir")
    os.mkdir(localTempDir)
    os.chmod(localTempDir, 0777)
    
    ##########################################
    #Run the script.
    ##########################################
    
    maxTime = float(job.attrib["job_time"])
    assert maxTime > 0.0
    assert maxTime < sys.maxint
    
    jobToRun = job.find("followOns").findall("followOn")[-1]
    memoryAvailable = int(jobToRun.attrib["memory"])
    cpuAvailable = int(jobToRun.attrib["cpu"])
    startTime = time.time()
    while True:
        tempLogFile = processJob(job, jobToRun, memoryAvailable, cpuAvailable,
                                 stats, environment, localSlaveTempDir, localTempDir)
        
        if job.attrib["colour"] != "black":
            logger.critical("Exiting the slave because of a failed job")
            system("mv %s %s" % (tempLogFile, job.attrib["log_file"])) #Copy back the job log file, because we saw failure
            break
        elif job.attrib.has_key("reportAllJobLogFiles"):
            logger.info("Exiting because we've been asked to report all logs, and this involves returning to the master")
            #Copy across the log file
            system("mv %s %s" % (tempLogFile, job.attrib["log_file"]))
            break
        
        totalRuntime = float(job.attrib["total_time"]) #This is the estimate runtime of the jobs on the followon stack
        
        childrenNode = job.find("children")
        childrenList = childrenNode.findall("child")
        #childRuntime = sum([ float(child.attrib["time"]) for child in childrenList ])
        
        if len(childrenList) >= 2: # or totalRuntime + childRuntime > maxTime: #We are going to have to return to the parent
            logger.info("No more jobs can run in series by this slave, its got %i children" % len(childrenList))
            break
        
        if time.time() - startTime > maxTime:
            logger.info("We are breaking because the maximum time the job should run for has been exceeded")
            break
        
        followOns = job.find("followOns")
        while len(childrenList) > 0:
            child = childrenList.pop()
            childrenNode.remove(child)
            totalRuntime += float(child.attrib["time"])
            ET.SubElement(followOns, "followOn", child.attrib.copy())
        #assert totalRuntime <= maxTime + 1 #The plus one second to avoid unimportant rounding errors
        job.attrib["total_time"] = str(totalRuntime)
        assert len(childrenNode.findall("child")) == 0
        
        if len(followOns.findall("followOn")) == 0:
            logger.info("No more jobs can run by this slave as we have exhausted the follow ons")
            break
        
        #Get the next job and see if we have enough cpu and memory to run it..
        jobToRun = job.find("followOns").findall("followOn")[-1]
        if int(jobToRun.attrib["memory"]) > memoryAvailable:
            logger.info("We need more memory for the next job, so finishing")
            break
        if int(jobToRun.attrib["cpu"]) > cpuAvailable:
            logger.info("We need more cpus for the next job, so finishing")
            break
        
        ##Updated the job so we can start the next loop cycle
        job.attrib["colour"] = "grey"
        writeJobs([ job ])
        logger.info("Updated the status of the job to grey and starting the next job")
    
    #Write back the job file with the updated jobs, using the checkpoint method.
    writeJobs([ job ])
    logger.info("Written out an updated job file")
    
    logger.info("Finished running the chain of jobs on this node, we ran for a total of %f seconds" % (time.time() - startTime))
    
    ##########################################
    #Cleanup the temporary directory
    ##########################################
    
    system("rm -rf %s" % localSlaveTempDir)
    
    ##########################################
    #Finish up the stats
    ##########################################
    
    if stats != None:
        stats.attrib["time"] = str(time.time() - startTime)
        stats.attrib["clock"] = str(getTotalCpuTime() - startClock)
        fileHandle = open(job.attrib["stats"], 'w')
        ET.ElementTree(stats).write(fileHandle)
        fileHandle.close()
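
#For reference, a sketch of the job XML the main() variants in this file
#consume (the second variant parses it directly with ET.parse). Element and
#attribute names are taken from the accesses in the code; the values, file
#paths, and the ellipses standing in for unreferenced attributes (such as the
#command that processJob runs) are illustrative assumptions, not a schema.
#
#    <job colour="grey" total_time="100.0" job_time="1200.0" log_level="INFO"
#         slave_log_file="/jobTree/slave.log" log_file="/jobTree/job.log"
#         environment_file="/jobTree/environ.pickle" ...>
#        <children>
#            <child memory="1073741824" cpu="1" time="50.0" ... />
#        </children>
#        <followOns>
#            <followOn memory="1073741824" cpu="1" time="100.0" ... />
#        </followOns>
#    </job>
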
def main():
    sys.path += [ sys.argv[1] ]
    sys.argv.remove(sys.argv[1])
    
    #Now we can import all the stuff..
    from jobTree.src.bioio import getBasicOptionParser
    from jobTree.src.bioio import parseBasicOptions
    from jobTree.src.bioio import logger
    from jobTree.src.bioio import addLoggingFileHandler
    from jobTree.src.bioio import setLogLevel
    from jobTree.src.bioio import getTotalCpuTime
    from jobTree.src.master import writeJobs
    
    ##########################################
    #Construct the arguments.
    ##########################################
    
    parser = getBasicOptionParser("usage: %prog [options]", "%prog 0.1")
    
    parser.add_option("--job", dest="jobFile",
                      help="Job file containing command to run",
                      default="None")
    
    options, args = parseBasicOptions(parser)
    assert len(args) == 0
    
    ##########################################
    #Parse the job.
    ##########################################
    
    job = ET.parse(options.jobFile).getroot()
    
    ##########################################
    #Setup the logging
    ##########################################
    
    #Setup the logging
    setLogLevel(job.attrib["log_level"])
    addLoggingFileHandler(job.attrib["slave_log_file"], rotatingLogging=False)
    logger.info("Parsed arguments and set up logging")
    
    ##########################################
    #Setup the stats, if requested
    ##########################################
    
    if job.attrib.has_key("stats"):
        startTime = time.time()
        startClock = getTotalCpuTime() #Baseline matching the getTotalCpuTime() subtraction in the stats block below
        stats = ET.Element("slave")
    else:
        stats = None
    
    ##########################################
    #Run the script.
    ##########################################
    
    maxTime = float(job.attrib["job_time"])
    assert maxTime > 0.0
    assert maxTime < sys.maxint
    
    jobToRun = job.find("followOns").findall("followOn")[-1]
    memoryAvailable = int(jobToRun.attrib["memory"])
    cpuAvailable = int(jobToRun.attrib["cpu"])
    while True:
        processJob(job, jobToRun, memoryAvailable, cpuAvailable, stats)
        
        if job.attrib["colour"] != "black":
            logger.info("Exiting the slave because of a failed job")
            break
        
        totalRuntime = float(job.attrib["total_time"]) #This is the estimate runtime of the jobs on the followon stack
        
        childrenNode = job.find("children")
        childrenList = childrenNode.findall("child")
        #childRuntime = sum([ float(child.attrib["time"]) for child in childrenList ])
        
        if len(childrenList) >= 2: # or totalRuntime + childRuntime > maxTime: #We are going to have to return to the parent
            logger.info("No more jobs can run in series by this slave, its got %i children" % len(childrenList))
            break
        
        followOns = job.find("followOns")
        while len(childrenList) > 0:
            child = childrenList.pop()
            childrenNode.remove(child)
            totalRuntime += float(child.attrib["time"])
            ET.SubElement(followOns, "followOn", child.attrib.copy())
        #assert totalRuntime <= maxTime + 1 #The plus one second to avoid unimportant rounding errors
        job.attrib["total_time"] = str(totalRuntime)
        assert len(childrenNode.findall("child")) == 0
        
        if len(followOns.findall("followOn")) == 0:
            logger.info("No more jobs can run by this slave as we have exhausted the follow ons")
            break
        
        #Get the next job and see if we have enough cpu and memory to run it..
        jobToRun = job.find("followOns").findall("followOn")[-1]
        if int(jobToRun.attrib["memory"]) > memoryAvailable:
            logger.info("We need more memory for the next job, so finishing")
            break
        if int(jobToRun.attrib["cpu"]) > cpuAvailable:
            logger.info("We need more cpus for the next job, so finishing")
            break
        
        ##Updated the job so we can start the next loop cycle
        job.attrib["colour"] = "grey"
        writeJobs([ job ])
        logger.info("Updated the status of the job to grey and starting the next job")
    
    #Write back the job file with the updated jobs, using the checkpoint method.
    writeJobs([ job ])
    logger.info("Written out an updated job file")
    
    logger.info("Finished running the chain of jobs on this node")
    
    ##########################################
    #Finish up the stats
    ##########################################
    
    if stats != None:
        stats.attrib["time"] = str(time.time() - startTime)
        stats.attrib["clock"] = str(getTotalCpuTime() - startClock)
        fileHandle = open(job.attrib["stats"], 'w')
        ET.ElementTree(stats).write(fileHandle)
        fileHandle.close()
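
#Invocation sketch (the script name and paths below are illustrative
#assumptions; only the argument handling is taken from the code): both main()
#variants expect a directory to append to the module search path as the first
#positional argument, followed by the --job option naming the job file, and
#would normally be launched by the jobtree master rather than by hand, e.g.
#
#    python jobTreeSlave.py /path/to/site-packages --job /jobTree/jobs/job1.xml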