def main():
    """Expand one node of the test tree into child jobs plus a follow-on command.

    Command-line options:
        --job          path of the job XML file to read and rewrite in place.
        --treePointer  path of the XML file whose root ``file`` attribute names
                       the tree XML for this node.

    For every ``child`` under the tree's ``children`` element a new tree
    pointer is created with ``makeTreePointer`` and a ``child`` element running
    ``jobTreeTest_CommandFirst.py`` is appended to the job's ``children``.  The
    job's own ``command`` is then set to ``jobTreeTest_CommandSecond.py`` and
    the job file is written back.

    Exits with status 1 when ``random.random() > 0.9`` and with status 0
    otherwise.
    """
    parser = getBasicOptionParser("usage: %prog [options]", "%prog 0.1")
    parser.add_option(
        "--job",
        dest="jobFile",
        help="Job file containing command to run",
        default="None",
    )
    parser.add_option(
        "--treePointer",
        dest="treePointerFile",
        help="File containing pointer to the tree data",
        default="None",
    )
    options, args = parseBasicOptions(parser)
    logger.info("Parsed the input arguments")

    job = ET.parse(options.jobFile).getroot()
    setLogLevel(job.attrib["log_level"])
    logger.info("Parsed the job XML")

    treePointer = ET.parse(options.treePointerFile).getroot()
    logger.info("Parsed the tree pointer XML")

    tree = ET.parse(treePointer.attrib["file"]).getroot()
    logger.info("Parsed the tree XML")

    for child in tree.find("children").findall("child"):
        # Make the child tree pointer file
        childTreePointerFile = makeTreePointer(
            child.attrib["file"],
            getTempFile(rootDir=job.attrib["global_temp_dir"]),
        )
        # Make the child command
        unbornChild = ET.SubElement(job.find("children"), "child")
        unbornChild.attrib["command"] = (
            "jobTreeTest_CommandFirst.py --treePointer %s --job JOB_FILE"
            % (childTreePointerFile,)
        )
        # Only some of the children are given a "time" attribute.
        if random.random() > 0.2:
            unbornChild.attrib["time"] = str(random.random() * 10)
        # Record the child tree pointer on the in-memory tree pointer.
        # NOTE(review): treePointer is never written back to disk in this
        # function, so these entries look like they are lost - confirm
        # whether that is intended.
        ET.SubElement(
            treePointer.find("children"), "child", {"file": childTreePointerFile}
        )

    job.attrib["command"] = (
        "jobTreeTest_CommandSecond.py --treePointer %s --job JOB_FILE"
        % (options.treePointerFile,)
    )
    logger.info("Made new command")

    # Close the handle even if serialising the XML fails.
    fileHandle = open(options.jobFile, "w")
    try:
        ET.ElementTree(job).write(fileHandle)
    finally:
        fileHandle.close()
    logger.info("Updated the job file")

    # These lines should end up in the logs
    sys.stderr.write("Checking that we can report to std err\n")
    print("Checking that we can report to std out")

    if random.random() > 0.9:
        logger.info("Going to fail the job")
        sys.exit(1)
    logger.info("Going to pass the job done okay")
    sys.exit(0)
def main():
    """Kill the jobs that are still issued to the batch system of a job tree.

    Command-line options:
        --jobTree  directory containing the job tree to kill (required; it
                   must contain ``config.xml``).

    Raises:
        RuntimeError: if ``--jobTree`` is missing or is not a directory.
    """
    parser = getBasicOptionParser("usage: %prog [options]", "%prog 0.1")
    parser.add_option(
        "--jobTree",
        dest="jobTree",
        help="Directory containing the job tree to kill",
    )
    options, args = parseBasicOptions(parser)
    logger.info("Parsed arguments")
    assert len(args) == 0  # This program takes no arguments

    # Explicit checks rather than asserts, so they still run under "python -O".
    if options.jobTree is None:
        raise RuntimeError("You did not specify the job-tree")
    if not os.path.isdir(options.jobTree):
        # The job tree must exist if we are going to kill it.
        raise RuntimeError(
            "The given job dir tree does not exist: %s" % options.jobTree
        )

    logger.info(
        "Starting routine to kill running jobs in the jobTree: %s" % options.jobTree
    )
    config = ET.parse(os.path.join(options.jobTree, "config.xml")).getroot()
    # Original author's note: loading the batch system should automatically
    # kill the existing jobs.
    batchSystem = loadTheBatchSystem(config)
    # Just in case, kill whatever is still reported as issued.
    for job in batchSystem.getIssuedJobIDs():
        batchSystem.killJobs(job)
    logger.info("All jobs SHOULD have been killed")
def main():
    """Run a chain of jobs from one job file on this node.

    ``sys.argv[1]`` must be a directory to append to ``sys.path`` so that the
    ``workflow.jobTree`` package can be imported; it is removed from
    ``sys.argv`` before the options are parsed.

    Command-line options:
        --job  path of the job XML file to run.

    The last ``followOn`` of the job is passed to ``processJob``.  While the
    job stays ``black``, has fewer than two children, still has follow-ons and
    the next follow-on fits in the memory/cpu of the first one, the job is
    marked ``grey``, checkpointed with ``writeJobs`` and the loop continues.
    The job is written once more when the loop ends.

    If the job has a ``stats`` attribute, the elapsed wall-clock time and CPU
    time of this run are written as a ``slave`` element to that file.
    """
    sys.path += [sys.argv[1]]
    # Delete by position: list.remove() would drop the first element that
    # compares equal, which is not necessarily index 1.
    del sys.argv[1]

    # Now we can import all the stuff..
    from workflow.jobTree.lib.bioio import getBasicOptionParser
    from workflow.jobTree.lib.bioio import parseBasicOptions
    from workflow.jobTree.lib.bioio import logger
    from workflow.jobTree.lib.bioio import addLoggingFileHandler
    from workflow.jobTree.lib.bioio import setLogLevel
    from workflow.jobTree.lib.bioio import getTotalCpuTime
    from workflow.jobTree.lib.master import writeJobs

    ##########################################
    # Construct the arguments.
    ##########################################

    parser = getBasicOptionParser("usage: %prog [options]", "%prog 0.1")
    parser.add_option(
        "--job",
        dest="jobFile",
        help="Job file containing command to run",
        default="None",
    )
    options, args = parseBasicOptions(parser)
    assert len(args) == 0

    ##########################################
    # Parse the job.
    ##########################################

    job = ET.parse(options.jobFile).getroot()

    ##########################################
    # Setup the logging
    ##########################################

    setLogLevel(job.attrib["log_level"])
    addLoggingFileHandler(job.attrib["slave_log_file"], rotatingLogging=False)
    logger.info("Parsed arguments and set up logging")

    ##########################################
    # Setup the stats, if requested
    ##########################################

    if "stats" in job.attrib:
        startTime = time.time()
        # Use the same clock that is read at the end of the run; the delta of
        # two different clocks is not a meaningful CPU time.
        startClock = getTotalCpuTime()
        stats = ET.Element("slave")
    else:
        stats = None

    ##########################################
    # Run the script.
    ##########################################

    maxTime = float(job.attrib["job_time"])
    assert maxTime > 0.0
    assert maxTime < sys.maxint
    jobToRun = job.find("followOns").findall("followOn")[-1]
    # The resources of the first follow-on bound every later job in the chain.
    memoryAvailable = int(jobToRun.attrib["memory"])
    cpuAvailable = int(jobToRun.attrib["cpu"])
    while True:
        processJob(job, jobToRun, memoryAvailable, cpuAvailable, stats)

        if job.attrib["colour"] != "black":
            logger.info("Exiting the slave because of a failed job")
            break

        # This is the estimate runtime of the jobs on the followon stack
        totalRuntime = float(job.attrib["total_time"])

        childrenNode = job.find("children")
        childrenList = childrenNode.findall("child")

        # NOTE: the check of the accumulated child run-time against maxTime is
        # currently disabled; only the number of children is considered.
        if len(childrenList) >= 2:
            # We are going to have to return to the parent
            logger.info(
                "No more jobs can run in series by this slave, its got %i children"
                % len(childrenList)
            )
            break

        # Move the (at most one) remaining child onto the follow-on stack.
        followOns = job.find("followOns")
        while len(childrenList) > 0:
            child = childrenList.pop()
            childrenNode.remove(child)
            totalRuntime += float(child.attrib["time"])
            ET.SubElement(followOns, "followOn", child.attrib.copy())
        job.attrib["total_time"] = str(totalRuntime)
        assert len(childrenNode.findall("child")) == 0

        if len(followOns.findall("followOn")) == 0:
            logger.info(
                "No more jobs can run by this slave as we have exhausted the follow ons"
            )
            break

        # Get the next job and see if we have enough cpu and memory to run it..
        jobToRun = job.find("followOns").findall("followOn")[-1]
        if int(jobToRun.attrib["memory"]) > memoryAvailable:
            logger.info("We need more memory for the next job, so finishing")
            break
        if int(jobToRun.attrib["cpu"]) > cpuAvailable:
            logger.info("We need more cpus for the next job, so finishing")
            break

        # Update the job so we can start the next loop cycle
        job.attrib["colour"] = "grey"
        writeJobs([job])
        logger.info("Updated the status of the job to grey and starting the next job")

    # Write back the job file with the updated jobs, using the checkpoint method.
    writeJobs([job])
    logger.info("Written out an updated job file")

    logger.info("Finished running the chain of jobs on this node")

    ##########################################
    # Finish up the stats
    ##########################################

    # "is not None": an Element without children is falsy, so do not test
    # its truth value.
    if stats is not None:
        stats.attrib["time"] = str(time.time() - startTime)
        stats.attrib["clock"] = str(getTotalCpuTime() - startClock)
        fileHandle = open(job.attrib["stats"], "w")
        try:
            ET.ElementTree(stats).write(fileHandle)
        finally:
            fileHandle.close()
def main():
    """Reports the state of the job tree.

    Command-line options:
        --jobTree            directory containing the job tree (required).
        --verbose            also output the log file of every ``red`` job.
        --failIfNotComplete  exit with status 1 if any job files remain.

    Prints the number of job files found and a count of jobs per colour.
    Called with no command-line arguments at all, prints the help and exits
    with status 0.

    Raises:
        RuntimeError: if ``--jobTree`` is missing or the directory does not
            have the expected layout.
    """
    ##########################################
    # Construct the arguments.
    ##########################################

    usage = (
        "usage: %prog [options] \n"
        "The colours returned indicate the state of the job.\n"
        "\twhite: job has not been started yet\n"
        "\tgrey: job is issued to batch system\n"
        "\tred: job failed\n"
        "\tblue: job has children currently being processed\n"
        "\tblack: job has finished and will be processed (transient state)\n"
        "\tdead: job is totally finished and is awaiting deletion (transient state)"
    )
    parser = getBasicOptionParser(usage, "%prog 0.1")
    parser.add_option(
        "--jobTree", dest="jobTree", help="Directory containing the job tree"
    )
    parser.add_option(
        "--verbose",
        dest="verbose",
        action="store_true",
        help="Print loads of information, particularly all the log files of errors",
        default=False,
    )
    parser.add_option(
        "--failIfNotComplete",
        dest="failIfNotComplete",
        action="store_true",
        help="Return exit value of 1 if job tree jobs not all completed",
        default=False,
    )
    options, args = parseBasicOptions(parser)
    logger.info("Parsed arguments")
    assert len(args) == 0

    if len(sys.argv) == 1:
        parser.print_help()
        sys.exit(0)

    ##########################################
    # Do some checks.
    ##########################################

    # Explicit checks rather than asserts, so they still run under "python -O"
    # and report what is wrong with the directory.
    logger.info("Checking if we have files for job tree")
    if options.jobTree is None:
        raise RuntimeError("You did not specify the job-tree")
    if not os.path.isdir(options.jobTree):
        raise RuntimeError(
            "The given job dir tree does not exist: %s" % options.jobTree
        )
    if not os.path.isfile(os.path.join(options.jobTree, "config.xml")):
        raise RuntimeError("A valid job tree must contain the config file")
    requiredDirs = (
        ("jobs", "jobs"),
        ("tempDirDir", "temporary directories (for jobs to make temp files in)"),
        ("logFileDir", "log files"),
        ("slaveLogFileDir", "slave log files"),
    )
    for dirName, contents in requiredDirs:
        if not os.path.isdir(os.path.join(options.jobTree, dirName)):
            raise RuntimeError(
                "A job tree must have a directory of %s: %s is missing from %s"
                % (contents, dirName, options.jobTree)
            )

    ##########################################
    # Read the total job number
    ##########################################

    config = ET.parse(os.path.join(options.jobTree, "config.xml")).getroot()

    ##########################################
    # Survey the status of the job and report.
    ##########################################

    colours = {}
    jobFiles = TempFileTree(config.attrib["job_file_dir"]).listFiles()
    if len(jobFiles) > 0:
        logger.info("Collating the colours of the job tree")
        for absFileName in jobFiles:
            job = parseJobFile(absFileName)
            # Job files for which parseJobFile returns None are skipped.
            if job is not None:
                colour = job.attrib["colour"]
                colours[colour] = colours.get(colour, 0) + 1
    else:
        logger.info("There are no jobs to collate")

    print(
        "There are %i jobs currently in job tree: %s"
        % (len(jobFiles), options.jobTree)
    )

    for colour in colours:
        print("\tColour: %s, number of jobs: %s" % (colour, colours[colour]))

    if options.verbose:
        # Verbose currently means outputting the files that have failed.
        def printLine(string):
            # Callback handed to logFile.
            print(string)

        for absFileName in jobFiles:
            job = parseJobFile(absFileName)
            if job is None or job.attrib["colour"] != "red":
                continue
            if os.path.isfile(job.attrib["log_file"]):
                logFile(job.attrib["log_file"], printLine)
            else:
                logger.info(
                    "Log file for job %s is not present" % job.attrib["file"]
                )

    if len(jobFiles) != 0 and options.failIfNotComplete:
        sys.exit(1)
from workflow.jobTree.lib.bioio import logger
from workflow.jobTree.lib.bioio import getBasicOptionParser
from workflow.jobTree.lib.bioio import parseBasicOptions
from workflow.jobTree.scriptTree.stack import loadPickleFile

# Parse the options: --job names the job file, --target names the file holding
# the pickled target; the positional arguments are dotted class names.
parser = getBasicOptionParser("usage: %prog [options]", "%prog 0.1")
parser.add_option("--job", dest="jobFile", help="Job file containing command to run")
parser.add_option(
    "--target",
    dest="target",
    help="File containing a pickled, wrapped instance of target classes",
)
options, args = parseBasicOptions(parser)
assert options.target is not None
logger.info("Parsed the input arguments")

# Naughty stuff to do the import of the target we need: each positional
# argument is a dotted path "package.module.ClassName"; the class is imported
# and bound under its bare name in the current namespace.
for className in args:
    # The message needs a %s placeholder, otherwise logging cannot format the
    # extra argument when the record is emitted.
    logger.info("Loading the class name %s", className)
    l = className.split(".")
    moduleName = ".".join(l[:-1])
    className = l[-1]
    _temp = __import__(moduleName, globals(), locals(), [className], -1)
    # NOTE(review): className comes straight from the command line and is
    # interpolated into exec'd code; only run this with trusted arguments.
    exec("%s = 1" % className)
    vars()[className] = _temp.__dict__[className]
def main():
    """Compute the leaf count of one tree node and clean up its child pointers.

    Command-line options:
        --job          path of the job XML file (read for its ``log_level``).
        --treePointer  path of the XML file whose root ``file`` attribute names
                       the tree XML for this node.

    The node's ``count`` attribute is set to the sum of the ``count``
    attributes of its children's tree files, or to 1 when it has no children,
    and the tree file is rewritten.  Every file listed under the tree
    pointer's ``children`` that exists is then removed.  The job file itself
    is not modified.

    Exits with status 1 when ``random.random() > 0.9`` and with status 0
    otherwise.
    """
    parser = getBasicOptionParser("usage: %prog [options]", "%prog 0.1")
    parser.add_option(
        "--job",
        dest="jobFile",
        help="Job file containing command to run",
        default="None",
    )
    parser.add_option(
        "--treePointer",
        dest="treePointer",
        help="File containing pointer to the tree data",
        default="None",
    )
    options, args = parseBasicOptions(parser)
    logger.info("Parsed the input arguments")

    # These lines should end up in the logs
    sys.stderr.write("Checking that we can report to std err\n")
    print("Checking that we can report to std out")

    job = ET.parse(options.jobFile).getroot()
    setLogLevel(job.attrib["log_level"])
    logger.info("Parsed the job XML")

    treePointer = ET.parse(options.treePointer).getroot()
    logger.info("Parsed the tree pointer XML")

    treeFile = treePointer.attrib["file"]
    tree = ET.parse(treeFile).getroot()
    logger.info("Parsed the tree XML")

    children = tree.find("children").findall("child")
    if len(children) > 0:
        # Internal node: add up the counts stored in the children's tree files.
        i = sum(
            int(ET.parse(child.attrib["file"]).getroot().attrib["count"])
            for child in children
        )
    else:
        # A node without children counts as a single leaf.
        i = 1

    tree.attrib["count"] = str(i)
    logger.info("Calculated the leaf count: %i" % i)

    # Close the handle even if serialising the XML fails.
    fileHandle = open(treeFile, "w")
    try:
        ET.ElementTree(tree).write(fileHandle)
    finally:
        fileHandle.close()
    logger.info("Updated the tree file: %s" % treeFile)

    for childPointer in treePointer.find("children").findall("child"):
        if os.path.isfile(childPointer.attrib["file"]):
            os.remove(childPointer.attrib["file"])
    logger.info("Removed the child pointer files")

    logger.info("No need to update the job file, as we didn't make anything new!")
    if random.random() > 0.9:
        logger.info("Going to fail the job")
        sys.exit(1)
    logger.info("Going to pass the job done okay")
    sys.exit(0)
def main():
    """Reports stats on the job-tree, use in conjunction with --stats options to jobTree.

    Command-line options:
        --jobTree     directory containing the job tree (required; it must
                      contain ``config.xml`` and ``stats.xml``).
        --outputFile  optional file in which to also write the report.

    Builds a ``collated_stats`` XML element with total/median/average/min/max
    of the ``time``, ``clock`` and wait (``time - clock``) attributes of every
    ``slave``, ``job`` and ``target`` element in ``stats.xml``, plus one
    element per target class, and prints it pretty-printed to standard output.
    Called with no command-line arguments at all, prints the help and exits
    with status 0.

    Raises:
        RuntimeError: if ``--jobTree`` is missing, is not a directory, or
            lacks ``config.xml`` or ``stats.xml``.
    """
    ##########################################
    # Construct the arguments.
    ##########################################

    parser = getBasicOptionParser("usage: %prog", "%prog 0.1")
    parser.add_option(
        "--jobTree", dest="jobTree", help="Directory containing the job tree"
    )
    parser.add_option(
        "--outputFile",
        dest="outputFile",
        default=None,
        help="File in which to write results",
    )
    options, args = parseBasicOptions(parser)
    logger.info("Parsed arguments")
    assert len(args) == 0

    if len(sys.argv) == 1:
        parser.print_help()
        sys.exit(0)

    ##########################################
    # Do some checks.
    ##########################################

    logger.info("Checking if we have files for job tree")
    if options.jobTree is None:
        raise RuntimeError("You did not specify the job-tree")
    if not os.path.isdir(options.jobTree):
        raise RuntimeError(
            "The given job dir tree does not exist: %s" % options.jobTree
        )
    if not os.path.isfile(os.path.join(options.jobTree, "config.xml")):
        raise RuntimeError("A valid job tree must contain the config file")
    if not os.path.isfile(os.path.join(options.jobTree, "stats.xml")):
        raise RuntimeError(
            "The job-tree was run without the --stats flag, so no stats were created"
        )

    ##########################################
    # Read the stats and config
    ##########################################

    config = ET.parse(os.path.join(options.jobTree, "config.xml")).getroot()
    stats = ET.parse(os.path.join(options.jobTree, "stats.xml")).getroot()

    ##########################################
    # Collate the stats and report
    ##########################################

    def summarise(values, suffix):
        """Return total/median/average/min/max attributes for the values."""
        values = sorted(values)
        if len(values) == 0:
            # Report zeros for an empty group instead of failing.
            values.append(0)
        return {
            "total_" + suffix: str(sum(values)),
            # Floor division: the median index must be an integer.
            "median_" + suffix: str(values[len(values) // 2]),
            "average_" + suffix: str(sum(values) / len(values)),
            "min_" + suffix: str(min(values)),
            "max_" + suffix: str(max(values)),
        }

    def addTimingStats(element, items, itemName):
        """Append to element a child called itemName summarising the items."""
        times = [float(item.attrib["time"]) for item in items]
        clocks = [float(item.attrib["clock"]) for item in items]
        # Wait is the part of the elapsed time not accounted for by "clock".
        waits = [
            float(item.attrib["time"]) - float(item.attrib["clock"])
            for item in items
        ]
        attributes = {"total_number": str(len(items))}
        attributes.update(summarise(times, "time"))
        attributes.update(summarise(clocks, "clock"))
        attributes.update(summarise(waits, "wait"))
        return ET.SubElement(element, itemName, attributes)

    def addCountStats(element, containingItems, containingItemName, getFn):
        """Set on element the spread of item counts per containing item."""
        itemCounts = sorted(
            len(getFn(containingItem)) for containingItem in containingItems
        )
        if len(itemCounts) == 0:
            itemCounts.append(0)
        element.attrib["median_number_per_%s" % containingItemName] = str(
            itemCounts[len(itemCounts) // 2]
        )
        element.attrib["average_number_per_%s" % containingItemName] = str(
            float(sum(itemCounts)) / len(itemCounts)
        )
        element.attrib["min_number_per_%s" % containingItemName] = str(
            min(itemCounts)
        )
        element.attrib["max_number_per_%s" % containingItemName] = str(
            max(itemCounts)
        )

    def jobsOfSlave(slave):
        """Return the job elements of a slave element."""
        return slave.findall("job")

    def targetsOfJob(job):
        """Return the target elements found in all stacks of a job element."""
        found = []
        for stack in job.findall("stack"):
            found += stack.findall("target")
        return found

    def prettify(elem):
        """Return a pretty-printed XML string for the Element."""
        rough_string = ET.tostring(elem, 'utf-8')
        reparsed = minidom.parseString(rough_string)
        return reparsed.toprettyxml(indent="  ")

    if stats.find("total_time") is None:
        # Hack to allow it to work on unfinished jobtrees.
        ET.SubElement(stats, "total_time", {"time": "0.0", "clock": "0.0"})

    totalTime = stats.find("total_time")
    collatedStatsTag = ET.Element(
        "collated_stats",
        {
            "total_run_time": totalTime.attrib["time"],
            "total_clock": totalTime.attrib["clock"],
            "batch_system": config.attrib["batch_system"],
            "job_time": config.attrib["job_time"],
            "default_memory": config.attrib["default_memory"],
            "default_cpu": config.attrib["default_cpu"],
            "max_jobs": config.attrib["max_jobs"],
            "max_threads": config.attrib["max_threads"],
        },
    )

    # Add slave info
    slaves = stats.findall("slave")
    addTimingStats(collatedStatsTag, slaves, "slave")

    # Add job info
    jobs = []
    for slave in slaves:
        jobs += jobsOfSlave(slave)
    addCountStats(
        addTimingStats(collatedStatsTag, jobs, "job"), slaves, "slave", jobsOfSlave
    )

    # Add aggregated target info
    targets = []
    for job in jobs:
        targets += targetsOfJob(job)
    addCountStats(
        addTimingStats(collatedStatsTag, targets, "target"),
        jobs,
        "job",
        targetsOfJob,
    )

    # Get info for each target
    targetNames = set()
    for target in targets:
        targetNames.add(target.attrib["class"])

    targetTypesTag = ET.SubElement(collatedStatsTag, "target_types")
    for targetName in targetNames:
        targetTypes = [
            target for target in targets if target.attrib["class"] == targetName
        ]
        targetTypeTag = addTimingStats(targetTypesTag, targetTypes, targetName)
        # targetTypes cannot be empty here: targetName was taken from targets.
        estimatedRunTimes = [float(target.attrib["e_time"]) for target in targetTypes]
        targetTypeTag.attrib["estimated_time"] = str(
            sum(estimatedRunTimes) / len(estimatedRunTimes)
        )

    # Render once and reuse the text for both the file and the screen.
    report = prettify(collatedStatsTag)

    # Now dump it all out to file
    if options.outputFile is not None:
        fileHandle = open(options.outputFile, 'w')
        try:
            fileHandle.write(report)
        finally:
            fileHandle.close()

    # Now dump onto the screen
    print(report)