def parasolStop():
    """Shut down the parasol nodes and then the parasol hub.

    Runs ``paraNodeStop`` against the jobTree machine list, followed by
    ``paraHubStop now``.

    Returns a 2-tuple ``(nodeStatus, hubStatus)`` holding the values that
    os.system returned for the two commands, in that order.
    """
    machineListPath = os.path.join(workflowRootPath(), "workflow", "jobTree", "machineList")
    nodeStatus = os.system("paraNodeStop %s" % machineListPath)
    hubStatus = os.system("paraHubStop now")
    return nodeStatus, hubStatus
def _parasolHasNoDeadNodes():
    """Run ``parasol status`` and report whether it shows zero dead nodes.

    The status output is captured in a temporary file, and every line
    containing "Nodes dead" is echoed to stdout. Returns True if at least
    one such line ends in the number 0, otherwise False. A RuntimeError
    raised while obtaining or reading the status is treated as "not
    healthy" (presumably popen raises it when the command fails - confirm
    against its definition).

    The temporary file and the file handle are always released, even when
    an unexpected exception (e.g. ValueError from a malformed status line)
    propagates to the caller.
    """
    tempFile = getTempFile()
    try:
        noDeadNodes = False
        try:
            popen("parasol status", tempFile)
            fileHandle = open(tempFile, 'r')
            try:
                for line in fileHandle:
                    if "Nodes dead" in line:
                        # Single-argument print(...) behaves the same under
                        # the Python 2 print statement and Python 3.
                        print(line)
                        # The dead-node count is the last field on the line.
                        if int(line.split()[-1]) == 0:
                            noDeadNodes = False or True
            finally:
                fileHandle.close()
        except RuntimeError:
            # Best effort: a failed status query just means "try again".
            pass
        return noDeadNodes
    finally:
        os.remove(tempFile)


def parasolRestart():
    """Restart the parasol hub and node, retrying until no nodes are dead.

    Stops any running parasol processes, then repeatedly starts a node and
    a hub on localhost and checks ``parasol status``. The loop ends once
    the status reports 0 dead nodes; otherwise parasol is stopped again
    and, after a 5 second pause, another attempt is made. There is no
    retry limit, so this call blocks until a restart succeeds.
    """
    parasolStop()
    # The machine list path does not change between attempts.
    machineList = os.path.join(workflowRootPath(), "workflow", "jobTree", "machineList")
    while True:
        # The -umask=002 / -userPath / -sysPath node options remain disabled.
        os.system("paraNode start -hub=localhost")
        # The hub is launched in the background so the status check can run.
        os.system("paraHub %s subnet=127.0.0 &" % (machineList,))
        if _parasolHasNoDeadNodes():
            break
        logger.info("Tried to restart the parasol process, but failed, will try again")
        parasolStop()
        time.sleep(5)
    logger.info("Restarted the parasol process")
def issueJobs(jobs, jobIDsToJobsHash, batchSystem):
    """Submit a collection of jobs to the batch system and record their ids.

    For every job a ``jobTreeSlave`` command line is built, and the memory
    and cpu requirements are read from the job's last ``followOn`` entry.

    jobs -- iterable of job elements providing ``find``/``findall`` and an
        ``attrib`` mapping with "file" and "slave_log_file" entries.
    jobIDsToJobsHash -- dict updated in place, mapping each newly issued
        job id to the job's file.
    batchSystem -- object whose ``issueJobs`` method accepts a list of
        (command, memory, cpu, slave log file) tuples and returns a
        mapping from job id to the command that was issued.
    """
    # Maps each command line to (job file, memory, cpu, slave log file).
    detailsByCommand = {}
    for job in jobs:
        slaveScript = os.path.join(workflowRootPath(), "bin", "jobTreeSlave")
        lastFollowOn = job.find("followOns").findall("followOn")[-1]
        details = (job.attrib["file"],
                   int(lastFollowOn.attrib["memory"]),
                   int(lastFollowOn.attrib["cpu"]),
                   job.attrib["slave_log_file"])
        command = "%s -E %s %s --job %s" % (sys.executable,
                                            slaveScript,
                                            os.path.dirname(workflowRootPath()),
                                            job.attrib["file"])
        detailsByCommand[command] = details

    submissions = [(command, memory, cpu, slaveLogFile)
                   for command, (_, memory, cpu, slaveLogFile) in detailsByCommand.items()]
    issuedJobs = batchSystem.issueJobs(submissions)
    # Every distinct command must have been given its own job id.
    assert len(issuedJobs.keys()) == len(detailsByCommand)

    for jobID in issuedJobs.keys():
        jobFile = detailsByCommand[issuedJobs[jobID]][0]
        assert jobID not in jobIDsToJobsHash
        jobIDsToJobsHash[jobID] = jobFile
        logger.debug("Issued the job: %s with job id: %i " % (jobFile, jobID))