Example #1
 def __init__(self, config, maxCpus, maxMemory):
     AbstractBatchSystem.__init__(self, config, maxCpus, maxMemory) #Call the parent constructor
     if maxMemory != sys.maxint:
         logger.critical("A max memory has been specified for the parasol batch system class of %i, but currently this batchsystem interface does not support such limiting" % maxMemory)
     #Keep the name of the results file for the pstat2 command..
     self.parasolCommand = config.attrib["parasol_command"]
     self.parasolResultsFile = getParasolResultsFileName(config.attrib["job_tree"])
     #Reset the job queue and results (initially, we do this again once we've killed the jobs)
     self.queuePattern = re.compile(r"q\s+([0-9]+)")
     self.runningPattern = re.compile(r"r\s+([0-9]+)\s+[\S]+\s+[\S]+\s+([0-9]+)\s+[\S]+")
     self.killJobs(self.getIssuedJobIDs()) #Kill any jobs on the current stack
     logger.info("Going to sleep for a few seconds to kill any existing jobs")
     time.sleep(5) #Give the batch system a few seconds to sort itself out.
     logger.info("Removed any old jobs from the queue")
     #Reset the job queue and results
     exitValue = popenParasolCommand("%s -results=%s clear sick" % (self.parasolCommand, self.parasolResultsFile), False)[0]
     if exitValue is not None:
         logger.critical("Could not clear the sick status of the parasol batch system (results file %s)" % self.parasolResultsFile)
     exitValue = popenParasolCommand("%s -results=%s flushResults" % (self.parasolCommand, self.parasolResultsFile), False)[0]
     if exitValue is not None:
         logger.critical("Could not flush the results of the parasol batch system (results file %s)" % self.parasolResultsFile)
     open(self.parasolResultsFile, 'w').close()
     logger.info("Reset the results queue")
     #State used to enforce the max CPUs limit
     self.outputQueue1 = Queue()
     self.outputQueue2 = Queue()
     #worker = Thread(target=getUpdatedJob, args=(self.parasolResultsFileHandle, self.outputQueue1, self.outputQueue2))
     #worker.setDaemon(True)
     worker = Process(target=getUpdatedJob, args=(self.parasolResultsFile, self.outputQueue1, self.outputQueue2))
     worker.daemon = True
     worker.start()
     self.usedCpus = 0
     self.jobIDsToCpu = {}
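
A minimal usage sketch for the constructor above, assuming the enclosing class is named ParasolBatchSystem (the class name is not shown in the excerpt) and that config is an ElementTree element, which matches the config.attrib lookups in the code:

    import sys
    import xml.etree.ElementTree as ET

    #Hypothetical setup; the attribute names match those the constructor reads
    config = ET.Element("config")
    config.attrib["parasol_command"] = "parasol"
    config.attrib["job_tree"] = "/tmp/jobTree"
    batchSystem = ParasolBatchSystem(config, maxCpus=4, maxMemory=sys.maxint)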
Example #2
    def __init__(self, config, maxCpus, maxMemory):
        AbstractBatchSystem.__init__(self, config, maxCpus, maxMemory) #Call the parent constructor
        self.lsfResultsFile = getParasolResultsFileName(config.attrib["job_tree"])
        #Reset the job queue and results (initially, we do this again once we've killed the jobs)
        self.lsfResultsFileHandle = open(self.lsfResultsFile, 'w')
        self.lsfResultsFileHandle.close() #We lose any previous state in this file, and ensure the file's existence
        self.currentjobs = set()
        self.obtainSystemConstants()
        self.jobIDs = dict()
        self.lsfJobIDs = dict()
        self.nextJobID = 0

        self.newJobsQueue = Queue()
        self.updatedJobsQueue = Queue()
        self.worker = Worker(self.newJobsQueue, self.updatedJobsQueue, self)
        self.worker.daemon = True
        self.worker.start()
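
The Worker class is not shown in this excerpt; the daemon flag suggests a threading.Thread subclass. A minimal sketch of the producer/consumer shape implied by the two queues and the back-reference (the method bodies are assumptions, not taken from the source):

    import threading

    class Worker(threading.Thread):
        #Hypothetical skeleton: submit newly issued jobs, report updates back
        def __init__(self, newJobsQueue, updatedJobsQueue, boss):
            threading.Thread.__init__(self)
            self.newJobsQueue = newJobsQueue
            self.updatedJobsQueue = updatedJobsQueue
            self.boss = boss

        def run(self):
            while True:
                job = self.newJobsQueue.get() #Blocks until a job is issued
                #... submit the job to LSF (e.g. via bsub) and poll it, then:
                self.updatedJobsQueue.put(job)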
Example #3
    def __init__(self, config, maxCpus, maxMemory):
        AbstractBatchSystem.__init__(self, config, maxCpus, maxMemory) #Call the parent constructor
        self.gridengineResultsFile = getParasolResultsFileName(config.attrib["job_tree"])
        #Reset the job queue and results (initially, we do this again once we've killed the jobs)
        self.gridengineResultsFileHandle = open(self.gridengineResultsFile, 'w')
        self.gridengineResultsFileHandle.close() #We lose any previous state in this file, and ensure the file's existence
        self.currentjobs = set()
        self.obtainSystemConstants()
        self.nextJobID = 0

        self.newJobsQueue = Queue()
        self.updatedJobsQueue = Queue()
        self.killQueue = Queue()
        self.killedJobsQueue = Queue()
        self.worker = Worker(self.newJobsQueue, self.updatedJobsQueue, self.killQueue, self.killedJobsQueue, self)
        self.worker.daemon = True
        self.worker.start()
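
The extra killQueue/killedJobsQueue pair in this example suggests a request/acknowledge handshake for terminating jobs. A hedged sketch of what the boss side might look like (the method name and the use of job IDs as queue payloads are assumptions, not taken from the excerpt):

    #Hypothetical boss-side method; not part of the excerpt above
    def killBatchJobs(self, jobIDs):
        for jobID in jobIDs:
            self.killQueue.put(jobID) #Ask the worker thread to kill the job
        for jobID in jobIDs:
            killedID = self.killedJobsQueue.get() #Block until acknowledged
            assert killedID in jobIDs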
Example #4
    def __init__(self, config, maxCpus, maxMemory):
        """
        Set up the Slurm batch system: results file, worker thread, and sbatch options.
        """
        AbstractBatchSystem.__init__(self, config, maxCpus,
                                     maxMemory)  #Call the parent constructor

        self.resultsFile = getParasolResultsFileName(config.attrib["job_tree"])

        #Reset the job queue and results (initially, we do this again once we've killed the jobs)
        self.resultsFileHandle = open(self.resultsFile, 'w')
        self.resultsFileHandle.close() #We lose any previous state in this file, and ensure the file's existence

        self.currentjobs = set()
        self.obtainSystemConstants()
        self.jobIDs = dict()
        self.slurmJobTasks = dict()
        self.nextJobID = 0

        self.newJobsQueue = Queue()
        self.updatedJobsQueue = Queue()

        # Collect any Slurm options passed through the config
        slurmopts = dict()

        optiondata = SlurmBatchSystem.getOptionData()
        for switch, data in optiondata.iteritems():
            key = data['dest']
            if key in config.attrib:
                sbatchopt = re.sub(r'^slurm_', '', key)
                slurmopts[sbatchopt] = config.attrib[key]
        self.worker = Worker(self.newJobsQueue, self.updatedJobsQueue, self,
                             slurmopts)
        self.worker.daemon = True
        self.worker.start()
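
The option-harvesting loop above strips a slurm_ prefix from each matching config attribute to recover the sbatch option name. A self-contained sketch of that mapping, with hypothetical option data standing in for SlurmBatchSystem.getOptionData():

    import re

    #Hypothetical stand-ins mirroring the shapes the loop expects
    optiondata = {"--slurm-partition": {"dest": "slurm_partition"}}
    config_attrib = {"slurm_partition": "long", "job_tree": "/tmp/jobTree"}

    slurmopts = dict()
    for switch, data in optiondata.iteritems():
        key = data["dest"]
        if key in config_attrib:
            slurmopts[re.sub(r"^slurm_", "", key)] = config_attrib[key]

    print slurmopts #Prints: {'partition': 'long'}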