示例#1
0
    def sendEventRangeToAthenaMP(self, eventRanges):
        """Queue event ranges, or a "No more events" message, for AthenaMP.

        If ``eventRanges`` contains "No more events" it is put on the input
        message queue unchanged. Otherwise it is treated as one event range
        (a mapping with an 'eventRangeID' key) or a list of them: the ranges
        are JSON-encoded as a list, queued as a single message, counted once
        in the queued-events counter, and each range is marked 'processing'
        in the status table. In both cases AthenaMP is flagged as not ready
        afterwards.

        SIGTERM is blocked while the queue and bookkeeping are updated and is
        always unblocked again on exit, including when an exception is raised
        (for example a KeyError for an unknown event range ID).
        """
        block_sig(signal.SIGTERM)
        try:
            if "No more events" in eventRanges:
                self.__log.debug("Rank %s: sendEventRangeToAthenaMP: %s" % (self.__rank, eventRanges))
                self.__messageInQueue.put(eventRanges)
            else:
                # Normalise a single event range to a one-element list.
                if not isinstance(eventRanges, list):
                    eventRanges = [eventRanges]
                eventRangeFormat = json.dumps(eventRanges)
                self.__log.debug("Rank %s: sendEventRangeToAthenaMP: %s" % (self.__rank, eventRangeFormat))
                self.__messageInQueue.put(eventRangeFormat)
                # One queued message per call, regardless of how many ranges
                # the message carries.
                self.__totalQueuedEvents += 1

                for eventRange in eventRanges:
                    eventRangeID = eventRange['eventRangeID']
                    self.__eventRangesStatus[eventRangeID]['status'] = 'processing'

            self.__athenaMP_isReady = False
        finally:
            # Never leave SIGTERM blocked, even if the bookkeeping above failed.
            unblock_sig(signal.SIGTERM)
示例#2
0
    def sendEventRangeToAthenaMP(self, eventRanges):
        """Queue event ranges, or a "No more events" message, for AthenaMP.

        If ``eventRanges`` contains "No more events" it is put on the input
        message queue unchanged. Otherwise it is treated as one event range
        (a mapping with an 'eventRangeID' key) or a list of them: the ranges
        are JSON-encoded as a list, queued as a single message, counted once
        in the queued-events counter, and each range is marked 'processing'
        in the status table. In both cases AthenaMP is flagged as not ready
        afterwards.

        SIGTERM is blocked while the queue and bookkeeping are updated and is
        always unblocked again on exit, including when an exception is raised
        (for example a KeyError for an unknown event range ID).
        """
        block_sig(signal.SIGTERM)
        try:
            if "No more events" in eventRanges:
                self.__log.debug("Rank %s: sendEventRangeToAthenaMP: %s" % (self.__rank, eventRanges))
                self.__messageInQueue.put(eventRanges)
            else:
                # Normalise a single event range to a one-element list.
                if not isinstance(eventRanges, list):
                    eventRanges = [eventRanges]
                eventRangeFormat = json.dumps(eventRanges)
                self.__log.debug("Rank %s: sendEventRangeToAthenaMP: %s" % (self.__rank, eventRangeFormat))
                self.__messageInQueue.put(eventRangeFormat)
                # One queued message per call, regardless of how many ranges
                # the message carries.
                self.__totalQueuedEvents += 1

                for eventRange in eventRanges:
                    eventRangeID = eventRange['eventRangeID']
                    self.__eventRangesStatus[eventRangeID]['status'] = 'processing'

            self.__athenaMP_isReady = False
        finally:
            # Never leave SIGTERM blocked, even if the bookkeeping above failed.
            unblock_sig(signal.SIGTERM)
示例#3
0
 def handleMessage(self):
     """Take one message from the message queue, if any, and act on it.

     The queue is polled without blocking. When it is empty, SIGTERM is
     unblocked and False is returned. Otherwise the message is logged and
     dispatched on its content:

     * contains "Ready for events": AthenaMP is flagged ready and the
       need-events counter is incremented;
     * starts with "/": treated as an output message; the event range ID is
       parsed from the third-from-last comma-separated field and the range
       is marked 'finished';
     * starts with "ERR": the error is parsed with extractErrorMessage and,
       when an event range ID was extracted, the range is marked 'failed'.

     NOTE(review): this excerpt ends inside the "ERR" branch; no
     unblock_sig call or return value for the non-empty path is visible
     here -- confirm against the full method.
     """
     # block_sig presumably masks SIGTERM while the message is handled.
     block_sig(signal.SIGTERM)
     try:
         #message = self.__messageQueue.get(True, self.__pollTimeout)
         # Non-blocking get: raises Queue.Empty when nothing is waiting.
         message = self.__messageQueue.get(False)
         #self.__messageQueue.task_done()
     except Queue.Empty:
         unblock_sig(signal.SIGTERM)
         return False
     else:
         # Remember when the first message was received.
         if self.__readyForEventTime is None:
             self.__readyForEventTime = time.time()
         self.__log.debug("Rank %s: Received message: %s" %
                          (self.__rank, message))
         if "Ready for events" in message:
             self.__athenaMP_isReady = True
             self.__athenaMP_needEvents += 1
         elif message.startswith("/"):
             # Counters are incremented before parsing, so a message whose
             # format is not recognised is still counted.
             self.__totalProcessedEvents += 1
             self.__numOutputs += 1
             # self.__outputMessage.append(message)
             try:
                 # eventRangeID = message.split(',')[0].split('.')[-1]
                 # NOTE(review): the second replace("ID: ", "") can never
                 # match because the first call already removed every "ID:";
                 # a field written as "ID: x" keeps its leading space.
                 eventRangeID = message.split(',')[-3].replace(
                     "ID:", "").replace("ID: ", "")
                 self.__eventRangesStatus[eventRangeID][
                     'status'] = 'finished'
                 self.__eventRangesStatus[eventRangeID]['output'] = message
                 self.__outputMessage.append(
                     (eventRangeID, 'finished', message))
             except Exception, e:
                 # Parsing or status-table lookup failed: log and carry on.
                 self.__log.warning(
                     "Rank %s: output message format is not recognized: %s "
                     % (self.__rank, message))
                 self.__log.warning("Rank %s: %s" % (self.__rank, str(e)))
         elif message.startswith('ERR'):
             self.__log.error("Rank %s: Received an error message: %s" %
                              (self.__rank, message))
             error_acronym, eventRangeID, error_diagnostics = self.extractErrorMessage(
                 message)
             # An empty ID means the error is not tied to an event range.
             if eventRangeID != "":
                 try:
                     self.__log.error(
                         "Rank %s: !!WARNING!!2144!! Extracted error acronym %s and error diagnostics \'%s\' for event range %s"
                         % (self.__rank, error_acronym, error_diagnostics,
                            eventRangeID))
                     self.__eventRangesStatus[eventRangeID][
                         'status'] = 'failed'
                     self.__eventRangesStatus[eventRangeID][
                         'output'] = message
                     # The error acronym takes the place of the status in
                     # the output tuple.
                     self.__outputMessage.append(
                         (eventRangeID, error_acronym, message))
                 except Exception, e:
                     self.__log.warning(
                         "Rank %s: output message format is not recognized: %s "
                         % (self.__rank, message))
                     self.__log.warning("Rank %s: %s" %
                                        (self.__rank, str(e)))
示例#4
0
    def flushMessages(self):
        """Tell AthenaMP there are no more events, then drain its messages.

        While ``isReady()`` is true, a "No more events" message is sent via
        ``sendEventRangeToAthenaMP``; afterwards ``handleMessage`` is called
        repeatedly until it returns a false value.

        SIGTERM is blocked for the duration and always unblocked on exit,
        including when one of the calls raises.

        NOTE(review): the helpers called here may block/unblock SIGTERM
        themselves; if unblock_sig is not nesting-aware the signal could be
        delivered before this method finishes -- confirm.
        """
        block_sig(signal.SIGTERM)
        try:
            self.__log.info("Rank %s: ESJobManager flush messages" % self.__rank)
            while self.isReady():
                self.__log.info("Rank %s: AthenaMP is ready, send 'No more events' to it." % self.__rank)
                self.sendEventRangeToAthenaMP("No more events")
            # Drain everything still waiting in the message queue.
            while self.handleMessage():
                pass
        finally:
            unblock_sig(signal.SIGTERM)
示例#5
0
    def flushMessages(self):
        """Tell AthenaMP there are no more events, then drain its messages.

        While ``isReady()`` is true, a "No more events" message is sent via
        ``sendEventRangeToAthenaMP``; afterwards ``handleMessage`` is called
        repeatedly until it returns a false value.

        SIGTERM is blocked for the duration and always unblocked on exit,
        including when one of the calls raises.

        NOTE(review): the helpers called here may block/unblock SIGTERM
        themselves; if unblock_sig is not nesting-aware the signal could be
        delivered before this method finishes -- confirm.
        """
        block_sig(signal.SIGTERM)
        try:
            self.__log.info("Rank %s: ESJobManager flush messages" % self.__rank)
            while self.isReady():
                self.__log.info("Rank %s: AthenaMP is ready, send 'No more events' to it." % self.__rank)
                self.sendEventRangeToAthenaMP("No more events")
            # Drain everything still waiting in the message queue.
            while self.handleMessage():
                pass
        finally:
            unblock_sig(signal.SIGTERM)
示例#6
0
文件: Yoda.py 项目: complynx/pilot
 def stop(self, signum=None, frame=None):
     """Shut down: force a final event-range dump and run post-exec.

     The signature matches a signal handler (``signum``, ``frame``); neither
     argument is used. SIGTERM is blocked while the final dump and
     ``postExecJob`` run and is always unblocked afterwards, including when
     one of those steps raises.
     """
     self.tmpLog.info('stop signal received')
     block_sig(signal.SIGTERM)
     try:
         # final dump
         self.tmpLog.info('final dumping')
         self.updateEventRangesToDB(force=True, final=True)
         self.tmpLog.info("post Exec job")
         self.postExecJob()
         self.tmpLog.info('stop')
     finally:
         # Restore SIGTERM delivery even if the shutdown steps failed.
         unblock_sig(signal.SIGTERM)
示例#7
0
文件: Droid.py 项目: complynx/pilot
    def stop(self, signum=None, frame=None):
        """Shut down: terminate the ES job manager and publish final outputs.

        The signature matches a signal handler (``signum``, ``frame``);
        neither argument is used. With SIGTERM blocked, the ES job manager is
        terminated and its messages flushed, the outputs are updated one last
        time and ``postExecJob`` is run. SIGTERM is always unblocked
        afterwards, including when one of those steps raises.
        """
        self.__tmpLog.info('Rank %s: stop signal received' % self.__rank)
        block_sig(signal.SIGTERM)
        try:
            self.__esJobManager.terminate()
            self.__esJobManager.flushMessages()
            self.updateOutputs(signal=True, final=True)

            self.__tmpLog.info("Rank %s: post exec job" % self.__rank)
            self.postExecJob()

            self.__tmpLog.info('Rank %s: stop' % self.__rank)
        finally:
            # Restore SIGTERM delivery even if the shutdown steps failed.
            unblock_sig(signal.SIGTERM)
示例#8
0
 def handleMessage(self):
     """Take one message from the message queue, if any, and act on it.

     The queue is polled without blocking. When it is empty, SIGTERM is
     unblocked and False is returned. Otherwise the message is logged and
     dispatched on its content:

     * contains "Ready for events": AthenaMP is flagged ready and the
       need-events counter is incremented;
     * starts with "/": treated as an output message; the event range ID is
       parsed from the third-from-last comma-separated field and the range
       is marked 'finished';
     * starts with "ERR": the error is parsed with extractErrorMessage and,
       when an event range ID was extracted, the range is marked 'failed'.

     NOTE(review): this excerpt ends inside the "ERR" branch; no
     unblock_sig call or return value for the non-empty path is visible
     here -- confirm against the full method.
     """
     # block_sig presumably masks SIGTERM while the message is handled.
     block_sig(signal.SIGTERM)
     try:
         #message = self.__messageQueue.get(True, self.__pollTimeout)
         # Non-blocking get: raises Queue.Empty when nothing is waiting.
         message = self.__messageQueue.get(False)
         #self.__messageQueue.task_done()
     except Queue.Empty:
         unblock_sig(signal.SIGTERM)
         return False
     else:
         # Remember when the first message was received.
         if self.__readyForEventTime is None:
             self.__readyForEventTime = time.time()
         self.__log.debug("Rank %s: Received message: %s" % (self.__rank, message))
         if "Ready for events" in message:
             self.__athenaMP_isReady = True
             self.__athenaMP_needEvents += 1
         elif message.startswith("/"):
             # Counters are incremented before parsing, so a message whose
             # format is not recognised is still counted.
             self.__totalProcessedEvents += 1
             self.__numOutputs += 1
             # self.__outputMessage.append(message)
             try:
                 # eventRangeID = message.split(',')[0].split('.')[-1]
                 # NOTE(review): the second replace("ID: ", "") can never
                 # match because the first call already removed every "ID:";
                 # a field written as "ID: x" keeps its leading space.
                 eventRangeID = message.split(',')[-3].replace("ID:", "").replace("ID: ", "")
                 self.__eventRangesStatus[eventRangeID]['status'] = 'finished'
                 self.__eventRangesStatus[eventRangeID]['output'] = message
                 self.__outputMessage.append((eventRangeID, 'finished', message))
             except Exception, e:
                 # Parsing or status-table lookup failed: log and carry on.
                 self.__log.warning("Rank %s: output message format is not recognized: %s " % (self.__rank, message))
                 self.__log.warning("Rank %s: %s" % (self.__rank, str(e)))
         elif message.startswith('ERR'):
             self.__log.error("Rank %s: Received an error message: %s" % (self.__rank, message))
             error_acronym, eventRangeID, error_diagnostics = self.extractErrorMessage(message)
             # An empty ID means the error is not tied to an event range.
             if eventRangeID != "":
                 try:
                     self.__log.error("Rank %s: !!WARNING!!2144!! Extracted error acronym %s and error diagnostics \'%s\' for event range %s" % (self.__rank, error_acronym, error_diagnostics, eventRangeID))
                     self.__eventRangesStatus[eventRangeID]['status'] = 'failed'
                     self.__eventRangesStatus[eventRangeID]['output'] = message
                     # The error acronym takes the place of the status in
                     # the output tuple.
                     self.__outputMessage.append((eventRangeID, error_acronym, message))
                 except Exception, e:
                     self.__log.warning("Rank %s: output message format is not recognized: %s " % (self.__rank, message))
                     self.__log.warning("Rank %s: %s" % (self.__rank, str(e)))
示例#9
0
文件: Yoda.py 项目: PalNilsson/pilot
 def stop(self, signum=None, frame=None):
     """Shut down: dump metrics and final event ranges, then exit with 0.

     The signature matches a signal handler (``signum``, ``frame``); neither
     argument is used. With SIGTERM blocked, job metrics are dumped, job end
     times are filled in, the job timestamps are dumped, a final event-range
     update is forced and ``postExecJob`` is run. SIGTERM is always
     unblocked afterwards, including when one of those steps raises.

     Raises:
         SystemExit: via ``sys.exit(0)`` once the shutdown steps have
             completed without an exception.
     """
     self.tmpLog.info('stop signal received')
     block_sig(signal.SIGTERM)
     try:
         self.dumpJobMetrics()
         # Close every job that has no end time yet, and refresh the end
         # time of jobs that still have running ranks.
         for jobId in self.jobsTimestamp:
             if self.jobsTimestamp[jobId]['endTime'] is None:
                 self.jobsTimestamp[jobId]['endTime'] = time.time()
             if len(self.jobsRuningRanks[jobId]) > 0:
                 self.jobsTimestamp[jobId]['endTime'] = time.time()
         self.dumpJobsStartTime()
         # final dump
         self.tmpLog.info('final dumping')
         self.updateEventRangesToDB(force=True, final=True)
         self.tmpLog.info("post Exec job")
         self.postExecJob()
         self.tmpLog.info('stop')
     finally:
         # Restore SIGTERM delivery even if the shutdown steps failed.
         unblock_sig(signal.SIGTERM)
     sys.exit(0)
示例#10
0
文件: Yoda.py 项目: PanDAWMS/pilot
 def stop(self, signum=None, frame=None):
     """Shut down: dump metrics and final event ranges.

     Usable as a signal handler and as a direct call.

     Args:
         signum: number of the signal being handled, or None when called
             directly. When given, the signal is blocked for the duration
             of the shutdown and unblocked afterwards; when None, no signal
             handling is touched (``signal.siginterrupt`` rejects None).
         frame: unused; present for the signal-handler signature.
     """
     self.tmpLog.info('stop signal %s received' % signum)
     hasSignal = signum is not None
     if hasSignal:
         block_sig(signum)
     try:
         if hasSignal:
             # flag=False: system calls interrupted by this signal are
             # restarted instead of failing during the final dump.
             signal.siginterrupt(signum, False)
         self.dumpJobMetrics()
         # Close every job that has no end time yet, and refresh the end
         # time of jobs that still have running ranks.
         for jobId in self.jobsTimestamp:
             if self.jobsTimestamp[jobId]['endTime'] is None:
                 self.jobsTimestamp[jobId]['endTime'] = time.time()
             if len(self.jobsRuningRanks[jobId]) > 0:
                 self.jobsTimestamp[jobId]['endTime'] = time.time()
         self.dumpJobsStartTime()
         # final dump
         self.tmpLog.info('final dumping')
         self.updateEventRangesToDB(force=True, final=True)
         self.tmpLog.info("post Exec job")
         self.postExecJob()
         self.tmpLog.info('stop')
     finally:
         # Restore delivery of the signal even if the shutdown steps failed.
         if hasSignal:
             unblock_sig(signum)
示例#11
0
文件: Droid.py 项目: PanDAWMS/pilot
    def stop(self, signum=None, frame=None):
        """Shut down: stop the ES job manager and publish final outputs.

        Usable as a signal handler and as a direct call. Sets the stop flag,
        terminates the ES job manager (if there is one), collects accounting
        metrics, dumps job metrics, sends a heartbeat, flushes the job
        manager's messages, updates the outputs a final time and runs
        ``postExecJob``.

        Args:
            signum: number of the signal being handled, or None when called
                directly. When given, the signal is blocked for the duration
                of the shutdown and unblocked afterwards; when None, no
                signal handling is touched (``signal.siginterrupt`` rejects
                None).
            frame: unused; present for the signal-handler signature.
        """
        self.__tmpLog.info('Rank %s: stop signal %s received' % (self.__rank, signum))
        self.__stop = True
        hasSignal = signum is not None
        if hasSignal:
            block_sig(signum)
        try:
            if hasSignal:
                # flag=False: system calls interrupted by this signal are
                # restarted instead of failing during shutdown.
                signal.siginterrupt(signum, False)
            if self.__esJobManager:
                self.__esJobManager.terminate()
            self.getAccountingMetrics()
            self.dumpJobMetrics()
            self.heartbeat()
            # Same guard as for terminate(): there may be no job manager.
            if self.__esJobManager:
                self.__esJobManager.flushMessages()
            self.updateOutputs(signal=True, final=True)

            self.__tmpLog.info("Rank %s: post exec job" % self.__rank)
            self.postExecJob()

            self.__tmpLog.info('Rank %s: stop' % self.__rank)
        finally:
            # Restore delivery of the signal even if shutdown steps failed.
            if hasSignal:
                unblock_sig(signum)
示例#12
0
    def stop(self, signum=None, frame=None):
        """Shut down: stop the ES job manager and publish final outputs.

        Usable as a signal handler and as a direct call. Sets the stop flag,
        terminates the ES job manager (if there is one), collects accounting
        metrics, dumps job metrics, sends a heartbeat, flushes the job
        manager's messages, updates the outputs a final time and runs
        ``postExecJob``.

        Args:
            signum: number of the signal being handled, or None when called
                directly. When given, the signal is blocked for the duration
                of the shutdown and unblocked afterwards; when None, no
                signal handling is touched (``signal.siginterrupt`` rejects
                None).
            frame: unused; present for the signal-handler signature.
        """
        self.__tmpLog.info('Rank %s: stop signal %s received' %
                           (self.__rank, signum))
        self.__stop = True
        hasSignal = signum is not None
        if hasSignal:
            block_sig(signum)
        try:
            if hasSignal:
                # flag=False: system calls interrupted by this signal are
                # restarted instead of failing during shutdown.
                signal.siginterrupt(signum, False)
            if self.__esJobManager:
                self.__esJobManager.terminate()
            self.getAccountingMetrics()
            self.dumpJobMetrics()
            self.heartbeat()
            # Same guard as for terminate(): there may be no job manager.
            if self.__esJobManager:
                self.__esJobManager.flushMessages()
            self.updateOutputs(signal=True, final=True)

            self.__tmpLog.info("Rank %s: post exec job" % self.__rank)
            self.postExecJob()

            self.__tmpLog.info('Rank %s: stop' % self.__rank)
        finally:
            # Restore delivery of the signal even if shutdown steps failed.
            if hasSignal:
                unblock_sig(signum)
示例#13
0
                error_acronym, eventRangeID, error_diagnostics = self.extractErrorMessage(message)
                if eventRangeID != "":
                    try:
                        self.__log.error("Rank %s: !!WARNING!!2144!! Extracted error acronym %s and error diagnostics \'%s\' for event range %s" % (self.__rank, error_acronym, error_diagnostics, eventRangeID))
                        self.__eventRangesStatus[eventRangeID]['status'] = 'failed'
                        self.__eventRangesStatus[eventRangeID]['output'] = message
                        self.__outputMessage.append((eventRangeID, error_acronym, message))
                    except Exception, e:
                        self.__log.warning("Rank %s: output message format is not recognized: %s " % (self.__rank, message))
                        self.__log.warning("Rank %s: %s" % (self.__rank, str(e)))
                if "FATAL" in error_acronym:
                    self.__log.error("Rank %s: !!WARNING!!2146!! A FATAL error was encountered, prepare to finish" % (self.__rank))
                    self.terminate()
            else:
                self.__log.error("Rank %s: Received an unknown message: %s" % (self.__rank, message))
            unblock_sig(signal.SIGTERM)
            return True

    def findChildProcesses(self,pid):
        command = "/bin/ps -e --no-headers -o pid -o ppid -o fname"
        status,output = commands.getstatusoutput(command)
        #print "ps output: %s" % output

        pieces = []
        result = []
        for line in output.split("\n"):
            pieces= line.split()
            try:
                value=int(pieces[1])
            except Exception,e:
                #print "trouble interpreting ps output %s: \n %s" % (e,pieces)
示例#14
0
                error_acronym, eventRangeID, error_diagnostics = self.extractErrorMessage(message)
                if eventRangeID != "":
                    try:
                        self.__log.error("Rank %s: !!WARNING!!2144!! Extracted error acronym %s and error diagnostics \'%s\' for event range %s" % (self.__rank, error_acronym, error_diagnostics, eventRangeID))
                        self.__eventRangesStatus[eventRangeID]['status'] = 'failed'
                        self.__eventRangesStatus[eventRangeID]['output'] = message
                        self.__outputMessage.append((eventRangeID, error_acronym, message))
                    except Exception, e:
                        self.__log.warning("Rank %s: output message format is not recognized: %s " % (self.__rank, message))
                        self.__log.warning("Rank %s: %s" % (self.__rank, str(e)))
                if "FATAL" in error_acronym:
                    self.__log.error("Rank %s: !!WARNING!!2146!! A FATAL error was encountered, prepare to finish" % (self.__rank))
                    self.terminate()
            else:
                self.__log.error("Rank %s: Received an unknown message: %s" % (self.__rank, message))
            unblock_sig(signal.SIGTERM)
            return True

    def findChildProcesses(self,pid):
        command = "/bin/ps -e --no-headers -o pid -o ppid -o fname"
        status,output = commands.getstatusoutput(command)
        #print "ps output: %s" % output

        pieces = []
        result = []
        for line in output.split("\n"):
            pieces= line.split()
            try:
                value=int(pieces[1])
            except Exception,e:
                #print "trouble interpreting ps output %s: \n %s" % (e,pieces)