def connect(self):
    """Create the message client for this mind and open the connection.

    The ``ProcessTask`` handler and the disconnect handler are subscribed
    before connecting.  The connection advertises the names of the registered
    modules, the task limit and the per-module extra arguments.  When the
    connection succeeds, ``alive()`` is called on the alive lock and the
    connection is logged.

    :return: the result structure returned by ``MessageClient.connect``
    """
    client = MessageClient(self.__mindName)
    self.__msgClient = client
    client.subscribeToMessage("ProcessTask", self.__processTask)
    client.subscribeToDisconnect(self.__disconnected)
    connectArgs = {
        "executorTypes": list(self.__modules),
        "maxTasks": self.__maxTasks,
        "extraArgs": self.__extraArgs,
    }
    outcome = client.connect(**connectArgs)
    if not outcome["OK"]:
        return outcome
    self.__aliveLock.alive()
    gLogger.info("Connected to %s" % self.__mindName)
    return outcome
def initializeHandler(cls, serviceInfoDict):
    """Create the OptimizationMind message client and connect it.

    A failed connection is logged as an error; the connection result is
    handed back to the caller in either case.

    :param serviceInfoDict: service information (not used here)
    :return: the result structure returned by ``MessageClient.connect``
    """
    cls.msgClient = MessageClient("WorkloadManagement/OptimizationMind")
    connection = cls.msgClient.connect(JobManager=True)
    if connection["OK"]:
        return connection
    cls.log.error("Cannot connect to OptimizationMind!", connection["Message"])
    return connection
def addClient(self, cliTrid, destination, clientInitParams, connectParams):
    """Open a server-side message client on behalf of a connected client.

    :param cliTrid: transport id of the client side; must not be registered yet
    :param destination: name passed to ``MessageClient`` as the target
    :param clientInitParams: keyword arguments for the ``MessageClient`` constructor
    :param connectParams: indexable pair; item 0 is set as the unique name,
                          item 1 holds the keyword arguments for ``connect``
    :return: ``None`` when ``cliTrid`` is already registered, otherwise the
             result structure returned by ``MessageClient.connect``
    """
    if cliTrid in self.__byClient:
        gLogger.fatal("Trid is duplicated!! this shouldn't happen")
        return
    srvEnd = MessageClient(destination, **clientInitParams)
    srvEnd.subscribeToDisconnect(self.__srvDisconnect)
    srvEnd.subscribeToAllMessages(self.msgFromSrv)
    srvEnd.setUniqueName(connectParams[0])
    outcome = srvEnd.connect(**connectParams[1])
    if outcome["OK"]:
        # Register both directions of the client <-> server mapping under the lock.
        self.__inOutLock.acquire()
        try:
            self.__byClient[cliTrid] = {
                "srvEnd": srvEnd,
                "srvTrid": srvEnd.getTrid(),
                "srvName": destination,
            }
            self.__srvToCliTrid[srvEnd.getTrid()] = cliTrid
        finally:
            self.__inOutLock.release()
    return outcome
def connect(self):
    """Build the message client for the mind and connect to it.

    Subscribes the ``ProcessTask`` and disconnect callbacks, then connects
    announcing the registered module names, the maximum number of tasks and
    the extra arguments.  A successful connection calls ``alive()`` on the
    alive lock and is logged.

    :return: the result structure returned by ``MessageClient.connect``
    """
    mindClient = MessageClient(self.__mindName)
    self.__msgClient = mindClient
    mindClient.subscribeToMessage('ProcessTask', self.__processTask)
    mindClient.subscribeToDisconnect(self.__disconnected)
    executorTypes = list(self.__modules.keys())
    outcome = mindClient.connect(
        executorTypes=executorTypes,
        maxTasks=self.__maxTasks,
        extraArgs=self.__extraArgs,
    )
    if not outcome['OK']:
        return outcome
    self.__aliveLock.alive()
    gLogger.info("Connected to %s" % self.__mindName)
    return outcome
def initializeHandler(cls, serviceInfoDict):
    """Initialization of DB objects and OptimizationMind.

    Loads and instantiates the job, job-logging, task-queue and pilot-agents
    DB classes (and the pilots-logging DB when the operations flag enables
    it), then connects a message client to the OptimizationMind.  A failed
    mind connection only produces a warning.

    :param serviceInfoDict: service information (not used here)
    :return: the failing load result, ``S_ERROR`` when a DB constructor raises
             ``RuntimeError``, otherwise ``S_OK()``
    """
    # (class attribute, module path, class name), loaded in this order.
    requiredDBs = (
        ("jobDB", "WorkloadManagementSystem.DB.JobDB", "JobDB"),
        ("jobLoggingDB", "WorkloadManagementSystem.DB.JobLoggingDB", "JobLoggingDB"),
        ("taskQueueDB", "WorkloadManagementSystem.DB.TaskQueueDB", "TaskQueueDB"),
        ("pilotAgentsDB", "WorkloadManagementSystem.DB.PilotAgentsDB", "PilotAgentsDB"),
    )
    try:
        for attrName, modulePath, className in requiredDBs:
            loaded = ObjectLoader().loadObject(modulePath, className)
            if not loaded["OK"]:
                return loaded
            setattr(cls, attrName, loaded["Value"](parentLogger=cls.log))
    except RuntimeError as excp:
        return S_ERROR("Can't connect to DB: %s" % excp)

    cls.pilotsLoggingDB = None
    if Operations().getValue("/Services/JobMonitoring/usePilotsLoggingFlag", False):
        try:
            loaded = ObjectLoader().loadObject(
                "WorkloadManagementSystem.DB.PilotsLoggingDB", "PilotsLoggingDB"
            )
            if not loaded["OK"]:
                return loaded
            cls.pilotsLoggingDB = loaded["Value"](parentLogger=cls.log)
        except RuntimeError as excp:
            return S_ERROR("Can't connect to DB: %s" % excp)

    cls.msgClient = MessageClient("WorkloadManagement/OptimizationMind")
    connection = cls.msgClient.connect(JobManager=True)
    if not connection["OK"]:
        # Not fatal: initialization still reports success.
        cls.log.warn("Cannot connect to OptimizationMind!", connection["Message"])
    return S_OK()
def initializeHandler(cls, serviceInfoDict):
    """Initialization of DB objects and OptimizationMind.

    Instantiates the DB objects, optionally the pilots-logging DB when the
    operations flag is set, creates the OptimizationMind message client,
    attempts a first connection and schedules the same connection routine
    as a periodic task with a period argument of 60.

    :param serviceInfoDict: service information (not used here)
    :return: ``S_OK()``
    """
    cls.jobDB = JobDB()
    cls.jobLoggingDB = JobLoggingDB()
    cls.taskQueueDB = TaskQueueDB()
    cls.pilotAgentsDB = PilotAgentsDB()

    cls.pilotsLoggingDB = None
    if Operations().getValue('/Services/JobMonitoring/usePilotsLoggingFlag', False):
        cls.pilotsLoggingDB = PilotsLoggingDB()

    cls.msgClient = MessageClient("WorkloadManagement/OptimizationMind")
    connectRoutine = cls.__connectToOptMind
    connectRoutine()
    gThreadScheduler.addPeriodicTask(60, connectRoutine)
    return S_OK()
def initializeHandler(cls, serviceInfoDict):
    """Create the OptimizationMind client and keep trying to connect it.

    The connection routine is run once immediately and then registered with
    the thread scheduler as a periodic task with a period argument of 60.

    :param serviceInfoDict: service information (not used here)
    :return: ``S_OK()``
    """
    cls.msgClient = MessageClient("WorkloadManagement/OptimizationMind")
    connectRoutine = cls.__connectToOptMind
    connectRoutine()
    gThreadScheduler.addPeriodicTask(60, connectRoutine)
    return S_OK()
class MindCluster(object):
    """Set of executor modules served through one message connection to a mind.

    Modules are registered with :meth:`addModule`; :meth:`connect` opens the
    connection and from then on ``ProcessTask`` messages are dispatched to an
    instance of the requested module.  Module instances are pooled per module
    name and reused between tasks.
    """

    def __init__(self, mindName, aliveLock):
        """
        :param mindName: name of the mind to connect to
        :param aliveLock: object whose ``alive()`` method is called on
                          connection events
        """
        self.__mindName = mindName
        self.__modules = {}  # module name -> executor class
        self.__maxTasks = 1
        self.__reconnectSleep = 1
        self.__reconnectRetries = 10
        self.__extraArgs = {}  # module name -> extra arguments of the executor
        self.__instances = {}  # module name -> list of idle instances
        self.__instanceLock = threading.Lock()
        self.__aliveLock = aliveLock

    def updateMaxTasks(self, mt):
        """Raise the task limit to ``mt`` if it is larger than the current one."""
        self.__maxTasks = max(self.__maxTasks, mt)

    def addModule(self, name, exeClass):
        """Register an executor class under ``name``.

        The task limit and the reconnect sleep/retries settings are each raised
        to the module's ``MaxTasks``, ``ReconnectSleep`` and ``ReconnectRetries``
        options when those are larger than the current values.

        :param name: module name, also used as executor type
        :param exeClass: executor class providing ``ex_getOption`` and
                         ``ex_getExtraArguments``
        """
        self.__modules[name] = exeClass
        self.__maxTasks = max(self.__maxTasks, exeClass.ex_getOption("MaxTasks"))
        self.__reconnectSleep = max(self.__reconnectSleep, exeClass.ex_getOption("ReconnectSleep"))
        self.__reconnectRetries = max(self.__reconnectRetries, exeClass.ex_getOption("ReconnectRetries"))
        self.__extraArgs[name] = exeClass.ex_getExtraArguments()

    def __connectToMind(self):
        """Issue the connect call with the current module list and settings."""
        return self.__msgClient.connect(
            executorTypes=list(self.__modules),
            maxTasks=self.__maxTasks,
            extraArgs=self.__extraArgs,
        )

    def connect(self):
        """Create the message client, subscribe the callbacks and connect.

        :return: the result structure returned by ``MessageClient.connect``
        """
        self.__msgClient = MessageClient(self.__mindName)
        self.__msgClient.subscribeToMessage('ProcessTask', self.__processTask)
        self.__msgClient.subscribeToDisconnect(self.__disconnected)
        result = self.__connectToMind()
        if result['OK']:
            self.__aliveLock.alive()
            gLogger.info("Connected to %s" % self.__mindName)
        return result

    def __disconnected(self, msgClient):
        """Disconnect callback: retry the connection until it succeeds.

        The loop never gives up; it sleeps ``__reconnectSleep`` seconds between
        attempts.

        :param msgClient: the disconnected client (the stored client is used)
        :return: ``S_OK()`` once reconnected
        """
        retryCount = 0
        while True:
            gLogger.notice("Trying to reconnect to %s" % self.__mindName)
            result = self.__connectToMind()
            if result['OK']:
                if retryCount >= self.__reconnectRetries:
                    self.__aliveLock.alive()
                gLogger.notice("Reconnected to %s" % self.__mindName)
                return S_OK()
            retryCount += 1
            # NOTE(review): alive() is also called when the retry budget is
            # reached -- confirm against the alive lock whether this is intended.
            if retryCount == self.__reconnectRetries:
                self.__aliveLock.alive()
            gLogger.info("Connect error failed: %s" % result['Message'])
            gLogger.notice("Failed to reconnect. Sleeping for %d seconds" % self.__reconnectSleep)
            time.sleep(self.__reconnectSleep)

    def __storeInstance(self, modName, modObj):
        """Put a module instance back into the idle pool of ``modName``."""
        with self.__instanceLock:
            self.__instances[modName].append(modObj)

    def __getInstance(self, moduleName):
        """Get an idle instance of a module, creating a new one if none is pooled.

        :return: ``S_OK(instance)`` or ``S_ERROR`` when the module is unknown
        """
        with self.__instanceLock:
            pool = self.__instances.setdefault(moduleName, [])
            if pool:
                return S_OK(pool.pop(0))
        try:
            modObj = self.__modules[moduleName]
        except KeyError:
            # The executor name used to be left out of the message.
            return S_ERROR("Unknown %s executor" % moduleName)
        # Instantiation happens outside the lock.
        return S_OK(modObj())

    def __sendExecutorError(self, eType, taskId, errMsg):
        """Send an ``ExecutorError`` message for the given task.

        :return: the failing ``createMessage`` result or the ``sendMessage`` result
        """
        result = self.__msgClient.createMessage("ExecutorError")
        if not result['OK']:
            return result
        msgObj = result['Value']
        msgObj.taskId = taskId
        msgObj.errorMsg = errMsg
        msgObj.eType = eType
        return self.__msgClient.sendMessage(msgObj)

    def __processTask(self, msgObj):
        """``ProcessTask`` callback: run the task and send the outcome back.

        An ``ExecutorError`` message is sent instead when processing fails or
        the reply message cannot be created.

        :param msgObj: message carrying ``eType``, ``taskId`` and ``taskStub``
        :return: the result of sending the reply message
        """
        eType = msgObj.eType
        taskId = msgObj.taskId
        taskStub = msgObj.taskStub

        result = self.__moduleProcess(eType, taskId, taskStub)
        if not result['OK']:
            return self.__sendExecutorError(eType, taskId, result['Message'])
        msgName, taskStub, extra = result['Value']

        result = self.__msgClient.createMessage(msgName)
        if not result['OK']:
            return self.__sendExecutorError(
                eType, taskId, "Can't generate %s message: %s" % (msgName, result['Message'])
            )
        gLogger.verbose("Task %s: Sending %s" % (str(taskId), msgName))
        msgObj = result['Value']
        msgObj.taskId = taskId
        msgObj.taskStub = taskStub
        if msgName == "TaskError":
            msgObj.errorMsg = extra
            msgObj.eType = eType
        elif msgName == "TaskFreeze":
            msgObj.freezeTime = extra
        return self.__msgClient.sendMessage(msgObj)

    def __moduleProcess(self, eType, taskId, taskStub, fastTrackLevel=0):
        """Run a task through the module ``eType``, following fast tracks.

        :param fastTrackLevel: number of fast-track hops already taken; a task
                               is fast tracked only while this is below 10
        :return: ``S_OK((msgName, taskStub, extra))`` with ``msgName`` one of
                 ``TaskError``, ``TaskFreeze`` or ``TaskDone``; ``S_ERROR`` when
                 no instance is available or the module raises
        """
        result = self.__getInstance(eType)
        if not result['OK']:
            return result
        modInstance = result['Value']
        try:
            result = modInstance._ex_processTask(taskId, taskStub)
        except Exception as excp:
            # The instance is not returned to the pool on this path.
            gLogger.exception("Error while processing task %s" % taskId, lException=excp)
            return S_ERROR("Error processing task %s: %s" % (taskId, excp))

        self.__storeInstance(eType, modInstance)

        if not result['OK']:
            return S_OK(('TaskError', taskStub, "Error: %s" % result['Message']))
        taskStub, freezeTime, fastTrackType = result['Value']
        if freezeTime:
            return S_OK(("TaskFreeze", taskStub, freezeTime))
        if fastTrackType:
            if fastTrackLevel < 10 and fastTrackType in self.__modules:
                gLogger.notice("Fast tracking task %s to %s" % (taskId, fastTrackType))
                return self.__moduleProcess(fastTrackType, taskId, taskStub, fastTrackLevel + 1)
            else:
                gLogger.notice("Stopping %s fast track. Sending back to the mind" % (taskId))

        return S_OK(("TaskDone", taskStub, True))
return sendPingMsg( msgObj.msgClient, pongid + 1 ) def disconnectedCB( msgClient ): """ Reconnect :) """ retryCount = 0 while retryCount: result = msgClient.connect() if result[ 'OK' ]: return result time.sleep( 1 ) retryCount -= 1 return S_ERROR( "Could not reconnect... :P" ) if __name__ == "__main__": msgClient = MessageClient( "Framework/PingPong" ) msgClient.subscribeToMessage( 'Pong', pongCB ) msgClient.subscribeToDisconnect( disconnectedCB ) result = msgClient.connect() if not result[ 'OK' ]: print("CANNOT CONNECT: %s" % result['Message']) sys.exit(1) result = sendPingMsg( msgClient ) if not result[ 'OK' ]: print("CANNOT SEND PING: %s" % result['Message']) sys.exit(1) #Wait 10 secs of pingpongs :P time.sleep( 10 )
class MindCluster(object):
    """Set of executor modules served through one message connection to a mind.

    Modules are registered with :meth:`addModule`; :meth:`connect` opens the
    connection and from then on ``ProcessTask`` messages are dispatched to an
    instance of the requested module.  Module instances are pooled per module
    name and reused between tasks.
    """

    def __init__(self, mindName, aliveLock):
        """
        :param mindName: name of the mind to connect to
        :param aliveLock: object whose ``alive()`` method is called on
                          connection events
        """
        self.__mindName = mindName
        self.__modules = {}  # module name -> executor class
        self.__maxTasks = 1
        self.__reconnectSleep = 1
        self.__reconnectRetries = 10
        self.__extraArgs = {}  # module name -> extra arguments of the executor
        self.__instances = {}  # module name -> list of idle instances
        self.__instanceLock = threading.Lock()
        self.__aliveLock = aliveLock

    def updateMaxTasks(self, mt):
        """Raise the task limit to ``mt`` if it is larger than the current one."""
        self.__maxTasks = max(self.__maxTasks, mt)

    def addModule(self, name, exeClass):
        """Register an executor class under ``name``.

        The task limit and the reconnect sleep/retries settings are each raised
        to the module's ``MaxTasks``, ``ReconnectSleep`` and ``ReconnectRetries``
        options (read with a default of 0) when those are larger than the
        current values.

        :param name: module name, also used as executor type
        :param exeClass: executor class providing ``ex_getOption`` and
                         ``ex_getExtraArguments``
        """
        self.__modules[name] = exeClass
        self.__maxTasks = max(self.__maxTasks, exeClass.ex_getOption("MaxTasks", 0))
        self.__reconnectSleep = max(self.__reconnectSleep, exeClass.ex_getOption("ReconnectSleep", 0))
        self.__reconnectRetries = max(self.__reconnectRetries, exeClass.ex_getOption("ReconnectRetries", 0))
        self.__extraArgs[name] = exeClass.ex_getExtraArguments()

    def __connectToMind(self):
        """Issue the connect call with the current module list and settings."""
        return self.__msgClient.connect(
            executorTypes=list(self.__modules), maxTasks=self.__maxTasks, extraArgs=self.__extraArgs
        )

    def connect(self):
        """Create the message client, subscribe the callbacks and connect.

        :return: the result structure returned by ``MessageClient.connect``
        """
        self.__msgClient = MessageClient(self.__mindName)
        self.__msgClient.subscribeToMessage("ProcessTask", self.__processTask)
        self.__msgClient.subscribeToDisconnect(self.__disconnected)
        result = self.__connectToMind()
        if result["OK"]:
            self.__aliveLock.alive()
            gLogger.info("Connected to %s" % self.__mindName)
        return result

    def __disconnected(self, msgClient):
        """Disconnect callback: retry the connection until it succeeds.

        The loop never gives up; it sleeps ``__reconnectSleep`` seconds between
        attempts.

        :param msgClient: the disconnected client (the stored client is used)
        :return: ``S_OK()`` once reconnected
        """
        retryCount = 0
        while True:
            gLogger.notice("Trying to reconnect to %s" % self.__mindName)
            result = self.__connectToMind()
            if result["OK"]:
                if retryCount >= self.__reconnectRetries:
                    self.__aliveLock.alive()
                gLogger.notice("Reconnected to %s" % self.__mindName)
                return S_OK()
            retryCount += 1
            # NOTE(review): alive() is also called when the retry budget is
            # reached -- confirm against the alive lock whether this is intended.
            if retryCount == self.__reconnectRetries:
                self.__aliveLock.alive()
            gLogger.info("Connect error failed: %s" % result["Message"])
            gLogger.notice("Failed to reconnect. Sleeping for %d seconds" % self.__reconnectSleep)
            time.sleep(self.__reconnectSleep)

    def __storeInstance(self, modName, modObj):
        """Put a module instance back into the idle pool of ``modName``."""
        with self.__instanceLock:
            self.__instances[modName].append(modObj)

    def __getInstance(self, moduleName):
        """Get an idle instance of a module, creating a new one if none is pooled.

        :return: ``S_OK(instance)`` or ``S_ERROR`` when the module is unknown
        """
        with self.__instanceLock:
            pool = self.__instances.setdefault(moduleName, [])
            if pool:
                return S_OK(pool.pop(0))
        try:
            modObj = self.__modules[moduleName]
        except KeyError:
            # The executor name used to be left out of the message.
            return S_ERROR("Unknown %s executor" % moduleName)
        # Instantiation happens outside the lock.
        return S_OK(modObj())

    def __sendExecutorError(self, eType, taskId, errMsg):
        """Send an ``ExecutorError`` message for the given task.

        :return: the failing ``createMessage`` result or the ``sendMessage`` result
        """
        result = self.__msgClient.createMessage("ExecutorError")
        if not result["OK"]:
            return result
        msgObj = result["Value"]
        msgObj.taskId = taskId
        msgObj.errorMsg = errMsg
        msgObj.eType = eType
        return self.__msgClient.sendMessage(msgObj)

    def __processTask(self, msgObj):
        """``ProcessTask`` callback: run the task and send the outcome back.

        An ``ExecutorError`` message is sent instead when processing fails or
        the reply message cannot be created.

        :param msgObj: message carrying ``eType``, ``taskId`` and ``taskStub``
        :return: the result of sending the reply message
        """
        eType = msgObj.eType
        taskId = msgObj.taskId
        taskStub = msgObj.taskStub

        result = self.__moduleProcess(eType, taskId, taskStub)
        if not result["OK"]:
            return self.__sendExecutorError(eType, taskId, result["Message"])
        msgName, taskStub, extra = result["Value"]

        result = self.__msgClient.createMessage(msgName)
        if not result["OK"]:
            return self.__sendExecutorError(
                eType, taskId, "Can't generate %s message: %s" % (msgName, result["Message"])
            )
        gLogger.verbose("Task %s: Sending %s" % (str(taskId), msgName))
        msgObj = result["Value"]
        msgObj.taskId = taskId
        msgObj.taskStub = taskStub
        if msgName == "TaskError":
            msgObj.errorMsg = extra
            msgObj.eType = eType
        elif msgName == "TaskFreeze":
            msgObj.freezeTime = extra
        return self.__msgClient.sendMessage(msgObj)

    def __moduleProcess(self, eType, taskId, taskStub, fastTrackLevel=0):
        """Run a task through the module ``eType``, following fast tracks.

        :param fastTrackLevel: number of fast-track hops already taken; a task
                               is fast tracked only while this is below 10
        :return: ``S_OK((msgName, taskStub, extra))`` with ``msgName`` one of
                 ``TaskError``, ``TaskFreeze`` or ``TaskDone``; ``S_ERROR`` when
                 no instance is available or the module raises
        """
        result = self.__getInstance(eType)
        if not result["OK"]:
            return result
        modInstance = result["Value"]
        try:
            result = modInstance._ex_processTask(taskId, taskStub)
        except Exception as excp:
            # The instance is not returned to the pool on this path.
            gLogger.exception("Error while processing task %s" % taskId, lException=excp)
            return S_ERROR("Error processing task %s: %s" % (taskId, excp))

        self.__storeInstance(eType, modInstance)

        if not result["OK"]:
            return S_OK(("TaskError", taskStub, "Error: %s" % result["Message"]))
        taskStub, freezeTime, fastTrackType = result["Value"]
        if freezeTime:
            return S_OK(("TaskFreeze", taskStub, freezeTime))
        if fastTrackType:
            if fastTrackLevel < 10 and fastTrackType in self.__modules:
                gLogger.notice("Fast tracking task %s to %s" % (taskId, fastTrackType))
                return self.__moduleProcess(fastTrackType, taskId, taskStub, fastTrackLevel + 1)
            else:
                gLogger.notice("Stopping %s fast track. Sending back to the mind" % (taskId))

        return S_OK(("TaskDone", taskStub, True))
return sendPingMsg( msgObj.msgClient, pongid + 1 ) def disconnectedCB( msgClient ): """ Reconnect :) """ retryCount = 0 while retryCount: result = msgClient.connect() if result[ 'OK' ]: return result time.sleep( 1 ) retryCount -= 1 return S_ERROR( "Could not reconnect... :P" ) if __name__ == "__main__": msgClient = MessageClient( "Framework/PingPong" ) msgClient.subscribeToMessage( 'Pong', pongCB ) msgClient.subscribeToDisconnect( disconnectedCB ) result = msgClient.connect() if not result[ 'OK' ]: print "CANNOT CONNECT: %s" % result[ 'Message' ] sys.exit(1) result = sendPingMsg( msgClient ) if not result[ 'OK' ]: print "CANNOT SEND PING: %s" % result[ 'Message' ] sys.exit(1) #Wait 10 secs of pingpongs :P time.sleep( 10 )