def run(self):
    """
    Resynchronize status from the server, verify the action is allowed,
    then ask the server to clean the task remotely.

    Server-side cleaning is best effort: if the request fails, only the
    local working directory removal (done by the caller) remains.
    """
    # get updated status from server (best effort: a stale status must
    # not prevent the cleanup from proceeding)
    try:
        stat = StatusServer(self.cfg_params)
        warning_msg = stat.resynchClientSide()
        if warning_msg is not None:
            common.logger.info(warning_msg)
    except Exception as e:
        # narrowed from a bare except: keep best-effort but log the cause
        common.logger.debug("Status resynch failed: %s" % str(e))

    # check whether the action is allowable
    self.check()

    # notify the server to clean the task
    csCommunicator = ServerCommunicator(self.server_name,
                                        self.server_port,
                                        self.cfg_params)
    taskuuid = str(common._db.queryTask('name'))
    try:
        csCommunicator.cleanTask(taskuuid)
    except Exception as e:
        msg = "Client Server comunication failed about cleanJobs: task \n" + taskuuid
        msg += "Only local working directory will be removed."
        common.logger.debug(msg)
def run(self):
    """
    The main method of the class: kill a complete task.

    Refreshes the status from the server, collects the jobs that are in
    a killable state, sends a single kill request for them, and records
    the 'KillRequested' state locally.
    """
    common.logger.debug("Killer::run() called")

    # get updated status from server (best effort: a failed resynch must
    # not prevent the kill request) -- inherited from StatusServer
    try:
        from StatusServer import StatusServer
        stat = StatusServer(self.cfg_params)
        warning_msg = stat.resynchClientSide()
        if warning_msg is not None:
            common.logger.info(warning_msg)
    except Exception as e:
        # narrowed from a bare except: keep best-effort but log the cause
        common.logger.debug("Status resynch failed: %s" % str(e))

    task = common._db.getTask(self.range)

    # only jobs already submitted, or with a pending submission request,
    # can be killed (fast-kill at registration included)
    toBeKilled = []
    for job in task.jobs:
        if job.runningJob['state'] in ['SubSuccess', 'SubRequested']:
            toBeKilled.append(job['jobId'])
        else:
            common.logger.info("Not possible to kill Job #"+str(job['jobId'])+\
                               " : Last action was: "+str(job.runningJob['state'])+\
                               " Status is "+str(job.runningJob['statusScheduler']))

    if len(toBeKilled) > 0:
        ## register proxy ##
        csCommunicator = ServerCommunicator(self.server_name,
                                            self.server_port,
                                            self.cfg_params)
        taskuuid = str(common._db.queryTask('name'))
        ret = csCommunicator.killJobs(taskuuid, toBeKilled)
        del csCommunicator
        if ret != 0:
            msg = "ClientServer ERROR: %d raised during the communication.\n" % ret
            raise CrabException(msg)

        # printout the command result
        common._db.updateRunJob_(toBeKilled,
                                 [{'state': 'KillRequested'}] * len(toBeKilled))
        common.logger.info("Kill request for %d jobs succesfully sent to the server\n" % len(toBeKilled))
    return
def performSubmission(self, firstSubmission=True):
    """
    Submit the task (or a range of its jobs) to the server frontend.

    firstSubmission: True for the very first submission of the task,
    False for subsequent submissions / resubmissions.
    """
    # create the communication session with the server frontend
    csCommunicator = ServerCommunicator(self.server_name,
                                        self.server_port,
                                        self.cfg_params)
    subOutcome = 0
    TotJob = common._db.nJobs()

    # transfer remote dir to server
    self.cfg_params['CRAB.se_remote_dir'] = self.remotedir

    if firstSubmission:
        # check if the server is in drain mode; nothing to submit then
        if self.checkIfDrained(csCommunicator):
            return
        # move the sandbox
        self.moveISB_SEAPI()
        # first time submit
        taskXML = self.serialize()
        # TODO fix not needed first field
        subOutcome = csCommunicator.submitNewTask(self.taskuuid, taskXML,
                                                  self.submitRange, TotJob,
                                                  taskType=self.taskType)
    else:
        # subsequent submissions and resubmit
        self.stateChange(self.submitRange, "SubRequested")
        if self.extended == 1:
            # update the Arguments XML file
            argsXML = common.work_space.shareDir() + 'arguments.xml'
            self.moveISB_SEAPI([argsXML])
            taskXML = self.serialize()
            subOutcome = csCommunicator.submitNewTask(self.taskuuid, taskXML,
                                                      self.submitRange, TotJob,
                                                      taskType='extended')
        else:
            try:
                subOutcome = csCommunicator.subsequentJobSubmit(self.taskuuid,
                                                                self.submitRange)
            except Exception as ex:  # TODO change to specific exception
                # clean sub. requested status; log the failure instead of
                # silently discarding its cause
                common.logger.debug("subsequentJobSubmit failed: %s" % str(ex))
                self.stateChange(self.submitRange, "Created")
def resynchClientSide(self):
    """
    Get status from the server and align back data on the client.

    Tries up to max_get_status_tries times to fetch and decode the
    status XML (with exponential back-off between attempts), then
    deserializes the per-job entries into the local database.
    Returns None on success, or a warning-message string on failure
    (the caller may display it to the user).
    """
    task = common._db.getTask()
    self.task_unique_name = str(task['name'])

    # communicator allocation
    csCommunicator = ServerCommunicator(self.server_name,
                                        self.server_port, self.cfg_params)

    # align back data and print
    reportXML = None
    warning_msg = None
    max_get_status_tries = 5
    for retry in xrange(max_get_status_tries):
        if retry > 0:
            # exponential back-off: 1, 2, 4, 8 seconds
            delay = pow(2, retry - 1)
            common.logger.info(
                "Server status decoding problem. Try again in %d seconds" % delay)
            runCommand("/bin/sleep %d" % delay)
        handledXML = csCommunicator.getStatus(self.task_unique_name)
        reportXML, warning_msg = self.statusDecoding(handledXML)
        # success: a decoded report with no warning stops the retries
        if reportXML is not None and warning_msg is None:
            break
        common.logger.debug(warning_msg)

    if warning_msg is not None:
        # all attempts failed: replace the last specific message with a
        # generic user-facing warning and bail out
        warning_msg = "WARNING: Unable to decompress status from server. Please issue crab -status again"
        common.logger.info(warning_msg)
        return warning_msg

    try:
        # parse the per-job status entries and align the local database
        xmlStatus = minidom.parseString(reportXML)
        reportList = xmlStatus.getElementsByTagName('Job')
        common._db.deserXmlStatus(reportList)
    except Exception, e:
        warning_msg = "WARNING: Unable to extract status from XML file. Please issue crab -status again"
        common.logger.info(warning_msg)
        common.logger.debug("DUMP STATUS XML: %s" % str(reportXML))
        common.logger.debug(str(e))
        common.logger.debug(traceback.format_exc())

    return warning_msg
def run(self):
    """
    Kill a complete task: resynch status from the server, pick the
    killable jobs, send one kill request and mark them 'KillRequested'.
    """
    common.logger.debug("Killer::run() called")

    # best-effort status refresh from the server (StatusServer proxy)
    try:
        from StatusServer import StatusServer
        stat = StatusServer(self.cfg_params)
        warning_msg = stat.resynchClientSide()
        if warning_msg is not None:
            common.logger.info(warning_msg)
    except:
        pass

    task = common._db.getTask(self.range)

    # a job is killable when submitted or with a pending submission
    killable = []
    for job in task.jobs:
        if job.runningJob['state'] in ['SubSuccess', 'SubRequested']:
            killable.append(job['jobId'])
        else:
            common.logger.info("Not possible to kill Job #"
                               + str(job['jobId'])
                               + " : Last action was: "
                               + str(job.runningJob['state'])
                               + " Status is "
                               + str(job.runningJob['statusScheduler']))

    if len(killable) > 0:
        ## register proxy ##
        communicator = ServerCommunicator(self.server_name,
                                          self.server_port,
                                          self.cfg_params)
        taskuuid = str(common._db.queryTask('name'))
        outcome = communicator.killJobs(taskuuid, killable)
        del communicator
        if outcome != 0:
            raise CrabException(
                "ClientServer ERROR: %d raised during the communication.\n" % outcome)

        # record the request locally and report to the user
        common._db.updateRunJob_(killable,
                                 [{'state': 'KillRequested'}] * len(killable))
        common.logger.info(
            "Kill request for %d jobs succesfully sent to the server\n" % len(killable))
    return
def resynchClientSide(self):
    """
    Fetch the task status from the server and realign the client-side
    database with it.  Returns a warning string on failure, else None.
    """
    task = common._db.getTask()
    self.task_unique_name = str(task['name'])

    # allocate the client/server communicator
    communicator = ServerCommunicator(self.server_name,
                                      self.server_port, self.cfg_params)

    # fetch + decode, with exponential back-off between attempts
    reportXML, warning_msg = None, None
    max_get_status_tries = 5
    for attempt in xrange(max_get_status_tries):
        if attempt > 0:
            wait = pow(2, attempt - 1)
            common.logger.info(
                "Server status decoding problem. Try again in %d seconds" % wait)
            runCommand("/bin/sleep %d" % wait)
        rawXML = communicator.getStatus(self.task_unique_name)
        reportXML, warning_msg = self.statusDecoding(rawXML)
        if reportXML is not None and warning_msg is None:
            break
        common.logger.debug(warning_msg)

    if warning_msg is not None:
        # every attempt failed: hand back a generic user-facing warning
        warning_msg = "WARNING: Unable to decompress status from server. Please issue crab -status again"
        common.logger.info(warning_msg)
        return warning_msg

    try:
        # deserialize the per-job entries into the local database
        dom = minidom.parseString(reportXML)
        jobNodes = dom.getElementsByTagName('Job')
        common._db.deserXmlStatus(jobNodes)
    except Exception as e:
        warning_msg = "WARNING: Unable to extract status from XML file. Please issue crab -status again"
        common.logger.info(warning_msg)
        common.logger.debug("DUMP STATUS XML: %s" % str(reportXML))
        common.logger.debug(str(e))
        common.logger.debug(traceback.format_exc())

    return warning_msg
def notifyRetrievalToServer(self, fileAndJobList):
    """
    Tell the server which job outputs were successfully retrieved.

    fileAndJobList maps a jobId to the path of its output file; a path
    that no longer exists means the file has been untarred, i.e. the
    retrieval succeeded.  Notification is best effort.
    """
    retrievedFilesJodId = []
    # iterate key/value pairs instead of re-indexing the mapping per key
    for jid, path in fileAndJobList.items():
        if not os.path.exists(path):
            # it means the file has been untarred
            retrievedFilesJodId.append(jid)
    common.logger.debug("List of retrieved files notified to server: %s" % str(retrievedFilesJodId))

    # notify to the server that output have been retrieved successfully.
    # proxy from StatusServer
    if len(retrievedFilesJodId) > 0:
        csCommunicator = ServerCommunicator(self.server_name,
                                            self.server_port,
                                            self.cfg_params)
        try:
            csCommunicator.outputRetrieved(self.taskuuid, retrievedFilesJodId)
        except Exception as e:
            msg = "Client Server comunication failed about outputRetrieved: jobs " + (str(retrievedFilesJodId))
            common.logger.debug(msg)
            # previously the exception cause was discarded; log it too
            common.logger.debug(str(e))
def run(self):
    """
    Ask the server to stop the workflow associated with this task.
    Raises CrabException when the server reports a non-zero outcome.
    """
    common.logger.debug("WorkflowStopper::run() called")

    taskuuid = str(common._db.queryTask('name'))
    communicator = ServerCommunicator(self.server_name,
                                      self.server_port,
                                      self.cfg_params)
    outcome = communicator.StopWorkflow(taskuuid)
    del communicator

    if outcome != 0:
        raise CrabException(
            "ClientServer ERROR: %d raised during the communication.\n" % outcome)

    common.logger.info("Stop Workflow request succesfully sent to the server\n")
    return
def performSubmission(self, firstSubmission=True):
    """
    Send the submission request for this task to the server frontend,
    handling both the first submission and later (re)submissions.
    """
    # open the communication session with the server frontend
    communicator = ServerCommunicator(self.server_name,
                                      self.server_port,
                                      self.cfg_params)
    subOutcome = 0
    totJobs = common._db.nJobs()

    # hand the remote directory over to the server
    self.cfg_params['CRAB.se_remote_dir'] = self.remotedir

    if firstSubmission == True:
        # nothing to submit while the server is draining
        if self.checkIfDrained(communicator) == True:
            return
        # ship the sandbox, then submit for the first time
        self.moveISB_SEAPI()
        taskDescription = self.serialize()
        # TODO fix not needed first field
        subOutcome = communicator.submitNewTask(self.taskuuid,
                                                taskDescription,
                                                self.submitRange,
                                                totJobs,
                                                taskType=self.taskType)
    else:
        # subsequent submissions and resubmit
        self.stateChange(self.submitRange, "SubRequested")
        if self.extended == 1:
            # refresh the Arguments XML file on the server side
            argsXML = common.work_space.shareDir() + 'arguments.xml'
            self.moveISB_SEAPI([argsXML])
            taskDescription = self.serialize()
            subOutcome = communicator.submitNewTask(self.taskuuid,
                                                    taskDescription,
                                                    self.submitRange,
                                                    totJobs,
                                                    taskType='extended')
        else:
            try:
                subOutcome = communicator.subsequentJobSubmit(self.taskuuid,
                                                              self.submitRange)
            except Exception as ex:
                ## change to specific exception
                ## clean sub. requested status
                self.stateChange(self.submitRange, "Created")
def run(self):
    """
    Resynch status, check the action is allowed, then request the
    server-side cleanup of the task; only the local working directory
    removal remains if the remote request fails.
    """
    # best-effort status refresh from the server
    try:
        stat = StatusServer(self.cfg_params)
        warning_msg = stat.resynchClientSide()
        if warning_msg is not None:
            common.logger.info(warning_msg)
    except:
        pass

    # check whether the action is allowable
    self.check()

    # ask the server to clean the task remotely
    communicator = ServerCommunicator(self.server_name,
                                      self.server_port,
                                      self.cfg_params)
    taskuuid = str(common._db.queryTask('name'))
    try:
        communicator.cleanTask(taskuuid)
    except Exception as e:
        msg = "Client Server comunication failed about cleanJobs: task \n" + taskuuid
        msg += "Only local working directory will be removed."
        common.logger.debug(msg)
def notifyRetrievalToServer(self, fileAndJobList):
    """
    Notify the server of the job outputs that were retrieved.  A file
    path that no longer exists means the tarball was already untarred,
    i.e. the retrieval succeeded.
    """
    # collect jobIds whose output file has been untarred
    retrieved = [jid for jid in fileAndJobList
                 if not os.path.exists(fileAndJobList[jid])]
    common.logger.debug("List of retrieved files notified to server: %s" % str(retrieved))

    # notify to the server that output have been retrieved successfully.
    # proxy from StatusServer; best effort only
    if len(retrieved) > 0:
        communicator = ServerCommunicator(self.server_name,
                                          self.server_port,
                                          self.cfg_params)
        try:
            communicator.outputRetrieved(self.taskuuid, retrieved)
        except Exception as e:
            common.logger.debug(
                "Client Server comunication failed about outputRetrieved: jobs " + (str(retrieved)))
def __init__(self, port):
    """
    Bind a TCP server socket on *port* and serve clients forever,
    spawning one handler thread per accepted connection.

    NOTE(review): the accept loop never returns, so this constructor
    blocks indefinitely -- consider moving it to a serve() method.
    """
    self.port = port

    # create the listening socket; SO_REUSEADDR avoids bind errors on
    # quick restarts of the server
    self.serverSocket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    self.serverSocket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
    self.serverSocket.bind(('', self.port))
    self.serverSocket.listen()

    # accept clients forever, one ServerCommunicator per connection
    while True:
        connection, address = self.serverSocket.accept()
        print("Connection from " + str(address))
        handler = ServerCommunicator(connection, address)
        # handle the client on its own thread so we can keep accepting
        threading.Thread(target=handler.handleClient).start()