def monitoring(self, loop, parentthread, output):
    """Poll an interactive BigData job until the parent thread finishes.

    Repeatedly invokes the ``self.getinfo`` helper script: first with
    ``-c step1`` to learn how many jobs the interactive session launched,
    then with ``-c step2``/``-c step3`` to find started and finished jobs.
    Once every job has finished, the job status is set to "Done" in
    BigDataDB, the sandbox is updated and an accounting record is
    committed.

    :param loop: seconds to sleep between polling iterations
    :param parentthread: thread whose liveness gates the polling loop
    :param output: path whose last component starts with "<jobid>_";
                   also forwarded to __updateSandBox
    """
    self.initialTiming = os.times()
    accountingReport = AccountingJob()
    accountingReport.setStartTime()
    numberJobsFlag = True
    numberJobs = 0
    # Raw strings so the regex escapes are explicit.
    job_pattern = re.compile(r'Job =.*?,')
    job_pattern_2 = re.compile(r'Job =.*?\n')
    # The DIRAC job id is the leading "<jobid>_" token of the last
    # path component of `output`.
    jobid = int(output.split("/")[-1].split("_")[0])
    cmd = '/bin/chmod 555 ' + self.getinfo
    returned = self.commandLaunch(cmd)
    # is_alive() (available since Python 2.6) replaces the isAlive()
    # alias that was removed in Python 3.9.
    while parentthread.is_alive():
        time.sleep(loop)
        if numberJobsFlag:
            # step1: how many jobs did the interactive session launch?
            cmd = self.getinfo + ' -c step1'
            returned = self.commandLaunch(cmd)
            self.log.info('InteractiveJobMonitorThread:step1:numJobs:', returned)
            if returned is not None:
                if returned['Value'][1] != "":
                    if re.split("=", returned['Value'][1])[1].strip().isdigit():
                        numberJobs = int(re.split("=", returned['Value'][1])[1])
                if numberJobs != 0:
                    numberJobsFlag = False
                    BigDataDB.setJobStatus(jobid, "Running")
        else:
            # step2: which jobs have started (logged only).
            cmd = self.getinfo + ' -c step2'
            returned = self.commandLaunch(cmd)
            self.log.info('InteractiveJobMonitorThread:step2:startedJobs:', returned)
            # BUGFIX: the original tested `returned != ""`, which is true
            # even for None (commandLaunch can return None, as the step1
            # branch shows) and then crashed on the subscript below.
            if returned is not None:
                if returned['Value'][1] != "":
                    startedJobs = job_pattern.findall(returned['Value'][1])
                    self.log.info('step2:startedJobs:', startedJobs)
            # step3: which jobs have ended.
            cmd = self.getinfo + ' -c step3'
            returned = self.commandLaunch(cmd)
            self.log.info('InteractiveJobMonitorThread:step3:endedJobs:', returned)
            # BUGFIX: same None guard as step2.
            if returned is not None:
                if returned['Value'][1] != "":
                    finishedJobs = job_pattern_2.findall(returned['Value'][1])
                    self.log.info('step3:finishedJobs:', finishedJobs)
                    if len(finishedJobs) == numberJobs:
                        BigDataDB.setJobStatus(jobid, "Done")
                        BigDataDB.setHadoopID(jobid, finishedJobs)
                        self.__updateSandBox(jobid, output)
                        # Update Accounting
                        EXECUTION_RESULT = {}
                        finalStat = os.times()
                        EXECUTION_RESULT['CPU'] = [
                            final - initial
                            for final, initial in zip(finalStat, self.initialTiming)
                        ]
                        utime, stime, cutime, cstime, elapsed = EXECUTION_RESULT['CPU']
                        cpuTime = utime + stime + cutime + cstime
                        execTime = elapsed
                        result = jobDB.getJobAttributes(jobid)
                        getting = result['Value']
                        acData = {
                            'User': getting['Owner'],
                            'UserGroup': getting['OwnerGroup'],
                            'JobGroup': 'cesga',
                            'JobType': 'User',
                            'JobClass': 'unknown',
                            'ProcessingType': 'unknown',
                            'FinalMajorStatus': getting['Status'],
                            'FinalMinorStatus': getting['MinorStatus'],
                            'CPUTime': cpuTime,
                            'Site': getting['Site'],
                            # Based on the factor to convert raw CPU to
                            # Normalized units (based on the CPU Model)
                            'NormCPUTime': 0,
                            # BUGFIX: the original stored cpuTime here and
                            # left execTime (the elapsed wall clock) unused.
                            'ExecTime': execTime,
                            'InputDataSize': 0,
                            'OutputDataSize': 0,
                            'InputDataFiles': 0,
                            'OutputDataFiles': 0,
                            'DiskSpace': 0,
                            'InputSandBoxSize': 0,
                            'OutputSandBoxSize': 0,
                            'ProcessedEvents': 0,
                        }
                        accountingReport.setEndTime()
                        accountingReport.setValuesFromDict(acData)
                        result = accountingReport.commit()
def execute(self):
    """Main Agent code:
    1.- Query BigDataDB for existing Running, Queue, or Submitted jobs
    2.- Ask about the status
    3.- Change the status into DB in the case of had changed
    """
    self.pendingJobs["Submitted"] = BigDataDB.getBigDataJobsByStatus("Submitted")
    self.pendingJobs["Running"] = BigDataDB.getBigDataJobsByStatus("Running")
    self.pendingJobs["Unknown"] = BigDataDB.getBigDataJobsByStatus("Unknown")

    self.__getMonitoringPools()
    self.log.verbose("monitoring pools", self.monitoringEndPoints)

    for status in self.pendingJobs:
        self.log.verbose("Analizing %s jobs" % status)
        JobStatus = 0
        if not self.pendingJobs[status]["OK"]:
            continue
        for jobId in self.pendingJobs[status]["Value"]:
            self.log.verbose("Analizing job %s" % jobId)
            getSoftIdAndSiteName = BigDataDB.getSoftwareJobIDByJobID(jobId[0])
            self.log.verbose("Site and SoftID:", getSoftIdAndSiteName)
            for runningEndPoint in self.monitoringEndPoints:
                # Hoist the repeated dict lookup; all config reads below
                # go through this local.
                endPoint = self.monitoringEndPoints[runningEndPoint]
                # Only handle the endpoint this job was submitted to, and
                # only when a BigData-software job id is already known.
                if (endPoint["NameNode"] != getSoftIdAndSiteName[0][1]) or (
                    getSoftIdAndSiteName[0][0] == ""
                ):
                    continue
                # Depending on the BigData Software the Query should be different
                if endPoint["BigDataSoftware"] != "hadoop":
                    continue
                if (
                    endPoint["BigDataSoftwareVersion"] == "hdv1"
                    and endPoint["HighLevelLanguage"]["HLLName"] == "none"
                ):
                    self.log.info(
                        "Hadoop V.1 Monitoring submmission command with Hadoop jobID: ",
                        getSoftIdAndSiteName[0][0],
                    )
                    from BigDataDIRAC.WorkloadManagementSystem.Client.HadoopV1Client import (
                        HadoopV1Client,
                    )

                    HadoopV1cli = HadoopV1Client(
                        endPoint["User"],
                        endPoint["PublicIP"],
                        endPoint["Port"],
                    )
                    JobStatus = HadoopV1cli.jobStatus(
                        getSoftIdAndSiteName[0][0],
                        endPoint["User"],
                        endPoint["PublicIP"],
                    )
                    if JobStatus["OK"] and endPoint["IsInteractive"] == "1":
                        # Interactive session finished: resubmit as a real
                        # Hadoop job, then suppress the handling below for
                        # this pass by clearing the OK flag.
                        if JobStatus["Value"][1].strip() == "Succeded":
                            result = HadoopV1cli.newJob(
                                self.__tmpSandBoxDir, jobId[0], getSoftIdAndSiteName[0][0]
                            )
                            if result["OK"]:
                                result = BigDataDB.updateHadoopIDAndJobStatus(
                                    jobId[0], result["Value"]
                                )
                                BigDataDB.setJobStatus(jobId[0], "Running")
                                JobStatus["OK"] = False
                            else:
                                self.log.info("New result from new Job", result)
                    if JobStatus["OK"]:
                        if JobStatus["Value"][1].strip() == "Succeded":
                            BigDataDB.setJobStatus(jobId[0], "Done")
                            if endPoint["IsInteractive"] == "1":
                                self.__updateInteractiveSandBox(
                                    jobId[0],
                                    endPoint["BigDataSoftware"],
                                    endPoint["BigDataSoftwareVersion"],
                                    endPoint["HighLevelLanguage"]["HLLName"],
                                    endPoint["HighLevelLanguage"]["HLLVersion"],
                                    HadoopV1cli,
                                )
                            else:
                                self.__updateSandBox(
                                    jobId[0],
                                    endPoint["BigDataSoftware"],
                                    endPoint["BigDataSoftwareVersion"],
                                    endPoint["HighLevelLanguage"]["HLLName"],
                                    endPoint["HighLevelLanguage"]["HLLVersion"],
                                    HadoopV1cli,
                                )
                            getStatus = HadoopV1cli.jobCompleteStatus(getSoftIdAndSiteName[0][0])
                            if getStatus["OK"]:
                                result = self.getJobFinalStatusInfo(getStatus["Value"][1])
                                if result["OK"]:
                                    self.sendJobAccounting(result["Value"], jobId[0])
                            if self.cleanDataAfterFinish:
                                self.__deleteData(jobId[0], HadoopV1cli)
                        if JobStatus["Value"][1].strip() == "Unknown":
                            BigDataDB.setJobStatus(jobId[0], "Submitted")
                        if JobStatus["Value"][1].strip() == "Running":
                            BigDataDB.setJobStatus(jobId[0], "Running")
                if (
                    endPoint["BigDataSoftwareVersion"] == "hdv2"
                    and endPoint["HighLevelLanguage"]["HLLName"] == "none"
                ):
                    self.log.info(
                        "Hadoop V.2 Monitoring submmission command with Hadoop jobID: ",
                        getSoftIdAndSiteName[0][0],
                    )
                    from BigDataDIRAC.WorkloadManagementSystem.Client.HadoopV2Client import (
                        HadoopV2Client,
                    )

                    HadoopV2cli = HadoopV2Client(
                        endPoint["User"],
                        endPoint["PublicIP"],
                    )
                    JobStatus = HadoopV2cli.jobStatus(
                        getSoftIdAndSiteName[0][0],
                        endPoint["User"],
                        endPoint["PublicIP"],
                    )
                    if JobStatus["OK"] and endPoint["IsInteractive"] == "1":
                        # NOTE: the V2 client returns the status string
                        # directly in JobStatus["Value"] (no [1] index).
                        if JobStatus["Value"].strip() == "Succeded":
                            result = HadoopV2cli.newJob(
                                self.__tmpSandBoxDir, jobId[0], getSoftIdAndSiteName[0][0]
                            )
                            if result["OK"]:
                                result = BigDataDB.updateHadoopIDAndJobStatus(
                                    jobId[0], result["Value"]
                                )
                                BigDataDB.setJobStatus(jobId[0], "Running")
                                JobStatus["OK"] = False
                            else:
                                self.log.info("New result from new Job", result)
                    if JobStatus["OK"]:
                        if JobStatus["Value"] == "Succeded":
                            BigDataDB.setJobStatus(jobId[0], "Done")
                            if endPoint["IsInteractive"] == "1":
                                self.__updateInteractiveSandBox(
                                    jobId[0],
                                    endPoint["BigDataSoftware"],
                                    endPoint["BigDataSoftwareVersion"],
                                    endPoint["HighLevelLanguage"]["HLLName"],
                                    endPoint["HighLevelLanguage"]["HLLVersion"],
                                    HadoopV2cli,
                                )
                            else:
                                self.__updateSandBox(
                                    jobId[0],
                                    endPoint["BigDataSoftware"],
                                    endPoint["BigDataSoftwareVersion"],
                                    endPoint["HighLevelLanguage"]["HLLName"],
                                    endPoint["HighLevelLanguage"]["HLLVersion"],
                                    HadoopV2cli,
                                )
                            getStatus = HadoopV2cli.jobCompleteStatus(getSoftIdAndSiteName[0][0])
                            if getStatus["OK"]:
                                result = self.getJobFinalStatusInfo(getStatus["Value"][1])
                                if result["OK"]:
                                    self.sendJobAccounting(result["Value"], jobId[0])
                            # if self.cleanDataAfterFinish:
                            #     self.__deleteData( jobId[0], HadoopV2cli )
                        if JobStatus["Value"] == "Unknown":
                            BigDataDB.setJobStatus(jobId[0], "Submitted")
                        if JobStatus["Value"] == "Running":
                            BigDataDB.setJobStatus(jobId[0], "Running")

    return DIRAC.S_OK()