def saveJobDb(self, jobResourceDic):
    """
    Save job related cpu/memory information into the sqlite3 job databases.

    jobResourceDic is read as jobResourceDic[job]['cpu'] and
    jobResourceDic[job]['memory']. Jobs are grouped with
    common.getJobRangeDic(); every group is written to
    "<self.dbPath>/job/<jobRange>.db", one table per job, the table being
    named after the job itself. A database that cannot be connected is
    reported and skipped, the remaining groups are still processed.

    Returns nothing.
    """
    print('>>> Saving job related cpu/memory information into sqlite3 ...')
    common.debug('Saving job resource info into sqlite3 ...')

    jobList = list(jobResourceDic.keys())
    jobRangeDic = common.getJobRangeDic(jobList)
    keyList = ['SAMPLE_TIME', 'HOST_NAME', 'CPU', 'MEMORY']
    keyString = sqlite3_common.genSqlTableKeyString(keyList)

    for jobRange in jobRangeDic.keys():
        dbFile = str(self.dbPath) + '/job/' + str(jobRange) + '.db'
        (result, dbConn) = sqlite3_common.connectDbFile(dbFile, mode='write')

        if result != 'passed':
            print('*Error*: Failed on connecting sqlite3 database "' + str(dbFile) + '".')
            continue

        # The connection is closed on every path, including when one of the
        # helpers raises; the commit only happens when the whole batch for
        # this database went through.
        try:
            jobTableList = sqlite3_common.getSqlTableList(dbFile, dbConn)
            jobResourceSqlDic = {}

            # Pass 1: decide, for every job, whether its table must be
            # dropped and/or created, and prepare the row to insert.
            for job in jobRangeDic[jobRange]:
                tableName = job
                jobSql = {
                    'drop': False,
                    'keyString': '',
                    'valueString': '',
                }
                jobResourceSqlDic[job] = jobSql

                print(' Sampling for job "' + str(job) + '" ...')

                if tableName not in jobTableList:
                    # No table yet for this job, it has to be created.
                    jobSql['keyString'] = keyString
                elif self.checkOldSqlTable(dbFile, dbConn, tableName) == 1:
                    # checkOldSqlTable() returning 1 means the existing
                    # table must be dropped and created again.
                    jobSql['drop'] = True
                    jobSql['keyString'] = keyString

                # NOTE(review): sampleTime and hostname are not bound inside
                # this method - presumably module-level names; confirm they
                # should not be instance attributes.
                valueList = [sampleTime, hostname, jobResourceDic[job]['cpu'], jobResourceDic[job]['memory']]
                jobSql['valueString'] = sqlite3_common.genSqlTableValueString(valueList)

            # Pass 2: apply the planned drop/create/insert operations; all of
            # them are left uncommitted until the single commit below.
            for (tableName, jobSql) in jobResourceSqlDic.items():
                if jobSql['drop']:
                    print(' Dropping table "' + str(tableName) + '" ...')
                    sqlite3_common.dropSqlTable(dbFile, dbConn, tableName, commit=False)

                if jobSql['keyString'] != '':
                    print(' Creating table "' + str(tableName) + '" ...')
                    sqlite3_common.createSqlTable(dbFile, dbConn, tableName, jobSql['keyString'], commit=False)

                if jobSql['valueString'] != '':
                    print(' Updating table "' + str(tableName) + '" with content "' + str(jobSql['valueString']) + '" ...')
                    sqlite3_common.insertIntoSqlTable(dbFile, dbConn, tableName, jobSql['valueString'], commit=False)

            dbConn.commit()
        finally:
            dbConn.close()

    common.debug('Saving job resource info done.')
def connectJobDb(self, job):
    """
    Connect (mode='read') to the sqlite3 database file that belongs to the
    given job.

    The database file is "<self.dbPath>/job/<jobRange>.db", where jobRange
    is the first key of common.getJobRangeDic([job]).

    Returns the tuple (result, dbFile, dbConn, tableList). tableList is the
    table list reported by sqlite3_common.getSqlTableList() when result is
    'passed', otherwise an empty list (an error message is printed).
    """
    rangeKeys = list(common.getJobRangeDic([job]).keys())
    jobRange = rangeKeys[0]
    dbFile = '{!s}/job/{!s}.db'.format(self.dbPath, jobRange)

    (result, dbConn) = sqlite3_common.connectDbFile(dbFile, mode='read')

    if result == 'passed':
        return (result, dbFile, dbConn, sqlite3_common.getSqlTableList(dbFile, dbConn))

    # Connection failed: report it and hand back an empty table list.
    print('*Error*: Failed on connecting sqlite3 database "{!s}".'.format(dbFile))

    return (result, dbFile, dbConn, [])
def sampleJobInfo(self):
    """
    Sample job info, especially the memory usage info.

    Job data comes from lsf_common.getBjobsUfInfo('bjobs -u all -r -UF').
    Jobs are grouped with common.getJobRangeDic(); for every group the
    database "<self.dbPath>/job/<jobRange>.db" gets one row
    (self.sampleTime, mem) appended to table "job_<job>".

    The work is done in two passes:
      1. read pass  - inspect the existing tables and plan, per job, whether
         the table has to be dropped and/or created;
      2. write pass - apply the plan and insert the new row.

    A database that cannot be connected for writing is reported and
    skipped; the other databases are still updated.

    Returns nothing.
    """
    # Presumably refreshes self.sampleTime / self.currentSeconds, which are
    # read below - TODO confirm against getDateInfo().
    self.getDateInfo()

    print('>>> Sampling job info ...')

    command = 'bjobs -u all -r -UF'
    bjobsDic = lsf_common.getBjobsUfInfo(command)
    jobList = list(bjobsDic.keys())
    jobRangeDic = common.getJobRangeDic(jobList)
    jobSqlDic = {}

    keyList = ['sampleTime', 'mem']
    keyString = sqlite3_common.genSqlTableKeyString(keyList)

    # Read pass.
    for jobRange in jobRangeDic.keys():
        jobDbFile = str(self.dbPath) + '/job/' + str(jobRange) + '.db'
        (result, jobDbConn) = sqlite3_common.connectDbFile(jobDbFile, mode='read')
        connected = (result == 'passed')

        try:
            # If the database cannot be read, every job of this range is
            # treated as having no table yet.
            if connected:
                jobTableList = sqlite3_common.getSqlTableList(jobDbFile, jobDbConn)
            else:
                jobTableList = []

            for job in jobRangeDic[jobRange]:
                jobTableName = 'job_' + str(job)

                print(' Sampling for job "' + str(job) + '" ...')

                jobSqlDic[job] = {
                    'drop': False,
                    'keyString': '',
                    'valueString': '',
                }

                # If job table (with old data) has been on the jobDbFile, drop it.
                if jobTableName in jobTableList:
                    dataDic = sqlite3_common.getSqlTableData(jobDbFile, jobDbConn, jobTableName, ['sampleTime'])

                    if dataDic and (len(dataDic['sampleTime']) > 0):
                        lastSampleTime = dataDic['sampleTime'][-1]
                        lastSeconds = int(time.mktime(datetime.datetime.strptime(str(lastSampleTime), "%Y%m%d_%H%M%S").timetuple()))

                        # Last sample is more than one hour (3600s) old.
                        if self.currentSeconds - lastSeconds > 3600:
                            common.printWarning(' *Warning*: table "' + str(jobTableName) + '" already existed even one hour ago, will drop it.')
                            jobSqlDic[job]['drop'] = True
                            # Forget the table so it is re-created below.
                            jobTableList.remove(jobTableName)

                # If job table is not on the jobDbFile, create it.
                if jobTableName not in jobTableList:
                    jobSqlDic[job]['keyString'] = keyString

                # Insert sql table value.
                valueList = [self.sampleTime, bjobsDic[job]['mem']]
                jobSqlDic[job]['valueString'] = sqlite3_common.genSqlTableValueString(valueList)

            if connected:
                jobDbConn.commit()
        finally:
            # Release the read connection even if a helper raised.
            if connected:
                jobDbConn.close()

    # Write pass.
    print(' Committing the update to sqlite3 ...')

    for jobRange in jobRangeDic.keys():
        jobDbFile = str(self.dbPath) + '/job/' + str(jobRange) + '.db'
        (result, jobDbConn) = sqlite3_common.connectDbFile(jobDbFile, mode='write')

        if result != 'passed':
            # Report the failure and go on with the next database instead of
            # silently giving up on all the remaining job ranges.
            print('*Error*: Failed on connecting sqlite3 database "' + str(jobDbFile) + '".')
            continue

        try:
            for job in jobRangeDic[jobRange]:
                jobTableName = 'job_' + str(job)
                jobSql = jobSqlDic[job]

                if jobSql['drop']:
                    sqlite3_common.dropSqlTable(jobDbFile, jobDbConn, jobTableName, commit=False)

                if jobSql['keyString'] != '':
                    sqlite3_common.createSqlTable(jobDbFile, jobDbConn, jobTableName, jobSql['keyString'], commit=False)

                if jobSql['valueString'] != '':
                    sqlite3_common.insertIntoSqlTable(jobDbFile, jobDbConn, jobTableName, jobSql['valueString'], commit=False)

            # One commit per database, only when the whole batch succeeded.
            jobDbConn.commit()
        finally:
            jobDbConn.close()

    print(' Done (' + str(len(jobList)) + ' jobs).')
def drawJobMemCurve(self, job):
    """
    Draw memory usage curve for specified job.

    Reads the 'sampleTime' and 'mem' columns of table "job_<job>" from
    "<config.dbPath>/monitor/job/<jobRange>.db" and plots them with
    common.drawPlot(), saving the figure as
    "<config.tmpPath>/<self.user>_<job>.png".

    Sets self.jobDbFile, self.jobDbFileConnectResult and self.jobDbConn,
    and clears self.jobFirstLoad on the first call. The database
    connection is closed before returning on every path once it has been
    opened successfully.

    Returns nothing; a warning is printed when the database cannot be
    connected or when the job has no sampled data.
    """
    jobRangeDic = common.getJobRangeDic([job, ])
    jobRange = list(jobRangeDic.keys())[0]

    self.jobDbFile = str(config.dbPath) + '/monitor/job/' + str(jobRange) + '.db'
    (self.jobDbFileConnectResult, self.jobDbConn) = sqlite3_common.connectDbFile(self.jobDbFile)

    if self.jobDbFileConnectResult == 'failed':
        common.printWarning('*Warning*: Failed on connectiong job database file "' + str(self.jobDbFile) + '".')
        return

    # The connection is only needed to fetch the samples, so it is released
    # right after the read - also when the data is missing or a helper raises.
    try:
        if self.jobFirstLoad:
            common.printWarning('*Warning*: It is the first time loading job database, it may cost a little time ...')
            self.jobFirstLoad = False

        print('Getting history of job memory usage for job "' + str(job) + '".')

        tableName = 'job_' + str(job)
        dataDic = sqlite3_common.getSqlTableData(self.jobDbFile, self.jobDbConn, tableName, ['sampleTime', 'mem'])
    finally:
        if self.jobDbFileConnectResult == 'passed':
            self.jobDbConn.close()

    # An existing but empty table has no first sample to use as time origin,
    # so it is handled like missing data.
    if (not dataDic) or (len(dataDic['sampleTime']) == 0):
        common.printWarning('*Warning*: job information is missing for "' + str(job) + '".')
        return

    runTimeList = dataDic['sampleTime']
    memList = dataDic['mem']
    realRunTimeList = []
    realMemList = []

    # Time origin of the curve: the first sample.
    firstRunTime = datetime.datetime.strptime(str(runTimeList[0]), '%Y%m%d_%H%M%S').timestamp()

    for (runTime, mem) in zip(runTimeList, memList):
        # X value: whole minutes elapsed since the first sample.
        currentRunTime = datetime.datetime.strptime(str(runTime), '%Y%m%d_%H%M%S').timestamp()
        realRunTimeList.append(int((currentRunTime - firstRunTime) / 60))

        # Y value: an empty mem is counted as 0; the value is divided by 1024
        # for the 'G' axis (presumably mem is stored in MB - TODO confirm).
        if mem == '':
            mem = '0'

        realMemList.append(round(int(mem) / 1024, 1))

    memCurveFig = str(config.tmpPath) + '/' + str(self.user) + '_' + str(job) + '.png'
    jobNum = common.stringToInt(job)

    print('Save job memory curve as "' + str(memCurveFig) + '".')

    common.drawPlot(realRunTimeList,
                    realMemList,
                    'runTime (Minitu)',
                    'memory (G)',
                    yUnit='G',
                    title='job : ' + str(job),
                    saveName=memCurveFig,
                    figureNum=jobNum)