def updateConfig (self, logType, logDir, confDir, historyDir, name):
    self._log("update-config").debug1("Update config (%s)" % self._name)
    self._logType = logType
    self._logDir = logDir
    self._confDir = confDir
    self._historyDir = historyDir
    self._configFile = os.path.join(self._confDir, "logpusher.conf")
    self._pidFile = os.path.join(self._confDir, "logpusher.pid")
    self._name = name
    self._shouldRestart = True

    # File size enforcer on the history dir
    self._historySizeEnforcer = RotatingFileSizeEnforcer(self._log, self._logDir, "tran-" + self._name + "-", ".log.bz")
    self._historySizeEnforcer.initFileRotatingPattern(
        "%s.%s" % (RotatingFileSizeEnforcer.KICK_NUM_4, RotatingFileSizeEnforcer.EPOCH_SECONDS_10))
    self._historySizeEnforcer.setTotalSize(self._historyDirMaxSizeMB * 1024 * 1024)
    self._lastHistoryDirSizeEnforce = 0
class NginxLog(object):

    #---------------------------------------------------------------------------------------------------------
    def __init__(self, logger):
        self.__log = logger.createLogger(G_NAME_MODULE_DELIVERY, G_NAME_GROUP_DELIVERY_NGINX_LOG)

    #---------------------------------------------------------------------------------------------------------
    # After calling init(), the Nginx log files need to be reopened
    def init(self, conf):
        logsDirectoryPath = conf.ngxLogDir
        logsDirectorySizeByte = conf.nginxDirectoryLogSizeByte
        errLogPath = conf.ngxLogFileSymLink
        errLogSizeByte = conf.nginxLogFileSizeByte

        self.__rotator = RotatingFileSizeEnforcer(self.__log, logsDirectoryPath, "log-", ".txt")
        self.__rotator.initSymlink(os.path.dirname(errLogPath), os.path.basename(errLogPath))
        self.__log("log-rotator").info("Log Symbolic Link %s", errLogPath)
        self.__errLogSizeByte = errLogSizeByte
        try:
            # Old files might be deleted to obey the directory size limit
            self.__rotator.setTotalSize(logsDirectorySizeByte)
            # prepare() creates the symlink that links the error log file to the logs directory;
            # old files might be deleted to obey the directory size limit
            self.__rotator.prepare()
            self.__log("log-rotator").info(
                "Init Nginx Log, Directory Size = %d File Size = %d",
                logsDirectorySizeByte, errLogSizeByte)
        except Exception as e:
            self.__log("log-rotator-err").exception(
                "Failed to init Nginx Log Rotator - %s, %s",
                utils.parseErrnoToString(e), sys.exc_info()[1])
            return False
        return True
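# --- Illustrative usage sketch (not part of the module) ---
# A minimal sketch of the configuration fields NginxLog.init() reads. The ConfExample namedtuple,
# the paths, and the sizes below are hypothetical placeholders; only the attribute names are taken
# from init() above.

from collections import namedtuple

ConfExample = namedtuple("ConfExample", ["ngxLogDir", "nginxDirectoryLogSizeByte",
                                         "ngxLogFileSymLink", "nginxLogFileSizeByte"])

exampleConf = ConfExample(ngxLogDir="/var/log/nginx-rotated",
                          nginxDirectoryLogSizeByte=512 * 1024 * 1024,
                          ngxLogFileSymLink="/var/log/nginx/error.log",
                          nginxLogFileSizeByte=16 * 1024 * 1024)

# Intended use (requires the project's logger factory, so shown as a comment):
#   nginxLog = NginxLog(logger)
#   if nginxLog.init(exampleConf):
#       pass  # tell Nginx to reopen its log files
#   else:
#       pass  # the failure was already logged by the rotator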
def archive(self, doBeforeEachFile, doAfterEachFile):
    # Returns an error code in case of failure; otherwise the return code holds the number of archived files
    with self._configurationLock:
        self._destinationData = self._runningDestinationData
        self._destinationOutputData = self._runningDestinationOutputData

    if self._destinationData is None:
        self._logAct("archive-no-data").error("Archive function is called but destination data was not set")
        return ReturnCodes.kGeneralError
    if self._destinationOutputData is None:
        self._logAct("archive-no-data").error("Archive function is called but destination output data was not set")
        return ReturnCodes.kGeneralError

    compressionMethod = None
    if self._destinationOutputData.archiveMode == LogArchiveModeType.kNone:
        self._logAct("nothing-to-do").debug1("Archive mode is none. Nothing to do")
        return ReturnCodes.kOk
    elif self._destinationOutputData.archiveMode == LogArchiveModeType.kNoCompression:
        compressionMethod = a.infra.file.std_archiver.StdArchiver.COMPRESSION_METHOD_NONE
    elif self._destinationOutputData.archiveMode == LogArchiveModeType.kGzipStandard:
        compressionMethod = a.infra.file.std_archiver.StdArchiver.COMPRESSION_METHOD_GZIP
    elif self._destinationOutputData.archiveMode == LogArchiveModeType.kGzipBest:
        compressionMethod = a.infra.file.std_archiver.StdArchiver.COMPRESSION_METHOD_GZIP_BEST
    elif self._destinationOutputData.archiveMode == LogArchiveModeType.kGzipFast:
        compressionMethod = a.infra.file.std_archiver.StdArchiver.COMPRESSION_METHOD_GZIP_FAST
    elif self._destinationOutputData.archiveMode == LogArchiveModeType.kBz2Standard:
        compressionMethod = a.infra.file.std_archiver.StdArchiver.COMPRESSION_METHOD_BZ2
    elif self._destinationOutputData.archiveMode == LogArchiveModeType.kBz2Best:
        compressionMethod = a.infra.file.std_archiver.StdArchiver.COMPRESSION_METHOD_BZ2_BEST
    elif self._destinationOutputData.archiveMode == LogArchiveModeType.kBz2Fast:
        compressionMethod = a.infra.file.std_archiver.StdArchiver.COMPRESSION_METHOD_BZ2_FAST

    if compressionMethod is None:
        self._logAct("unknown-archive-mode").error("Unknown archive mode: %s", self._destinationOutputData.archiveMode)
        return ReturnCodes.kGeneralError

    fileNameSuffix = None
    if self._destinationData.destinationType == DestinationType.kText:
        fileNameSuffix = ".txt"
    elif self._destinationData.destinationType == DestinationType.kTextCsv:
        fileNameSuffix = ".csv"
    elif self._destinationData.destinationType == DestinationType.kTextSingleLine:
        fileNameSuffix = ".txt"
    elif self._destinationData.destinationType == DestinationType.kBinary:
        fileNameSuffix = ".qbl"

    if fileNameSuffix is None:
        self._logAct("unknown-destination-type").error("Unknown destination type mode: %s", self._destinationData.destinationType)
        return ReturnCodes.kGeneralError

    fileDirectory = self._destinationOutputData.fileDirectory
    fileNamePrefix = self._destinationOutputData.fileBaseName
    splitted = fileNamePrefix.split(self._FILE_STD_STRUCTORE_MARKER)
    if len(splitted) != 2:
        self._logAct("invalid-file-name").error("Invalid file name: '%s'. Not supporting phases", fileNamePrefix)
        return ReturnCodes.kGeneralError
    fileNamePrefix = splitted[0]
    fileNameSuffix = splitted[1] + fileNameSuffix

    fileBaseNamePattern = RotatingFileSizeEnforcer.s_getDefaultBaseNamePattern(fileNamePrefix, fileNameSuffix, True)
    fileBaseNameGlobPattern = RotatingFileSizeEnforcer.s_calcFilesGlobPattern(fileBaseNamePattern, RotatingFileSizeEnforcer.PHASE_PENDING)

    def renameFunc (fileBaseName):
        ret = RotatingFileSizeEnforcer.s_getFileNameRemovePhase(fileBaseNamePattern, fileBaseName)
        if ret is None:
            self._logAct("failed-translate-file").error("Failed to remove the phase from file name %s", fileBaseName)
            return fileBaseName  # best we can do
        return ret

    archiver = a.infra.file.std_archiver.StdArchiver(self._logAct, fileDirectory, fileBaseNameGlobPattern)
    rc = archiver.initTargetFileNameConversion(renameFunc)
    if not rc.success():
        self._logAct("failed-init-archiver-nc").error("Failed to init the archiver name conversion")
        return ReturnCodes.kGeneralError
    rc = archiver.initCompression(compressionMethod, True)
    if not rc.success():
        self._logAct("failed-init-archiver-c").error("Failed to init the archiver compression method")
        return ReturnCodes.kGeneralError
    rc = archiver.initDeleteEmptyFiles()
    if not rc.success():
        self._logAct("failed-init-archiver-de").error("Failed to init the archiver empty files deletion")
        return ReturnCodes.kGeneralError
    archiver.setStopFlagFunctor(self._shallStopFunctor)
    rc = archiver.initDone()
    if not rc.success():
        self._logAct("failed-init-archiver-do").error("Failed to init done the archiver")
        return ReturnCodes.kGeneralError

    rc = archiver.archive(doBeforeEachFile=doBeforeEachFile, doAfterEachFile=doAfterEachFile)
    if not rc.success():
        self._logAct("failed-archive").error("Failed to archive the files")
        return ReturnCodes.kGeneralError
    return rc  # also holds the number of archived files
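# --- Illustrative sketch (not part of the module) ---
# The archiveMode -> compression method dispatch in archive() above is a flat if/elif chain.
# A table-driven lookup is an equivalent alternative. This sketch is self-contained and uses
# plain strings as stand-ins for the LogArchiveModeType and StdArchiver constants, which are
# assumptions here, not the project's real enum values.

_COMPRESSION_BY_ARCHIVE_MODE = {
    "kNoCompression": "COMPRESSION_METHOD_NONE",
    "kGzipStandard": "COMPRESSION_METHOD_GZIP",
    "kGzipBest": "COMPRESSION_METHOD_GZIP_BEST",
    "kGzipFast": "COMPRESSION_METHOD_GZIP_FAST",
    "kBz2Standard": "COMPRESSION_METHOD_BZ2",
    "kBz2Best": "COMPRESSION_METHOD_BZ2_BEST",
    "kBz2Fast": "COMPRESSION_METHOD_BZ2_FAST",
}

def lookupCompressionMethodExample(archiveMode):
    # Returns None for unknown modes, mirroring the error path in archive()
    return _COMPRESSION_BY_ARCHIVE_MODE.get(archiveMode)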
class StatsAggregator(threading.Thread):

    def __init__ (self):
        pass

    # Finds previous rrd file paths
    def getPreviousRrdPaths (self, rrdDirPath):
        list_of_files = []
        for root, dirs, files in os.walk(rrdDirPath):
            for f in files:
                if f.endswith(RRD_EXTENTION):
                    list_of_files.append(f)
        return list_of_files

    def __copyDbsToOldDir (self, dbTimeString):
        tempPath = os.path.join(self.myOldDataDir, str(dbTimeString.strftime(FILE_NAME_TIMESTAMP_FORMAT)))
        self.__delDirsIfNeeded(tempPath)
        self.__moveSubDirsToDir(self.mydbRRdFilesOutputPath, tempPath)
        self.__moveFilesToDir(self.myDescriptionDbFilesPath, tempPath)
        self.__createCompressedTar(tempPath, tempPath + ".gzip")
        # Clean
        self.__delDirsIfNeeded(tempPath)

    # countersDBInfo - A dictionary that holds key = counter ID, value = counter descriptor object
    # jobsQueue - The queue for the design pattern used; through it the sampled counters are sent to the aggregator to be stored
    # logger - the logger
    # dbFilesOutputPath - The path where the db files will be stored
    # dbFilesBaseName - The prefix for the db files' names
    # rrdtoolExePath - The path to the RRDTool executable
    def init (self, countersDBInfo, jobsQueue, logger, baseLogger, dataRunDirPath, descriptionDbFilesPath,
              dbFilesOutputPath, dbFilesBaseName, rrdtoolExePath, rrdtoolWrapperExePath, oldDataDir,
              prevRunDir, updateBurstCount, updateBurstIntervalMsec, enableRecovery):
        # Initialize member variables
        self.myJobsQueue = jobsQueue
        self.myLogger = logger
        self.myDescriptionDbFilesPath = descriptionDbFilesPath
        self.myDataRunDirPath = dataRunDirPath
        self.mydbRRdFilesOutputPath = dbFilesOutputPath
        self.myOldDataDir = oldDataDir
        self.myPrevRunDir = prevRunDir
        self.__makeDirsIfNeeded(self.myPrevRunDir)
        self.myUpdateBurstCount = updateBurstCount
        self.myUpdateBurstInterval = updateBurstIntervalMsec / 1000.0
        self.myEnableRecovery = enableRecovery

        # The history files rotating file
        self.myHistoryFilePattern = "update-"
        self.myHistoryFileSuffix = ".csv"
        self.myHistoryRotatingFile = RotatingFileSizeEnforcer(
            baseLogger, self.myPrevRunDir, self.myHistoryFilePattern, self.myHistoryFileSuffix)
        self.myHistoryRotatingFile.prepare()

        # Create meta-data DB
        #dbTimeString = datetime.now() # Part of the file name is a date string
        dbTimeString = my_datetime.now()  # Part of the file name is a date string
        self.myDescriptionsDB = SQLiteDescriptorsDB()

        # Search for an old DB file
        sqliteDB = [f for f in os.listdir(descriptionDbFilesPath) if f.lower().endswith(SQLITE_EXTENTION)]
        rrdDBs = self.getPreviousRrdPaths(dbFilesOutputPath)
        if len(sqliteDB) == 0:
            if len(rrdDBs) > 0:
                self.myLogger.notice("StatsAggregator - sqlite file wasn't found while " + str(len(rrdDBs)) + " rrd files exist")
                self.__copyDbsToOldDir(dbTimeString)
            else:
                self.myLogger.debug5("StatsAggregator - sqlite and RRD files don't exist - ok")
        else:
            if len(rrdDBs) == 0:
                self.myLogger.notice("StatsAggregator - sqlite file found while no rrd files exist")
                self.__copyDbsToOldDir(dbTimeString)
            else:
                self.myLogger.debug5("StatsAggregator - RRD and sqlite files exist - ok")

        self.myDescriptionsDB.init(self.myLogger, descriptionDbFilesPath, dbFilesBaseName)

        # Distinguish between old and new counters
        previousConfigurations = self.myDescriptionsDB.getConfigurationsFromDB()
        if previousConfigurations is None:
            self.myLogger.notice("StatsAggregator - existing sqlite file is malformed")
            self.__copyDbsToOldDir(dbTimeString)
            previousConfigurations = []
        self.myLogger.debug2("StatsAggregator old configuration DB loaded")

        # Get only the new counters out of all the configured ones
        newCounters = self.diffAgainstNewConfig(countersDBInfo, previousConfigurations)
        if newCounters is None:
            # There should be no counter changes; if one is found we copy all previous data to a temp directory and create new DBs
            self.myLogger.notice("An illegal change was made in the configurations file - all previous data copied to " + self.myOldDataDir + ". Creating new stats DBs")
            self.__copyDbsToOldDir(dbTimeString)
            # All counters are considered new
            newCounters = countersDBInfo
            # Now we need to recreate self.myDescriptionsDB since we moved the file
            self.myDescriptionsDB = SQLiteDescriptorsDB()
            self.myDescriptionsDB.init(self.myLogger, descriptionDbFilesPath, dbFilesBaseName)
            previousConfigurations = []

        if len(previousConfigurations) > 0:
            previousCountersDbPathList = self.myDescriptionsDB.getPreviousDbPath()
            if len(previousCountersDbPathList) != 1:
                self.myLogger.error("StatsAggregator - too many sqlite files found")
                self.__copyDbsToOldDir(dbTimeString)
                newCounters = countersDBInfo
                self.myLogger.debug2("StatsAggregator - creating a new SQLite DB")
                errFlag = self.myDescriptionsDB.createDB(countersDBInfo, dbTimeString, newCounters)
                if errFlag:
                    self.myLogger.error("StatsAggregator - create db state 1 failed")
                    return errFlag
            else:
                previousCountersDbPath = self.myFileName = os.path.join(descriptionDbFilesPath, previousCountersDbPathList[0])
                # If we are here then this is not the first run on this machine
                if newCounters is None:
                    self.myLogger.error("StatsAggregator - error occurred while comparing counters from sqlite file and new configured ones")
                    return 3215374
                if len(newCounters) == 0:
                    self.myLogger.debug2("StatsAggregator - no new counters configured")
                    errFlag = self.myDescriptionsDB.createDB(countersDBInfo, dbTimeString, {}, previousCountersDB=previousCountersDbPath)
                    if 0 < errFlag:
                        self.myLogger.error("StatsAggregator - create db state 2 failed")
                        return errFlag
                else:
                    self.myLogger.debug2("StatsAggregator - %d new counters configured" % len(newCounters))
                    errFlag = self.myDescriptionsDB.createDB(countersDBInfo, dbTimeString, newCounters, previousCountersDB=previousCountersDbPath)
                    if 0 < errFlag:
                        self.myLogger.error("StatsAggregator - create db state 3 failed")
                        return errFlag

                # If we are here then 1. it is not the first run, and 2. some counters were added to the DB in a previous run and weren't updated
                self.myLogger.debug2("StatsAggregator - updating old counters")
                errFlag = self.myDescriptionsDB.updateDB(previousConfigurations)
                if 0 < errFlag:
                    self.myLogger.error("StatsAggregator - updating previous counters UI settings failed")
                    return errFlag
                errFlag = self.myDescriptionsDB.updateDB(newCounters)
                if 0 < errFlag:
                    self.myLogger.error("StatsAggregator - updating new counters UI settings failed")
                    return errFlag
        else:
            newCounters = countersDBInfo
            # If we are here then this is probably the first run on this machine or someone deleted all previous data
            self.myLogger.debug2("StatsAggregator - previous counters configuration was empty - creating a new SQLite DB")
            errFlag = self.myDescriptionsDB.createDB(countersDBInfo, dbTimeString, newCounters)
            if 0 < errFlag:
                self.myLogger.error("StatsAggregator - create db state 4 failed")
                return errFlag

        # Create values DB
        self.myValuesDB = RRDToolValuesDB()
        self.myValuesDB.init(self.myDescriptionsDB, self.myLogger, self.myDataRunDirPath, dbFilesOutputPath,
                             dbFilesBaseName, rrdtoolExePath, rrdtoolWrapperExePath)
        errFlag = self.myValuesDB.createDB(countersDBInfo, dbTimeString)
        if errFlag:
            self.myLogger.error("StatsAggregator - create db state 5 failed")
            return errFlag

        # SQLite won't allow us to use this cursor from other threads.
        self.myDescriptionsDB.closeDBConnection()
        self.myRunFlag = True
        threading.Thread.__init__(self)
        return 0

    def setAggrHistory (self, aggrHistory):
        self.myAggrHistory = aggrHistory
        self.myNumHistoryFiles = 0

    def setNumHistoryFiles (self, numHistoryFiles):
        self.myNumHistoryFiles = numHistoryFiles

    def loadHistoryFiles (self):
        # Get a list of history files and sort them in alphanumeric order.
        # Since kick number and date are encoded in the file name we will get
        # the files in creation order
        historyFiles = os.listdir(self.myPrevRunDir)
        historyFiles.sort()
        self.myLogger.notice("StatsAggregator load history files. Found %d files in prev run folder %s" % (len(historyFiles), self.myPrevRunDir))
        for fileName in historyFiles:
            self.myLogger.debug1("StatsAggregator loading history file %s", fileName)
            fullName = os.path.join(self.myPrevRunDir, fileName)
            with open(fullName) as f:
                lineNum = 0
                for line in f:
                    try:
                        lineNum += 1
                        (name, value, timestamp, valueType) = self.__parseHistoryLine(line.rstrip())
                        self.myAggrHistory.update(name, value, valueType, timestamp)
                    except Exception as ex:
                        self.myLogger.error("Error loading line %d in file %s. Discard rest of the file. %s" % (lineNum, fileName, ex))
                        break
        self.myLogger.notice("StatsAggregator finished loading history files")

    def saveHistoryFile (self, allUpdatesStr):
        self.myHistoryRotatingFile.rotate()
        fileName = self.myHistoryRotatingFile.getCurrentFileName()
        self.myLogger.debug1("StatsAggregator save history file %s" % fileName)
        try:
            with open(fileName, 'w') as f:
                f.write(allUpdatesStr)
        except Exception as ex:
            self.myLogger.error("Error write update history to file %s. %s" % (fileName, ex))

        # To enforce the limit on the number of history files we delete the oldest files.
        historyFiles = os.listdir(self.myPrevRunDir)
        historyFiles.sort()
        numFiles = len(historyFiles)
        numToDelete = numFiles - self.myNumHistoryFiles
        idx = 0
        self.myLogger.debug3("StatsAggregator enforce history limit: found %s, allowed %d" % (numFiles, self.myNumHistoryFiles))
        while numToDelete > 0:
            fullPath = os.path.join(self.myPrevRunDir, historyFiles[idx])
            try:
                self.myLogger.debug3("StatsAggregator enforce history limit: remove %s" % historyFiles[idx])
                os.remove(fullPath)
            except Exception as ex:
                self.myLogger.error("Enforce history limit: Error delete file %s. %s" % (fullPath, ex))
            idx += 1
            numToDelete -= 1

    # This is the module's main function. It dequeues the jobs from the queue and executes them.
    # The jobs ***MUST*** be executed synchronously since in the future the counters might change
    # and a new file has to be created before inserting any value
    def run (self):
        self.myLogger.notice("Stats Aggregator running")
        reportLimiter = EventReportLimiter(3600)
        try:
            self.myRunFlag = True
            self.myDescriptionsDB.connectToDb()
            while (self.myRunFlag):
                self.myLogger.debug2("StatsAggregator blocking in order to dequeue. Time = %d" % time.time())
                job = self.__dequeue()
                if job is None:
                    self.myLogger.debug1("StatsAggregator dequeued a job from jobs queue. Message is None -> unblock message")
                    continue
                self.myLogger.debug2("StatsAggregator message dequeued successfully. Time = %d" % time.time())

                # If this is a sample result job (the only legal message for v1.0)
                startTime = time.time()
                if (job.quack() == stats_queue_objects.AggregationQueueJobType.VALUES):
                    self.myLogger.debug2("StatsAggregator received values job")
                    # Iterate on all received counters and update the global aggr dictionary.
                    # Respect the counter burst params
                    allUpdatesStr = ""  # This will be saved to a history file
                    counterIdx = 0
                    numCounters = len(job.myCountersArray)
                    while counterIdx < numCounters:
                        burstStartTime = time.time()
                        self.myAggrHistory.lock()
                        i = 0
                        self.myLogger.debug3("Start burst")
                        while i < self.myUpdateBurstCount:
                            counterVal = job.myCountersArray[counterIdx]
                            self.myLogger.debug5("Updating counter id %s values (%s, %s)",
                                                 counterVal.myCounterId, counterVal.myValue, counterVal.myTimeStamp)
                            valueType = type(counterVal.myValue).__name__
                            self.myAggrHistory.update(counterVal.myCounterId, counterVal.myValue, valueType, counterVal.myTimeStamp)
                            allUpdatesStr += self.__formatHistoryLine(counterVal.myCounterId, counterVal.myValue, counterVal.myTimeStamp, valueType)
                            i += 1
                            counterIdx += 1
                            if counterIdx == numCounters:
                                break
                        self.myAggrHistory.unlock()
                        burstTime = time.time() - burstStartTime
                        self.myLogger.debug3("End burst")

                        # Warn about a long burst, or count it for the next warning
                        maxAllowedBurstTime = self.myUpdateBurstInterval * 0.75
                        if burstTime > maxAllowedBurstTime:
                            (shouldReport, numEvents) = reportLimiter.shouldReport()
                            if shouldReport:
                                self.myLogger.warning("Aggregator update burst took too long: %s seconds, max allowed is %s. (Occurred %s times since last reported)",
                                                      burstTime, maxAllowedBurstTime, numEvents)

                        # If we have more counters, sleep
                        if counterIdx < numCounters:
                            remainingTime = self.myUpdateBurstInterval - burstTime
                            if remainingTime > 0:
                                time.sleep(remainingTime)

                    self.saveHistoryFile(allUpdatesStr)
                    insertTimeSpan = time.time() - startTime
                    if insertTimeSpan > MAX_INSERT_TO_DB_TIME_SPAN:
                        self.myLogger.warning("Execute job exceeds the time limit. Limit is %d sec. Elapsed time is %s sec, numCounters=%s. Queue size is: %d",
                                              MAX_INSERT_TO_DB_TIME_SPAN, insertTimeSpan, numCounters, self.myJobsQueue._qsize())
                    else:
                        self.myLogger.notice("Execute job. Elapsed time is %s sec, numCounters=%s. Queue size is: %d",
                                             insertTimeSpan, numCounters, self.myJobsQueue._qsize())

            if self.myRunFlag:
                self.myLogger.notice("Stats Aggregator thread ended unexpectedly")
            else:
                self.myLogger.notice("Stats Aggregator thread ended")
            self.myRunFlag = False
        except:
            # Loop ended - thread is shutting down
            self.myRunFlag = False
            self.myLogger.error("Unexpected exception on Stats Aggregator. Exception info: %s" % stats_queue_objects.formatExceptionInfo())

        # When the while loop ends (= shutdown), close the DBs
        self.myValuesDB.closeDBConnection()
        self.myDescriptionsDB.closeDBConnection()
        self.myLogger.debug2("StatsAggregator thread ended")

    def __formatHistoryLine (self, name, value, timestamp, valueType):
        return "%s,%s,%s,%s\n" % (name, value, timestamp, valueType)

    def __parseHistoryLine (self, line):
        (name, strValue, timestamp, valueType) = line.split(',')
        floatTimestamp = None
        if timestamp != 'None':
            floatTimestamp = float(timestamp)
        value = strValue
        if valueType == 'int':
            value = int(strValue)
        elif valueType == 'float':
            value = float(strValue)
        # name is actually a numeric counter id
        return (int(name), value, floatTimestamp, valueType)

    # Creates a compressed tar file containing all the files specified + the temp directory
    def __createCompressedTar(self, directory, tarOutputFilePath):
        try:
            tarResultPath = tarOutputFilePath
            tar = tarfile.open(tarResultPath, TAR_GZIP_FLAG)
            #tar.dereference = True - This is too dangerous - can link to huge files/directories - removed on purpose - don't add this!
        except:
            self.myLogger.error("Exception occurred while creating the Tar file - tar file can't be created" + stats_queue_objects.formatExceptionInfo())
            return False
        try:
            tar.add(directory)
        except:
            self.myLogger.error("Exception occurred while creating the Tar file" + stats_queue_objects.formatExceptionInfo())
            return False
        try:
            tar.close()
        except:
            self.myLogger.error("Exception occurred while creating the Tar file - tar file can't be closed" + stats_queue_objects.formatExceptionInfo())
            return False
        self.myLogger.debug3("Tar file created successfully")
        return True

    def __delDirsIfNeeded (self, dirPath):
        try:
            if os.path.exists(dirPath):
                if os.path.isdir(dirPath):
                    shutil.rmtree(dirPath)
        except:
            self.myLogger.error("Unexpected exception on Stats Aggregator.__delDirsIfNeeded. Exception info: %s" % stats_queue_objects.formatExceptionInfo())

    def __makeDirsIfNeeded (self, dirPath):
        try:
            if os.path.exists(dirPath):
                if os.path.isdir(dirPath):
                    return True
                else:
                    return False
            else:
                oldUmask = os.umask(0)
                os.makedirs(dirPath)
                os.umask(oldUmask)
                return True
        except:
            return False

    def __moveSubDirsToDir(self, srcDir, dstDir):
        try:
            self.__makeDirsIfNeeded(dstDir)
            listOfFiles = os.listdir(srcDir)
            for d in listOfFiles:
                srcSubDir = os.path.join(srcDir, d)
                if srcSubDir == dstDir:
                    continue
                if os.path.isdir(srcSubDir):
                    shutil.move(srcSubDir, dstDir)
        except:
            self.myLogger.error("Unexpected exception on Stats Aggregator.__moveSubDirsToDir. Exception info: %s" % stats_queue_objects.formatExceptionInfo())

    def __moveFilesToDir(self, srcDir, dstDir):
        self.__makeDirsIfNeeded(dstDir)
        for root, dirs, files in os.walk(srcDir):
            for f in files:
                srcFile = os.path.join(srcDir, f)
                shutil.copy(srcFile, dstDir)
                os.remove(srcFile)

    # The user has to check the queue size before
    def __dequeue (self):
        result = self.myJobsQueue.get(True)
        return result

    def isRunning (self):
        #return True
        return self.myRunFlag

    def end (self):
        self.myLogger.notice("Stats Aggregator request to end thread")
        self.myRunFlag = False
        self.myJobsQueue.put(None)

    def setLogger (self, logger):
        self.myLogger = None
        self.myLogger = logger
        self.myDescriptionsDB.setLogger(logger)
        self.myValuesDB.setLogger(logger)

    # Returns the changes between the new configuration and the old one.
    # If there is an illegal change, logs and exits
    def diffAgainstNewConfig (self, newConfig, previousConfig):
        if newConfig is None:
            self.myLogger.error("Unexpected error occurred during comparison between new and old configurations")
            return None
        # This means there aren't any previous configurations
        if len(previousConfig) == 0:
            return newConfig
        # Newly added counters are not compared
        newCounters = {}
        # Go over all counters in the new configuration
        for counter in newConfig.values():
            if counter.myCounterId in previousConfig.keys():
                # Check that it wasn't changed; otherwise fail - it is illegal to change counters without deleting the history / changing directories
                if counter != previousConfig[counter.myCounterId]:
                    # If we got here it means that an illegal diff was found
                    self.myLogger.error("An illegal change was made in the configurations file. Counter Info: %s" % counter.toString())
                    return None
            else:
                # The counter is new - it doesn't exist in the previous configuration
                self.myLogger.debug3("New counter found. Counter = %s" % counter.toString())
                newCounters[counter.myCounterId] = counter
        # We don't check for missing counters since it is legal to disable counters
        return newCounters
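# --- Illustrative sketch (not part of the module) ---
# The history files written by saveHistoryFile() hold one update per line in the CSV form
# "name,value,timestamp,valueType" (see __formatHistoryLine / __parseHistoryLine above).
# A minimal, self-contained round-trip of that format; the function names are placeholders.

def formatHistoryLineExample(name, value, timestamp, valueType):
    return "%s,%s,%s,%s\n" % (name, value, timestamp, valueType)

def parseHistoryLineExample(line):
    (name, strValue, timestamp, valueType) = line.split(',')
    floatTimestamp = float(timestamp) if timestamp != 'None' else None
    value = strValue
    if valueType == 'int':
        value = int(strValue)
    elif valueType == 'float':
        value = float(strValue)
    return (int(name), value, floatTimestamp, valueType)

# Example: counter id 42 sampled with value 7 at epoch 1300000000.5 round-trips unchanged
assert parseHistoryLineExample(formatHistoryLineExample(42, 7, 1300000000.5, 'int').rstrip()) == (42, 7, 1300000000.5, 'int')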
class DeliveryTracking: def __init__(self, deltaFilesDir, dataDir, confDir, tempDir, counters, logger, archiver): self._deltaFilesDir = deltaFilesDir self._dataDir = dataDir self._confDir = confDir self._tempDir = tempDir self._counters = counters self._log = logger.createLoggerSameModule( G_NAME_GROUP_DELIVERY_TRACKING) self._totalDeltaWriteTime = 0 self._deltaSeqNum = 0 self._sessions = None self._recentlyBlocked = None self._whitelist = None self._noDeliveryTable = {} self._noDeliveryTableChanged = False self._archiver = archiver self._isInitialized = False self._deltaPattern = "no-delivery-table-delta-" self._deltaSuffix = ".json" # File for delta updates self._deltaFile = RotatingFileSizeEnforcer(self._log, self._deltaFilesDir, self._deltaPattern, self._deltaSuffix) self._deltaFile.prepare() def updateConfig(self, windowSizeSec, windowNumFrames, maxSessionKeys, maxNoDeliveryTableSize, blockTTLSec, shortBlockTTLSec, whitelistFile, newSessionIgnorePeriod, floodProtectionEnabled, floodProtectionWindowSizeSec, floodProtectionWindowNumFrames, floodProtectionRatio, minSessionsToAllowProtection, shouldArchiveNoDeliveryTable, shouldArchiveDeliveryTrackingUpdatesToLine, deliveryTrackingArchiveTableInterval): self._log("init").notice( "Update configuration. Window %d x %d sec, max session keys %d, max no delivery table size %d, TTL %d, Short TTL %d, " + "flood protection ratio %f, should archive table %s, should archive updates %s", windowSizeSec, windowNumFrames, maxSessionKeys, maxNoDeliveryTableSize, blockTTLSec, shortBlockTTLSec, floodProtectionRatio, shouldArchiveNoDeliveryTable, shouldArchiveDeliveryTrackingUpdatesToLine) self._maxSessionKeys = maxSessionKeys self._maxNoDeliveryTableSize = maxNoDeliveryTableSize self._blockTTLSec = blockTTLSec self._shortBlockTTLSec = shortBlockTTLSec self._whitelistFile = whitelistFile self._newSessionIgnorePeriod = newSessionIgnorePeriod self._floodProtectionEnabled = floodProtectionEnabled self._floodProtectionRatio = floodProtectionRatio self._minSessionsToAllowProtection = minSessionsToAllowProtection self._floodProtectionMode = False self._shouldArchiveNoDeliveryTable = shouldArchiveNoDeliveryTable self._shouldArchiveDeliveryTrackingUpdatesToLine = shouldArchiveDeliveryTrackingUpdatesToLine self._deliveryTrackingArchiveTableInterval = deliveryTrackingArchiveTableInterval self._lastTimeArchiveDeliveryTrackingTable = 0 # Archive on sturtup if self._sessions is None or \ self._windowSizeSec != windowSizeSec or \ windowNumFrames != self._windowNumFrames: if self._sessions is not None: self._log("new-window").notice( "Creating new sliding window. 
All existing sessions will be lost" ) self._sessions = SlidingWindow(windowSizeSec, windowNumFrames) self._windowSizeSec = windowSizeSec self._windowNumFrames = windowNumFrames if self._recentlyBlocked is None or \ self._floodProtectionWindowSizeSec != floodProtectionWindowSizeSec or \ floodProtectionWindowNumFrames != self._floodProtectionWindowNumFrames: if self._recentlyBlocked is not None: self._log("new-flood-prot-window").notice( "Creating new flood protection sliding windows") self._recentlyBlocked = SlidingWindow( floodProtectionWindowSizeSec, floodProtectionWindowNumFrames) self._floodProtectionWindowSizeSec = floodProtectionWindowSizeSec self._floodProtectionWindowNumFrames = floodProtectionWindowNumFrames self._isInitialized = True def isUpdateNeeded(self, prevState, creationTimeSeconds): """The celler is responsible to keep track of the last session state and pass it to handleSession() The caller must know if a call to handleSession is needed because it may need to reload the surrent session state (i.e reload the brownie file) """ if prevState == G_SESSION_STATE_WAIT: return True if creationTimeSeconds is None: return False # Ignore sessions that are newer then the grace period. # PrevState is still none becaue we never actually called handleSession on this session if prevState is None: if time.time( ) * 1000.0 > creationTimeSeconds * 1000.0 + self._newSessionIgnorePeriod: return True return False def handleSession(self, brownieData, prevState): """Count the session in the delivery tracking data structure Each session is counted only one time. When a session changes state from wait-state to active (or to ignored) we need to decrement the wait-state counter. We need to tell the caller wat is the state of the session and expect it to pass it to us in the next call. In the first call for a new session, caller should pass "None" in prevState Args: brownieData: The brownie dictionary prevState: The last known state of this session, returned from a previous call, or "None" for new session Returns: state: The current state of the session. Caller needs to pass it to us in the next call, in "prevState" argument Raises: None """ # If we already determined that we don't have anything to do with this session, ignore it if prevState == G_SESSION_STATE_ERROR: return G_SESSION_STATE_ERROR # Extract session data from the brownie originalClientIp = brownieData.get('originalClientIp') playerId = brownieData.get('deliveryTrackingPlayerId') signatureGroupId = brownieData.get('deliveryTrackingSignatureGroupId') # Make sure we have all the key components if originalClientIp is None or playerId is None or signatureGroupId is None: self._log("missing-key").warning( "Can't compute key. Some of the feilds are missing. Brownie data %s", brownieData) self._counters['deliveryTrackingNumInvalidKeySessions'] += 1 # Ignore this session in the future return G_SESSION_STATE_ERROR # Extract the threshold values waitStateRatioThreshold = brownieData.get( 'deliveryTrackingWaitStateRatioThreshold') minWaitStateToBlock = brownieData.get( 'deliveryTrackingMinWaitStateToBlock') # Set default values if not present if waitStateRatioThreshold is None: waitStateRatioThreshold = G_DEFAULT_WAIT_STATE_RATIO_THRESHOLD self._log("missing-threshold1").warning( "Missing wait-state-ratio threshold in brownie. Set to default %f. 
Brownie data %s", waitStateRatioThreshold, brownieData) if minWaitStateToBlock is None: minWaitStateToBlock = G_DEFAULT_MIN_WAIT_STATE_TO_BLOCK self._log("missing-threshold2").warning( "Missing min-wait-state-to-block in brownie. Set to default %d. Brownie data %s", minWaitStateToBlock, brownieData) # Compose the session key and counter names key = self._composeKey(originalClientIp, playerId, signatureGroupId) waitStateCounterName = key + ":WAIT" activeStateCounterName = key + ":ACTIVE" # If we don't have ANY history about this key, we treat it as a new key countWait = self._sessions.getCount(waitStateCounterName) countActive = self._sessions.getCount(activeStateCounterName) isNewKey = (countWait == 0 and countActive == 0) # Is it a new session? (seen for the first time by the caller) isNewSession = (prevState is None) # Get the current atate of the session. state = self._getState(brownieData) if state is None: # Could not determine a valid state. Error was already logged. # Ignore this session in the future return G_SESSION_STATE_ERROR # # And now for the interesting stuff... # # New key if isNewKey: # A new key is counted only if it's in wait state if state == G_SESSION_STATE_WAIT: # Enforce size limit if self._sessions.getNumKeys() >= self._maxSessionKeys: self._log("max-sessions-limit").error( "Sessions size limit exceeded. Ignore key %s", key) else: # Increment the wait-state counter. # No point to check thresholds at this moment (too early) self._counters[ 'deliveryTrackingNumSessionsBornInWaitState'] += 1 self._sessions.add(waitStateCounterName, 1) # return the state to the caller so that we can keep track on it untill it gets active (or ignored) return state fixedNow = False nothingChanged = False # Existing key. We already have history about it if isNewSession: # A new session for an existing key is always counted if state == G_SESSION_STATE_WAIT: self._counters[ 'deliveryTrackingNumSessionsBornInWaitState'] += 1 self._sessions.add(waitStateCounterName, 1) elif state == G_SESSION_STATE_ACTIVE: self._counters[ 'deliveryTrackingNumSessionsBornInActiveState'] += 1 self._sessions.add(activeStateCounterName, 1) elif state == G_SESSION_STATE_IGNORED_REDIRECT: self._counters[ 'deliveryTrackingNumSessionsBornInIgnoredRedirectState'] += 1 nothingChanged = True else: nothingChanged = True else: # An existing session needs to be fixed if changed from wait state to active or to ignored-redirect. if prevState == G_SESSION_STATE_WAIT and state == G_SESSION_STATE_ACTIVE: self._counters[ 'deliveryTrackingNumSessionsBecomeActiveState'] += 1 self._sessions.add(waitStateCounterName, -1) self._sessions.add(activeStateCounterName, 1) fixedNow = True elif prevState == G_SESSION_STATE_WAIT and state == G_SESSION_STATE_IGNORED_REDIRECT: self._counters[ 'deliveryTrackingNumSessionsBecomeIgnoredRedirectState'] += 1 self._sessions.add(waitStateCounterName, -1) fixedNow = True else: nothingChanged = True # It's important not to check thresholds if nothing changed (OSC-1600) if nothingChanged: return state # Check it we crossed thresholds self._checkThresholdsAndBlockIfNeeded(waitStateCounterName, activeStateCounterName, waitStateRatioThreshold, minWaitStateToBlock, key, fixedNow) self._log("handle-session").debug3( "Handle session. Key %s. isNewSession %s. isNewKey %s. Curr state %s. Prev state %s. Wait %s. 
Active %s", key, isNewSession, isNewKey, state, prevState, countWait, countActive) return state def writeNoDeliveryDelta(self, clearFirst=False): """Writes a delta file to Line with all the changes in no-delivery-table since the last successfull call to this function """ # TODO(amiry) - # Most of the times we will not have any updates to this table. # We don't want to traverse the full table every second just to find out that we don't # need to write anything, so we need to set a flag every time we insert a new key to the table. # Only if this flag is set we start the iteration. # # Problem is that we still need to check for expired records even if there are no updates. # To solve this we will keep track of the minimal expiration time and travers when first expiration arrive # If we have too many delta files we postpone the write. (Line is not reading deltas) # This is to protect disk space #files = os.listdir(self._deltaFilesDir) files = glob.glob( os.path.join(self._deltaFilesDir, self._deltaPattern + "*" + self._deltaSuffix)) if len(files) > G_MAX_DELTA_FILES_IN_FOLDER: self._log("too-many-delta-files").error( "Too many delta files in folder %s [%d]. Do not write delta. (Line is down?)", self._deltaFilesDir, len(files)) return self._log("write-delta").debug3("writeNoDeliveryDelta() - Start") numLines = 0 currentTime = time.time() deltaLines = [] keysAdded = [] keysDeleted = [] if clearFirst: numLines += 1 deltaLines.append({ "seqNum": self._deltaSeqNum + numLines, "cmd": "DELETE_ALL", }) for key, entry in self._noDeliveryTable.iteritems(): cmd = None # Important to check expiration BEFORE we check if we need to add the key # because we might be loading an old table from file # # Note that if expiration time is smaller than the delta write interval we might send # DELETE commands to non-existing keys in Line. # Line is supposed to handle a case of deleting a non existing key if currentTime - entry['expiration'] > 0 or entry.get('isDeleted'): if not clearFirst: # If we cleared the table there is no need to send delete commands self._log("write-delta-2").debug3( "writeNoDeliveryDelta() - Append DELETE command on %s", key) cmd = "DELETE" # But we do need to remove the expired/deleted records from the table keysDeleted.append(key) elif not entry['writtenToLine']: self._log("write-delta-2").debug3( "writeNoDeliveryDelta() - Append ADD command on %s", key) cmd = "ADD" keysAdded.append(key) if cmd is not None: key_values = self._splitKey(key) if key_values is None: self._log("key-unpack").error( "Write No Delivery Delta: Error unpacking key %s. Ignore and delete from table", key) keysDeleted.append(key) continue numLines += 1 originalClientIp = key_values[0] playerId = key_values[1] signatureGroupId = key_values[2] deltaLines.append({ "seqNum": self._deltaSeqNum + numLines, "cmd": cmd, "key": { "originalClientIp": originalClientIp, "deliveryTrackingPlayerId": playerId, "deliveryTrackingSignatureGroupId": signatureGroupId } }) # Limit the size of each delta unless in snapshot mode if numLines >= G_DELTA_SIZE_LIMIT: self._log("max-delta-size").notice( "No-Delivery-Delta to big. Break after after %d entry. Will continue next time.", numLines) break if len(keysAdded) == 0 and len(keysDeleted) == 0: self._counters['deliveryTrackingNumDeltaWritesNothingToDo'] += 1 return if self._writeDeltaFile(deltaLines): for key in keysAdded: entry = self._noDeliveryTable[key] entry['writtenToLine'] = True for key in keysDeleted: self._log("key-deleted").notice( "Delete key %s. 
Either expired or whitelisted", key) del (self._noDeliveryTable[key]) self._log("delta-file-sent").debug1( "Delta file sent. Keys added: %d, Keys deleted: %d, Clear First: %s", len(keysAdded), len(keysDeleted), clearFirst) self._noDeliveryTableChanged = True self._deltaSeqNum += numLines deltaTime = time.time() - currentTime self._totalDeltaWriteTime += deltaTime self._counters['deliveryTrackingNumDeltaWritesSucceeded'] += 1 self._counters['deliveryTrackingNumDeltaLinesWritten'] += numLines self._counters[ 'deliveryTrackingAvgDeltaWriteTime'] = self._totalDeltaWriteTime / ( self._counters['deliveryTrackingNumDeltaWritesSucceeded'] * 1.0) else: # Specific error was already logged self._counters['deliveryTrackingNumDeltaWritesFailed'] += 1 self._log("write-delta-file").error("Could not write delta file") def sendClearNoDeliveryTableCommand(self): """Send a clear command to Line. Do not clear the actual table caller is responsible to call this function if delivery tracking is disabled """ if not self._isInitialized: self._log("uninitialized").warning( "sendClearNoDeliveryTableCommand() called when delivery tracking is not initialized" ) return False deltaLines = [] deltaLines.append({ "seqNum": self._deltaSeqNum + 1, "cmd": "DELETE_ALL", }) if self._writeDeltaFile(deltaLines): self._deltaSeqNum += 1 self._log("clear-table").notice( "Send clear no-delivery-table command to Line") else: # Specific error was already logged self._log("clear-table-error").error( "Can't clear no-delivery table") return False return True def clearNoDeliveryTable(self): self._log("user-clear-table").notice( "Execute user clear table command") if self.sendClearNoDeliveryTableCommand(): self._noDeliveryTable = {} self._noDeliveryTableChanged = True self.saveNoDeliveryTable() self._sessions.reset() def removeKey(self, keyToRemove): self._log("user-remove-key-command").notice( "Execute user remove key command. Regex=", keyToRemove) regex = re.compile(keyToRemove) for key, entry in self._noDeliveryTable.iteritems(): if regex.match(key): self._log("user-remove-key").notice("Remove key", key) entry["isDeleted"] = True self._noDeliveryTableChanged = True self.saveNoDeliveryTable() def saveNoDeliveryTable(self): if not self._noDeliveryTableChanged: self._log("table-not-saved").debug3( "No delivery table not saved. No change") return try: filename = os.path.join(self._dataDir, G_NO_DELIVERY_TABLE_FILE_NAME) a.infra.format.json.writeToFile(self._log, self._noDeliveryTable, filename) self._log("table-saved").debug3( "No delivery table saved to file %s. %d entries", filename, len(self._noDeliveryTable)) self._noDeliveryTableChanged = False currentTime = time.time() if (currentTime - self._lastTimeArchiveDeliveryTrackingTable < self._deliveryTrackingArchiveTableInterval): return except Exception as ex: self._log("write-table-error").error( "Error in write no-delivery table to file %s. %s", filename, ex) return # Archive if self._shouldArchiveNoDeliveryTable and self._archiver: archiveFailed = False archiveFilename = None try: archiveFilename = os.path.join(self._tempDir, os.path.basename(filename)) shutil.copyfile(filename, archiveFilename) if self._archiver.archiveFile(archiveFilename): self._lastTimeArchiveDeliveryTrackingTable = time.time() else: archiveFailed = True except Exception as ex: self._log("error-archive-table").error( "Error in archive no-delivery-table. file %s. 
%s", archiveFilename, ex) archiveFailed = True if archiveFailed: os.remove(archiveFilename) def loadNoDeliveryTable(self): try: filename = os.path.join(self._dataDir, G_NO_DELIVERY_TABLE_FILE_NAME) self._noDeliveryTable = a.infra.format.json.readFromFile( self._log, filename) self._noDeliveryTableChanged = False except Exception as ex: self._log("read-table-error").warning( "Error in load no-delivery table from file %s. %s", filename, ex) return self._log("table-loaded").notice( "Loading No delivery table from file %s. %d entries", filename, len(self._noDeliveryTable)) # TODO(amiry) - Validate json data. Less important because this is not a user file # Mark all entries to be written to in the next delta. # Whitelisted entries are deleted. # Expired entries will be removed when writing delta (in the normal expiration path). whitelisted = [] for key in self._noDeliveryTable: rules = self._isWhitelisted(key) if rules: self._log("whitelisted-in-load-table").notice( "Load No Delivery Table: Key %s not loaded due to whitelist rules %s", key, rules) whitelisted.append(key) else: self._noDeliveryTable[key]['writtenToLine'] = False for key in whitelisted: del (self._noDeliveryTable[key]) if whitelisted: self._noDeliveryTableChanged = True # Don't wait to the next interval. This will also remove all expired entries. self.writeNoDeliveryDelta(clearFirst=True) def loadWhitelist(self): """Loads the whitelist json file. Should be called upon signal """ if self._whitelistFile is None: self._whitelist = None return filename = os.path.join(self._confDir, self._whitelistFile) try: jsonData = a.infra.format.json.readFromFile(self._log, filename) except Exception as ex: self._log("read-whitelist-error").error( "Error in read whitelist file %s. %s. Whitelist cleared", filename, ex) self._whitelist = None return self._log("load-whitelist").notice("Loading whitelist from file %s", filename) try: # Create a regex which is an "OR" between all strings in the whitelist strings-list, with capture. whitelist = jsonData['whitelist'] for i in range(len(whitelist)): whitelist[i] = "(%s)" % whitelist[i] regex = "|".join(whitelist) self._whitelist = re.compile(regex) except (ValueError, KeyError) as ex: # Since whitelist is a user file, validation is important self._log("invalid-whitelist").error( "Invalid whitelist in json file %s. %s. Whitelist cleared", self._whitelistFile, ex) self._whitelist = None return # Delete all entries that are now whitelisted writeDelta = False for key in self._noDeliveryTable: rules = self._isWhitelisted(key) if rules: self._log("whitelisted-in-load-whitelist").notice( "Load whitelist: Key %s deleted due to whitelist rules %s", key, rules) self._noDeliveryTable[key]['isDeleted'] = True self._noDeliveryTableChanged = True writeDelta = True if writeDelta: self.writeNoDeliveryDelta() def checkFloodProtection(self): if not self._floodProtectionEnabled: self._floodProtectionMode = False return # TODO(amiry) - This number is not accurate because it doesn't reflect the total number of sessions. # Only the sessions that were born in wait state. 
totalSessions = self._sessions.getNumKeys() if totalSessions == 0: return thresholdCrossed = False currentBlockCount = self._recentlyBlocked.getNumKeys() if currentBlockCount / ( totalSessions * 1.0 ) >= self._floodProtectionRatio and totalSessions >= self._minSessionsToAllowProtection: thresholdCrossed = True # It threshold is crossed and we are not in flood protection mode we enter flood protection if thresholdCrossed and not self._floodProtectionMode: self._log("enter-flood-protection-mode").notice( "Delivery Tracking enter flood protection mode. " "%d keys blocked in the last %d seconds, out of total %d keys. Min to block is %d", currentBlockCount, self._floodProtectionWindowSizeSec, totalSessions, self._minSessionsToAllowProtection) self._floodProtectionMode = True """" CURRENTLY WE DO NOT SEND CLEAR COMMAND. WE JUST WANT TO KNOW ABOUT THIS STATE self.sendClearNoDeliveryTableCommand() """ return # We are not in flood protection mode, and below threshold. We need at least "widow size" of # silence to release the hook if self._floodProtectionMode and not thresholdCrossed and self._sessions.getNumKeys( ) == 0: self._log("leave-flood-protection-mode").notice( "Delivery Tracking leave flood protection mode") self._floodProtectionMode = False def enable(self, startup): if startup: # On startup, also load the no delivery table self.loadNoDeliveryTable() else: # Need to send table to line for entry in self._noDeliveryTable.values(): entry['writtenToLine'] = False self.sendClearNoDeliveryTableCommand() # reload the whitelist self.loadWhitelist() def disable(self): # When disabled we need to send a clear command to Line, # and reset all session counters. self.sendClearNoDeliveryTableCommand() self._sessions.reset() def getNumKeys(self): return self._sessions.getNumKeys() def getNoDeliveryTableSize(self): return len(self._noDeliveryTable) # --- Public for unit test --- def getSessionCountersForUnitTest(self, originalClientIp, playerId, signatureGroupId): key = self._composeKey(originalClientIp, playerId, signatureGroupId) waitStateCounterName = key + ":WAIT" activeStateCounterName = key + ":ACTIVE" return (self._sessions.getCount(waitStateCounterName), self._sessions.getCount(activeStateCounterName)) def getNoDeliveryTableEntryForUnitTest(self, originalClientIp, playerId, signatureGroupId): key = self._composeKey(originalClientIp, playerId, signatureGroupId) return self._noDeliveryTable[key] def getFloodProtectionStateForUnitTest(self): return self._floodProtectionMode def clearSessionKeysForUnitTest(self): return self._sessions.reset() # ---------------------------- def _getState(self, brownieData): wasUsed = brownieData.get('wasUsed', False) ignoredRedirect = brownieData.get('ignoredRedirect', False) if wasUsed and ignoredRedirect: self._log("bad-state").error( "Brownie is used by delivery and marked an ignored-redirect. Impossible! Brownie data %s", brownieData) self._counters['deliveryTrackingNumInvalidStateSessions'] += 1 return None if not wasUsed and not ignoredRedirect: return G_SESSION_STATE_WAIT if not wasUsed: return G_SESSION_STATE_IGNORED_REDIRECT return G_SESSION_STATE_ACTIVE def _checkThresholdsAndBlockIfNeeded(self, waitStateCounterName, activeStateCounterName, waitStateRatioThreshold, minWaitStateToBlock, key, fixedNow): numWait = self._sessions.getCount(waitStateCounterName) numActive = self._sessions.getCount(activeStateCounterName) # This can happen when state changes from wait to active or to ignored redirect. 
# numWait can be negative because we decrement it when a session becomes active. # If the positive value dropped out of the sliding window we get a negative value. if numWait <= 0: return ratio = numWait / (numWait + numActive * 1.0) if ratio >= waitStateRatioThreshold and numWait >= minWaitStateToBlock: self._blockKey(key) elif fixedNow: self._setShortTTLIfNeeded(key) def _blockKey(self, key): if key not in self._noDeliveryTable: """ CURRENTLY WE DO NOT ENFORCE FLOOD PROTECTION # Enforce flood-protection mode if self._floodProtectionMode: self._counters['deliveryTrackingNumSessionsBlockedInFloodProtection'] += 1 self._log("flood-prot").error("Block key: Key %s not blocked due to flood protection mode", key) return """ # Enforce size limit if len(self._noDeliveryTable) >= self._maxNoDeliveryTableSize: self._counters['deliveryTrackingNumSessionsBlockedOverSizeLimit'] += 1 self._log("size-limit").error( "Block key: Key %s not blocked due to no-delivery-table size limit %d", key, len(self._noDeliveryTable)) return # Enforce whitelist rules = self._isWhitelisted(key) if rules: self._counters['deliveryTrackingNumSessionsWhitelisted'] += 1 self._log("whitelist").notice( "Block Key: Key %s not blocked due to whitelist rules %s", key, rules) return self._counters['deliveryTrackingNumBlocked'] += 1 expiration = time.time() + self._blockTTLSec self._noDeliveryTable[key] = { 'writtenToLine': False, 'expiration': expiration, 'shortTTL': False } self._noDeliveryTableChanged = True self._recentlyBlocked.add(key, 1) self._log("block-key").notice("Block Key: Key %s blocked", key) else: # If the key is already blocked with short ttl we need to set it back to the normal TTL if self._noDeliveryTable[key].get('shortTTL'): # Do not rewrite to Line expiration = time.time() + self._blockTTLSec self._noDeliveryTable[key]['shortTTL'] = False self._noDeliveryTable[key]['expiration'] = expiration self._noDeliveryTableChanged = True def _setShortTTLIfNeeded(self, key): if key in self._noDeliveryTable: # Do not rewrite to Line expiration = time.time() + self._shortBlockTTLSec self._noDeliveryTable[key]['shortTTL'] = True self._noDeliveryTable[key]['expiration'] = expiration self._noDeliveryTableChanged = True def _isWhitelisted(self, key): if self._whitelist is not None: # Check against whitelist match = self._whitelist.match(key) if match: rules = [] for g in match.groups(): if g is not None: rules.append(g) return rules return None def _composeKey(self, originalClientIp, playerId, signatureGroupId): # We assume that this delimiter is safe to use (Will never appear in the data) return ":QWILT:".join([originalClientIp, playerId, signatureGroupId]) def _splitKey(self, key): values = key.split(":QWILT:") if len(values) != 3: return None return values def _writeDeltaFile(self, deltaLines): # First we rotate to get a fresh file name self._deltaFile.rotate() deltaFilename = self._deltaFile.getCurrentFileName() archiveFilename = None # If we need to archive the delta file, we first save a copy. # Remember that the delta file is consumed by the Line and deleted. if self._shouldArchiveDeliveryTrackingUpdatesToLine and self._archiver: try: archiveFilename = os.path.join(self._tempDir, os.path.basename(deltaFilename)) a.infra.format.json.writeToFile(self._log, {"deltaLine": deltaLines}, archiveFilename) except Exception as ex: self._log("error-write-delta-file-for-archive").error( "Error in write delta file for archiving. File %s. %s", archiveFilename, ex) # Now we create the delta file.
try: if archiveFilename: shutil.copyfile(archiveFilename, deltaFilename) else: a.infra.format.json.writeToFile(self._log, {"deltaLine": deltaLines}, deltaFilename) except Exception as ex: self._log("error-write-delta-file").error( "Error in write delta file %s. %s", deltaFilename, ex) return False # Finally we archive it if archiveFilename: archiveFailed = False try: if not self._archiver.archiveFile(archiveFilename): archiveFailed = True except Exception as ex: self._log("error-archive-delta-file").error( "Error in archive delta file %s. %s", archiveFilename, ex) archiveFailed = True if archiveFailed: os.remove(archiveFilename) # If we arrived here the delta write was okay. # Archiving doesn't affect the return value return True
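# --- Illustration (not part of the original module) ---------------------------------------------
# A minimal, standalone sketch of the whitelist mechanism used by the delivery-tracking class
# above: keys are composed with the ":QWILT:" delimiter, user patterns are wrapped in capture
# groups and OR-ed into one regex, and a match reports which rules fired (as _isWhitelisted does).
# Function names and sample values below are illustrative assumptions, not product code.
import re

def compose_key(original_client_ip, player_id, signature_group_id):
    return ":QWILT:".join([original_client_ip, player_id, signature_group_id])

def build_whitelist(patterns):
    # Wrap every pattern in its own capture group and OR them together.
    return re.compile("|".join("(%s)" % p for p in patterns))

def matched_rules(whitelist, key):
    # Return the list of rules (capture groups) that matched, or None if the key is not whitelisted.
    match = whitelist.match(key)
    if not match:
        return None
    return [g for g in match.groups() if g is not None]

if __name__ == "__main__":
    whitelist = build_whitelist([r"10\.0\..*", r".*:QWILT:player-42:QWILT:.*"])
    key = compose_key("10.0.1.7", "player-9", "sig-group-1")
    print(matched_rules(whitelist, key))  # the first rule matches this key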
class CdnReporter (object): def __init__ (self, name): self._name = name self._enabled = False self._analytics = False self._logDir = None self._log = None self._tranRotatingFile = None self._tranFileName = None self._metaFileName = None self._tranFile = None self._fileOpenTime = None self._rotationSizeKB = None self._rotationTimeSec = None self._unsentQueueMaxSizeMB = None self._metaFlushIntervalSec = None self._lastTimeMetaUpdate = None self._urlTranslationEnabled = None self._unsentArchiveDir = None self._metaArchiveDir = None self._unsentArchiveMaxSizeGB = None self._metaArchiveMaxSizeGB = None self._unsentArchiver = None self._metaArchiver = None self._counters = { "cgid": {}, "logDirSizeMB": 0 } self._cgidCurrMeta = {} self._checkRotationCnt = 0 def initLogger (self, logger): self._log = logger.createLoggerSameModule(G_NAME_GROUP_CDN_REPORTER) def init (self, cfg): if not self._updateConfig(cfg): self._log("error-init").warning("Error init CDN reporter %s", self._name) return False return True def disable (self): self._log("cfg-disable").notice("Disable CDN integration %s", self._name) self._closeTranAndMetaFile() self._clearOutQueue() self._stopArchivers() def terminate (self): self._log("terminate").notice("Terminate CDN integration %s", self._name) self._closeTranAndMetaFile() self._stopArchivers() def getCounters (self): self._getTotalDirSizeMB() # Update the "logDirSizeMB" counter return self._counters def _updateConfig (self, cfg): enabled = cfg.get("enabled") logDir = cfg.get("logdir") self._rotationTimeSec = cfg.get("rotationTimeSec") self._rotationSizeKB = cfg.get("rotationSizeKB") self._unsentQueueMaxSizeMB = cfg.get("unsentQueueMaxSizeMB") self._metaFlushIntervalSec = cfg.get("metaFlushIntervalSec") unsentArchiveDir = cfg.get("unsentArchiveDir") metaArchiveDir = cfg.get("metaArchiveDir") unsentArchiveMaxSizeGB = cfg.get("unsentArchiveMaxSizeGB") metaArchiveMaxSizeGB = cfg.get("metaArchiveMaxSizeGB") self._urlTranslationEnabled = cfg.get("urlTranslation") self._analytics = cfg.get("analytics") if self._analytics: self._log("analytics-mode").notice("CDN reporter %s running in analytics mode", self._name) if enabled: # Rename old tmp files (in case of crash recovery) before we open the new file self._handleOldTmpFiles(logDir) # Check if log dir changed. Need to create a new rotating file if self._logDir and self._logDir != logDir: self._log("log-dir-changed").notice("Log dir for CDN reporter %s changed", self._name) # Close and rename the currently opened file self._closeTranAndMetaFile() self._tranRotatingFile = None self._logDir = logDir if not self._tranRotatingFile: self._tranRotatingFile = RotatingFileSizeEnforcer(self._log, self._logDir, "tran-"+self._name+"-", ".log") self._tranRotatingFile.initFileRotatingPattern( "%s.%s" % (RotatingFileSizeEnforcer.KICK_NUM_4, RotatingFileSizeEnforcer.EPOCH_SECONDS_10)) # Instead of setting max size on the rotating file pattern, we enforce the size limit ourselves. # We will not write new log files if the filder size reaced max size. # Instead, we will pass the tmp fils directly into the unsent files archive. # This is because the unsent files may be opened and can't be deleted. 
self._tranRotatingFile.prepare() # Restart archivers if needed if unsentArchiveDir != self._unsentArchiveDir or \ metaArchiveDir != self._metaArchiveDir or \ unsentArchiveMaxSizeGB != self._unsentArchiveMaxSizeGB or \ metaArchiveMaxSizeGB != self._metaArchiveMaxSizeGB: self._unsentArchiveDir = unsentArchiveDir self._metaArchiveDir = metaArchiveDir self._unsentArchiveMaxSizeGB = unsentArchiveMaxSizeGB self._metaArchiveMaxSizeGB = metaArchiveMaxSizeGB self._startArchivers() if self._enabled and not enabled: # Disable integration self.disable() self._enabled = False if not self._enabled and enabled: # Enable integration if self._rotateFile(): self._log("cfg-enable").notice("Enabled CDN integration %s", self._name) self._enabled = True else: self._log("cfg-enable-error").notice("Error enabling CDN integration %s", self._name) return True def _openTranAndMetaFile (self): # Close the file if it was opened self._closeTranAndMetaFile() if self._tranFile: self._log("file-close-before-open-error").error("Can't open new transactoin log file. Error closing prev. Name %s", self._tranFileName) return False # Get the new file name self._tranRotatingFile.rotate() tranFileName = self._tranRotatingFile.getCurrentFileName() + ".tmp" metaFileName = tranFileName[:-8] + ".meta" self._cgidCurrMeta = {} try: self._tranFile = open(tranFileName, "wt", buffering=1) except IOError as ex: self._log("file-open-error").warning("Error opening transaction file %s. %s", tranFileName, ex) return False self._log("file-opened").notice("Transaction log file opened. Name %s, Meta %s", tranFileName, metaFileName) self._fileOpenTime = time.time() self._tranFileName = tranFileName self._metaFileName = metaFileName self._numTransactionsInCurrFile = 0 return True def _closeTranAndMetaFile (self): closed = False try: if self._tranFile: self._tranFile.close() self._updateMetaFile() self._tranFile = None closed = True except IOError as ex: self._log("tran-ile-close-error").error("Error closing transaction file. Name %s. %s", self._tranFileName, ex) return if not closed: return # Check if we should push the file to the log queue. The total size includes the current file. totalSizeMB = self._getTotalDirSizeMB() if totalSizeMB < self._unsentQueueMaxSizeMB: self._log("file-closed").debug1("Transaction log file closed. Rename to %s", self._tranFileName[:-4]) self._pushToOutQueue(self._tranFileName) else: # Archive file in unsent archive self._log("max-log-queue").notice("Log queue size limit exceeded. Unsent file %s archived. Unsent meta file %s archived", self._tranFileName, self._metaFileName) a.infra.process.logUserMessage(a.api.user_log.msg.export.QueueFull(self._name, self._numTransactionsInCurrFile)) self._archiveFile(self._tranFileName, self._unsentArchiver) # Archive meta file in meta archive self._archiveFile(self._metaFileName, self._metaArchiver) def _rotateFile (self): self._closeTranAndMetaFile() self._tranRotatingFile.rotate() if self._openTranAndMetaFile(): return True return False def _shouldRotate (self): rotateBySize = None rotateByTime = None try: fileSize = os.path.getsize(self._tranFileName) / 1024 except Exception as ex: self._log("getsize-error").error("Error getting tran file size. File %s. %s" % (self._tranFileName, ex)) # Recover. Open a new file self._openTranAndMetaFile() return False if fileSize > self._rotationSizeKB: self._log("rotate-by-size").debug1("Should rotate by size. 
%d > %d" % (fileSize, self._rotationSizeKB)) rotateBySize = True else: now = time.time() if now - self._fileOpenTime > self._rotationTimeSec: self._log("rotate-by-time").debug1("Should rotate by size. %d > %d" %(now - self._fileOpenTime, self._rotationTimeSec)) rotateByTime = True return (rotateBySize or rotateByTime) def _getTotalDirSizeMB (self): totalSize = 0 if not self._logDir or not os.path.exists(self._logDir): return 0 for dirpath, dirnames, filenames in os.walk(self._logDir): for f in filenames: # We only want to count ".bz" files if len(f) > 4 and f[-4:] == '.bz2': fp = os.path.join(dirpath, f) totalSize += os.path.getsize(fp) totalSizeMB = totalSize / (1024.0*1024.0) self._counters["logDirSizeMB"] = int(totalSizeMB) return totalSizeMB def _updateStatsCounters (self, cgid, downloadedContentBytes): # Update the total stats counters self._updatePerCgidData(self._counters["cgid"], cgid, downloadedContentBytes) # Update the meta counters (reset per meta file) self._updatePerCgidData(self._cgidCurrMeta, cgid, downloadedContentBytes) # Update meta file once in a while currentTime = time.time() if (self._lastTimeMetaUpdate == None) or (currentTime - self._lastTimeMetaUpdate >= self._metaFlushIntervalSec): # Update time even if we fail. Don't want to try on every transaction self._lastTimeMetaUpdate = currentTime self._updateMetaFile() def _updatePerCgidData (self, aDict, cgid, volume): perCgidData = aDict.get(cgid) if perCgidData is None: aDict[cgid] = {"count":1, "volume":volume} else: aDict[cgid]["count"] += 1 aDict[cgid]["volume"] += volume def _updateMetaFile (self): try: a.infra.format.json.writeToFile(self._log, self._cgidCurrMeta, self._metaFileName) except Exception as ex: self._log("error-update-mata").error("Error updating meta file %s. Data %s. %s" % (self._metaFileName, self._cgidCurrMeta, ex)) def _handleOldTmpFiles (self, logDir): # TODO(amiry) - Since this is done very early in the init process, # it is a little difficult to properly handle handle old tmp files properly. # Currently we just delete them self._killPattern(logDir, "tran-*.log") self._killPattern(logDir, "tran-*.log.tmp") self._killPattern(logDir, "tran-*.log.meta") def _killPattern (self, logDir, pattern): fileList = glob.glob(os.path.join(logDir, pattern)) for fileName in fileList: fileName = os.path.join(logDir, fileName) self._log("delete-old-tmp-file").debug1("Delete old tmp file %s" % fileName) try: os.unlink(fileName) except Exception as ex: self._log("error-delete").error("Error deleting file %s. %s" % (fileName, ex)) def _clearOutQueue (self): # TODO(amiry) - It is possible that archiving will fail because logpusher sent this file already. fileList = glob.glob(os.path.join(self._logDir, "tran-*.log.bz2")) for fileName in fileList: self._log("clear-out-queue").notice("Clear log queue: archive unsent file %s" % fileName) self._archiveFile(fileName, self._unsentArchiver) def _pushToOutQueue (self, tmpFileName): logFileName = tmpFileName[:-4] try: # Remove the ".tmp" suffix os.rename(tmpFileName, logFileName) except Exception as ex: self._log("rename-tmp-file-failed").error("Error renaming tmp file %s. %s" % (tmpFileName, ex)) return # We compress the files ourselves instead of letting logpusher doing that. # This is bacause # 1. Logpusher's bad behaviour in case of connectivity problem. (Keeps finding and compressing the same # file over and over again) # 2. Maintain a smaller pickup queue. Do not assume that logpusher failed to compress. 
self._log("bzip-log-file").debug1("bzip2 %s" % logFileName) subprocess.call("bzip2 %s" % logFileName, shell=True) if os.path.exists(logFileName): self._log("bzip-error").warning("Error bzipping file %s. Delete file" % logFileName) os.unlink(logFileName) else: self._log("pushed").info("File pushed to log queue %s" % logFileName) """ try: # And remove the log file os.unlink(logFileName) except Exception as ex: self._log("rename-tmp-file-failed").error("Error removing log file %s. %s" % (logFileName, ex)) """ def _archiveFile (self, fileName, archiver): shouldCheckIfExists = False shouldRemove = False try: if not archiver.archiveFile(fileName): shouldCheckIfExists = True except Exception as ex: self._log("failed-archive").error("Failed to archive file '%s'. exception %s", fileName, ex) shouldCheckIfExists = True if shouldCheckIfExists: if os.path.exists(fileName): shouldRemove = True if shouldRemove: try: self._log("remove-file").debug1("Remove file '%s'", fileName) os.remove(fileName) except Exception as ex: self._log("failed-remove").error("Failed to remove file '%s'. exception %s", fileName, ex) def _startArchivers (self): # Start the unsent archiver if self._unsentArchiver: self._unsentArchiver.stop() self._unsentArchiver = None self._unsentArchiver = a.infra.file.rotating_file_archiver.Archiver( logger = self._log, inputDir = self._logDir, bufferDirSizeLimitInMB = 1024, outputDir = self._unsentArchiveDir, \ bufferDir = "unsent_archiver_buffer") self._unsentArchiver.setUseFixedFileSize(4096) self._unsentArchiver.setOutputDirSizeLimitGB(self._unsentArchiveMaxSizeGB) self._unsentArchiver.setRotationTimeThersholdSeconds(60) self._unsentArchiver.start(self._archiverThreadExceptionCallBack) # Start the meta archiver if self._metaArchiver: self._metaArchiver.stop() self._metaArchiver = None self._metaArchiver = a.infra.file.rotating_file_archiver.Archiver( logger = self._log, inputDir = self._logDir, bufferDirSizeLimitInMB = 1024, outputDir = self._metaArchiveDir, \ bufferDir = "meta_archiver_buffer") self._metaArchiver.setUseFixedFileSize(4096) self._metaArchiver.setOutputDirSizeLimitGB(self._metaArchiveMaxSizeGB) self._metaArchiver.setRotationTimeThersholdSeconds(60) self._metaArchiver.start(self._archiverThreadExceptionCallBack) def _stopArchivers (self): # Stop the unsent archiver if self._unsentArchiver: self._unsentArchiver.stop() # Stop the meta archiver if self._metaArchiver: self._metaArchiver.stop() def _archiverThreadExceptionCallBack (self, exception): self._log("archvier-thread-exception").error("Got exception '%s' from the archiver thread. Terminate reporter %s" % (exception, self._name)) self.terminate() def enabled (self): return self._enabled or self._analytics
class StatsAggregator(threading.Thread): def __init__(self): pass #Finds previous rrd file paths def getPreviousRrdPaths(self, rrdDirPath): list_of_files = [] for root, dirs, files in os.walk(rrdDirPath): for f in files: if f.endswith(RRD_EXTENTION): list_of_files.append(f) return list_of_files def __copyDbsToOldDir(self, dbTimeString): tempPath = os.path.join( self.myOldDataDir, str(dbTimeString.strftime(FILE_NAME_TIMESTAMP_FORMAT))) self.__delDirsIfNeeded(tempPath) self.__moveSubDirsToDir(self.mydbRRdFilesOutputPath, tempPath) self.__moveFilesToDir(self.myDescriptionDbFilesPath, tempPath) self.__createCompressedTar(tempPath, tempPath + ".gzip") #Clean self.__delDirsIfNeeded(tempPath) #countersDBInfo - A dictionary that holds key = counter ID value = counter descriptor object #jobsQueue - The queue for the design pattern used, through it the sampled counters are sent to the aggregator to be stored #logger - dah #dbFilesOutputPath - The path where the db files will be stored #dbFilesBaseName - The prefix for the db files' names #rrdtoolExePath - The path to the RRDTool .exe file def init(self, countersDBInfo, jobsQueue, logger, baseLogger, dataRunDirPath, descriptionDbFilesPath, dbFilesOutputPath, dbFilesBaseName, rrdtoolExePath, rrdtoolWrapperExePath, oldDataDir, prevRunDir, updateBurstCount, updateBurstIntervalMsec, enableRecovery): #Initialize memeber variables self.myJobsQueue = jobsQueue self.myLogger = logger self.myDescriptionDbFilesPath = descriptionDbFilesPath self.myDataRunDirPath = dataRunDirPath self.mydbRRdFilesOutputPath = dbFilesOutputPath self.myOldDataDir = oldDataDir self.myPrevRunDir = prevRunDir self.__makeDirsIfNeeded(self.myPrevRunDir) self.myUpdateBurstCount = updateBurstCount self.myUpdateBurstInterval = updateBurstIntervalMsec / 1000.0 self.myEnableRecovery = enableRecovery # The history files rotating file self.myHistoryFilePattern = "update-" self.myHistoryFileSuffix = ".csv" self.myHistoryRotatingFile = RotatingFileSizeEnforcer( baseLogger, self.myPrevRunDir, self.myHistoryFilePattern, self.myHistoryFileSuffix) self.myHistoryRotatingFile.prepare() #Create meta-data DB #dbTimeString = datetime.now() #Part of the file name is a date string dbTimeString = my_datetime.now( ) #Part of the file name is a date string self.myDescriptionsDB = SQLiteDescriptorsDB() #Search for an old DB file sqliteDB = [ f for f in os.listdir(descriptionDbFilesPath) if f.lower().endswith(SQLITE_EXTENTION) ] rrdDBs = self.getPreviousRrdPaths(dbFilesOutputPath) if len(sqliteDB) == 0: if len(rrdDBs) > 0: self.myLogger.notice( "StatsAggregator - sqlite file wasn't found while " + str(len(rrdDBs)) + " rrd files exist") self.__copyDbsToOldDir(dbTimeString) else: self.myLogger.debug5( "StatsAggregator - sqlite and RRD files doesn't exist - ok" ) else: if len(rrdDBs) == 0: self.myLogger.notice( "StatsAggregator - sqlite file found while no rrd files exist" ) self.__copyDbsToOldDir(dbTimeString) else: self.myLogger.debug5( "StatsAggregator - RRD and sqlite files doesn't exist - ok" ) self.myDescriptionsDB.init(self.myLogger, descriptionDbFilesPath, dbFilesBaseName) #Distinguish between old and new counters previousConfigurations = self.myDescriptionsDB.getConfigurationsFromDB( ) if previousConfigurations is None: #notice Error self.myLogger.notice( "StatsAggregator - existing sqlite file is malformed") self.__copyDbsToOldDir(dbTimeString) previousConfigurations = [] self.myLogger.debug2("StatsAggregator old configuration DB loaded") #Get only the new counters out of all the configured ones 
newCounters = self.diffAgainstNewConfig(countersDBInfo, previousConfigurations) if newCounters is None: #There should be no counter changes, if one is found, we copy all previous data to a temp directory and create new DBs self.myLogger.notice( "An ilegal change was made in the configurations file - all previous data copied to " + self.myOldDataDir + ". Creating new stats DBs") self.__copyDbsToOldDir(dbTimeString) #All counters are considered new newCounters = countersDBInfo #Now we need to reCreate the self.myDescriptionsDB sience we moved the file self.myDescriptionsDB = SQLiteDescriptorsDB() self.myDescriptionsDB.init(self.myLogger, descriptionDbFilesPath, dbFilesBaseName) previousConfigurations = [] if len(previousConfigurations) > 0: previousCountersDbPathList = self.myDescriptionsDB.getPreviousDbPath( ) if len(previousCountersDbPathList) != 1: self.myLogger.error( "StatsAggregator - too many sqlite files found") self.__copyDbsToOldDir(dbTimeString) newCounters = countersDBInfo self.myLogger.debug2( "StatsAggregator - previous counters configuration was empty - creating a new SQLite DB" ) errFlag = self.myDescriptionsDB.createDB( countersDBInfo, dbTimeString, newCounters) if errFlag: self.myLogger.error( "StatsAggregator - create db state 1 failed") return errFlag else: previousCountersDbPath = self.myFileName = os.path.join( descriptionDbFilesPath, previousCountersDbPathList[0]) #If we are here then this is not the first run on this machine if newCounters is None: self.myLogger.error( "StatsAggregator - error occured while comparing counters from sqlite file and new configured ones" ) return 3215374 if len(newCounters) == 0: self.myLogger.debug2( "StatsAggregator - no new counters configured") errFlag = self.myDescriptionsDB.createDB( countersDBInfo, dbTimeString, {}, previousCountersDB=previousCountersDbPath) if 0 < errFlag: self.myLogger.error( "StatsAggregator - create db state 2 failed") return errFlag else: self.myLogger.debug2( "StatsAggregator - %d new counters configured" % len(newCounters)) errFlag = self.myDescriptionsDB.createDB( countersDBInfo, dbTimeString, newCounters, previousCountersDB=previousCountersDbPath) if 0 < errFlag: self.myLogger.error( "StatsAggregator - create db state 3 failed") return errFlag #If we are hear then 1. It is not the first run 2. 
Some coutners were added to DB in previous run and wasn't updated self.myLogger.debug2("StatsAggregator - updating old counters") errFlag = self.myDescriptionsDB.updateDB( previousConfigurations) if 0 < errFlag: self.myLogger.error( "StatsAggregator - updating previous counters UI settings failed" ) return errFlag errFlag = self.myDescriptionsDB.updateDB(newCounters) if 0 < errFlag: self.myLogger.error( "StatsAggregator - updating new counters UI settings failed" ) return errFlag else: newCounters = countersDBInfo #If we are here then this is probably the first run on this machine or someone deleted all previous data self.myLogger.debug2( "StatsAggregator - previous counters configuration was empty - creating a new SQLite DB" ) errFlag = self.myDescriptionsDB.createDB(countersDBInfo, dbTimeString, newCounters) if 0 < errFlag: self.myLogger.error( "StatsAggregator - create db state 4 failed") return errFlag #Create values DB self.myValuesDB = RRDToolValuesDB() self.myValuesDB.init(self.myDescriptionsDB, self.myLogger, self.myDataRunDirPath, dbFilesOutputPath, dbFilesBaseName, rrdtoolExePath, rrdtoolWrapperExePath) errFlag = self.myValuesDB.createDB(countersDBInfo, dbTimeString) if errFlag: self.myLogger.error("StatsAggregator - create db state 5 failed") return errFlag #SQLite won't allow us to use this cursur from other threads. self.myDescriptionsDB.closeDBConnection() self.myRunFlag = True threading.Thread.__init__(self) return 0 def setAggrHistory(self, aggrHistory): self.myAggrHistory = aggrHistory self.myNumHistoryFiles = 0 def setNumHistoryFiles(self, numHistoryFiles): self.myNumHistoryFiles = numHistoryFiles def loadHistoryFiles(self): # Get a list of history files and sort them in an alphnumeric order. # Since kick number and date are encoded in the file name we will get # the files in creation order historyFiles = os.listdir(self.myPrevRunDir) historyFiles.sort() self.myLogger.notice( "StatsAggregator load history files. Found %d files in prev run folder %s" % (len(historyFiles), self.myPrevRunDir)) # Sort the list of files for fileName in historyFiles: self.myLogger.debug1("StatsAggregator loading history file %s", fileName) fullName = os.path.join(self.myPrevRunDir, fileName) with open(fullName) as f: lineNum = 0 for line in f: try: lineNum += 1 (name, value, timestamp, valueType) = self.__parseHistoryLine(line.rstrip()) self.myAggrHistory.update(name, value, valueType, timestamp) except Exception as ex: self.myLogger.error( "Error loading line %d in file %s. Discard rest of the file. %s" % (lineNum, fileName, ex)) break self.myLogger.notice("StatsAggregator finished loading history files") def saveHistoryFile(self, allUpdatesStr): self.myHistoryRotatingFile.rotate() fileName = self.myHistoryRotatingFile.getCurrentFileName() self.myLogger.debug1("StatsAggregator save history file %s" % fileName) try: with open(fileName, 'w') as f: f.write(allUpdatesStr) except Exception as ex: self.myLogger.error("Error write update history to file %s. %s" % (fileName, ex)) # To enforce limit on number of history files we delete the oldest files. 
historyFiles = os.listdir(self.myPrevRunDir) historyFiles.sort() numFiles = len(historyFiles) numToDelete = numFiles - self.myNumHistoryFiles idx = 0 self.myLogger.debug3( "StatsAggregator enforce history limit: found %s, allowed %d" % (numFiles, self.myNumHistoryFiles)) while numToDelete > 0: fullPath = os.path.join(self.myPrevRunDir, historyFiles[idx]) try: self.myLogger.debug3( "StatsAggregator enforce history limit: remove %s" % historyFiles[idx]) os.remove(fullPath) except Exception as ex: self.myLogger.error( "Enforce history limit: Error delete file %s. %s" % (fullPath, ex)) idx += 1 numToDelete -= 1 # This is the module's main function. It dequeues the jobs from the queue and execute them # The jobs ***MUST*** be executed synchronically since in the future the counters might change # and a new file has to be created before inserting any value def run(self): self.myLogger.notice("Stats Aggregator running") reportLimiter = EventReportLimiter(3600) try: self.myRunFlag = True self.myDescriptionsDB.connectToDb() while (self.myRunFlag): self.myLogger.debug2( "StatsAggregator blocking in order to dequeue. Time = %d" % time.time()) job = self.__dequeue() if None == job: self.myLogger.debug1( "StatsAggregator dequeued a job from jobs queue. Message is None -> unblock message" ) continue self.myLogger.debug2( "StatsAggregator message dequeued successfully. Time = %d" % time.time()) # If this is a sample result job (The only legal message for v1.0) startTime = time.time() if (job.quack() == stats_queue_objects.AggregationQueueJobType.VALUES): self.myLogger.debug2("StatsAggregator received values job") # Iterate on all received counters and update global aggr dictionary. # Respect counter burst params allUpdatesStr = "" # This will be saved to a history file counterIdx = 0 numCounters = len(job.myCountersArray) while counterIdx < numCounters: burstStartTime = time.time() self.myAggrHistory.lock() i = 0 self.myLogger.debug3("Start burts") while i < self.myUpdateBurstCount: counterVal = job.myCountersArray[counterIdx] self.myLogger.debug5( "Updating couner id %s values (%s, %s)", counterVal.myCounterId, counterVal.myValue, counterVal.myTimeStamp) valueType = type(counterVal.myValue).__name__ self.myAggrHistory.update(counterVal.myCounterId, counterVal.myValue, valueType, counterVal.myTimeStamp) allUpdatesStr += self.__formatHistoryLine( counterVal.myCounterId, counterVal.myValue, counterVal.myTimeStamp, valueType) i += 1 counterIdx += 1 if counterIdx == numCounters: break self.myAggrHistory.unlock() burstTime = time.time() - burstStartTime self.myLogger.debug3("End burts") # Warn about a long burst, or count it for the next warning maxAllowedBurstTime = self.myUpdateBurstInterval * 0.75 if burstTime > maxAllowedBurstTime: (shouldReport, numEvents) = reportLimiter.shouldReport() if shouldReport: self.myLogger.warning( "Aggregator update burst took too long: %s seconds, max allowed is %s. (Occured %s times since last reported)", burstTime, maxAllowedBurstTime, numEvents) # If we have more counters, sleep if counterIdx < numCounters: remainingTime = self.myUpdateBurstInterval - burstTime if remainingTime > 0: time.sleep(remainingTime) self.saveHistoryFile(allUpdatesStr) insertTimeSpan = time.time() - startTime if insertTimeSpan > MAX_INSERT_TO_DB_TIME_SPAN: self.myLogger.warning( "Execute job exceeds the time limit. Limit is %d sec. Elapsed time is %s sec, numCounters=%s. 
Queue size is: %d", MAX_INSERT_TO_DB_TIME_SPAN, insertTimeSpan, numCounters, self.myJobsQueue._qsize()) else: self.myLogger.notice( "Execute job. Elapsed time is %s sec, numCounters=%s. Queue size is: %d", insertTimeSpan, numCounters, self.myJobsQueue._qsize()) if self.myRunFlag: self.myLogger.notice( "Stats Aggregator thread ended enexpectedly") else: self.myLogger.notice("Stats Aggregator thread ended") self.myRunFlag = False except: #Loop ended - thread is shutting down self.myRunFlag = False self.myLogger.error( "UNExpected exception on Stats Aggregator. Exception info: %s" % stats_queue_objects.formatExceptionInfo()) #When the while loop ended = shutdown - close the DBs self.myValuesDB.closeDBConnection() self.myDescriptionsDB.closeDBConnection() self.myLogger.debug2("StatsAggregator thread ended") def __formatHistoryLine(self, name, value, timestamp, valueType): return "%s,%s,%s,%s\n" % (name, value, timestamp, valueType) def __parseHistoryLine(self, line): (name, strValue, timestamp, valueType) = line.split(',') floatTimestamp = None if timestamp != 'None': floatTimestamp = float(timestamp) value = strValue if valueType == 'int': value = int(strValue) elif value == 'float': value = float(strValue) # name is actually a numeric counter id return (int(name), value, floatTimestamp, valueType) # creates a compressed tar file containing all the files specified + the temp directory def __createCompressedTar(self, directory, tarOutputFilePath): try: tarResultPath = tarOutputFilePath tar = tarfile.open(tarResultPath, TAR_GZIP_FLAG) #tar.dereference = True - This is too dangerous - Can link to huge files/directories - removed on purpuse - don't add this! except: self.myLogger.error( "Exception occured while creating the Tar file - tar file can't be created" + stats_queue_objects.formatExceptionInfo()) return False try: tar.add(directory) except: self.myLogger.error( "Exception occured while creating the Tar file" + stats_queue_objects.formatExceptionInfo()) return False try: tar.close() except: self.myLogger.error( "Exception occured while creating the Tar file - tar file can't be closed" + stats_queue_objects.formatExceptionInfo()) return False self.myLogger.debug3("Tar file created successfully") return True def __delDirsIfNeeded(self, dirPath): try: if os.path.exists(dirPath): if os.path.isdir(dirPath): shutil.rmtree(dirPath) except: self.myLogger.error( "UNExpected exception on Stats Aggregator.__delDirsIfNeeded. Exception info: %s" % stats_queue_objects.formatExceptionInfo()) def __makeDirsIfNeeded(self, dirPath): try: if os.path.exists(dirPath): if os.path.isdir(dirPath): return True else: return False else: oldUmask = os.umask(0) os.makedirs(dirPath) os.umask(oldUmask) return True except: return False def __moveSubDirsToDir(self, srcDir, dstDir): try: self.__makeDirsIfNeeded(dstDir) listOfFiles = os.listdir(srcDir) for d in listOfFiles: srcSubDir = os.path.join(srcDir, d) if srcSubDir == dstDir: continue if os.path.isdir(srcSubDir): shutil.move(srcSubDir, dstDir) except: self.myLogger.error( "UNExpected exception on Stats Aggregator.__moveSubDirsToDir. 
Exception info: %s" % stats_queue_objects.formatExceptionInfo()) def __moveFilesToDir(self, srcDir, dstDir): self.__makeDirsIfNeeded(dstDir) for root, dirs, files in os.walk(srcDir): for f in files: srcFile = os.path.join(srcDir, f) shutil.copy(srcFile, dstDir) os.remove(srcFile) #The user has to check queue size before def __dequeue(self): result = self.myJobsQueue.get(True) return result def isRunning(self): #return True; return self.myRunFlag def end(self): self.myLogger.notice("Stats Aggregator request to end thread") self.myRunFlag = False self.myJobsQueue.put(None) def setLogger(self, logger): self.myLogger = None self.myLogger = logger self.myDescriptionsDB.setLogger(logger) self.myValuesDB.setLogger(logger) #returns the changes between the new configuration and the old one #If there is an ilegal change logs and exits def diffAgainstNewConfig(self, newConfig, previousConfig): if newConfig is None: self.myLogger.error( "Unexpected error occured during comparison between new and old configurations" ) return None #This means there aren't any previous configurations if len(previousConfig) == 0: return newConfig #New added counters are not compared newCounters = {} #Go over all counters in the new configurations for counter in newConfig.values(): #If the counter is new - doesn't exist in previous configuration if counter.myCounterId in previousConfig.keys(): #Check that it wasn't changed. If not mark it as new, otherwise - fail - it is ilegal to change counters without deleting the history / changing directories if counter != previousConfig[counter.myCounterId]: #If you got here it means that an ilegal diff was found self.myLogger.error( "An ilegal change was made in the configurations file. Counter Info: %s" % counter.toString()) return None else: self.myLogger.debug3("New counter found. Counter = %s" % counter.toString()) newCounters[counter.myCounterId] = counter #We don't check for missing counters since it is legal to disable counters return newCounters
class LogpusherAdapter: #----------------------------------------------------------------------------------------------------------------------- def __init__ (self, logger, logpusherExecutable, monitorListenPort, forceIfaceToBind, historyDirMaxSizeMB): self._log = logger.createLoggerSameModule(G_GROUP_NAME_LOGPUSHER_ADAPTER) self._logpusherExecutable = logpusherExecutable # TODO(amiry) - Currently we support only one instance of logpusher. # When we want to support more, the listen port should be allocated in the reporting manager self._monitorListenPort = monitorListenPort self._forceIfaceToBind = forceIfaceToBind self._historyDirMaxSizeMB = historyDirMaxSizeMB self._logpusher = None self._startCnt = 0 self._logType = None self._logDir = None self._confDir = None self._historyDir = None self._configFile = None self._shouldRestart = False self._currIfaceName = None self._gotFirstNetworkConfig = False self._gracefullyStopped = True # Name is initialized when the export configuration is initialized (and changed). # Network configuration is loaded from a different thread and might be initialized before # the export is initialized self._name = "[uninitialized]" # We need to synch the network configuration change and the watchdog. # Other configuration changes are also synched by the export_list_manager # self._netConfigurationLock = threading.RLock() self._netConfigurationLock = threading.Lock() #----------------------------------------------------------------------------------------------------------------------- def updateConfig (self, logType, logDir, confDir, historyDir, name): self._log("update-config").debug1("Update config (%s)" % self._name) self._logType = logType self._logDir = logDir self._confDir = confDir self._historyDir = historyDir self._configFile = os.path.join(self._confDir, "logpusher.conf") self._pidFile = os.path.join(self._confDir, "logpusher.pid") self._name = name self._shouldRestart = True # File size enforcer on the history dir self._historySizeEnforcer = RotatingFileSizeEnforcer(self._log, self._logDir, "tran-"+self._name+"-", ".log.bz") self._historySizeEnforcer.initFileRotatingPattern( "%s.%s" % (RotatingFileSizeEnforcer.KICK_NUM_4, RotatingFileSizeEnforcer.EPOCH_SECONDS_10)) self._historySizeEnforcer.setTotalSize(self._historyDirMaxSizeMB * 1024 * 1024) self._lastHistoryDirSizeEnforce = 0 #----------------------------------------------------------------------------------------------------------------------- def updateNetworkConfig (self, newIfaceList): self._log("update-network-config").debug1("Update network config (%s). New config %s" % (self._name, newIfaceList)) self._gotFirstNetworkConfig = True ifaceChanged = False # Need to change iface configChanged = False # Only need to restart if self._currIfaceName: # Check if the configuration of the active iface changed newIface = newIfaceList.get(self._currIfaceName) if newIface is None: self._log("curr-iface-not-exist").notice("Curr iface doesn't exist in new network configuration (%s). Curr iface %s, new iface list %s" % (self._name, self._currIfaceName, newIfaceList)) ifaceChanged = True else: if not self._isIfaceActive(newIface): self._log("curr-iface-not-active").notice("Curr iface is not active in new network configuration (%s). Curr iface %s, new iface %s" % (self._name, self._currIfaceName, newIface)) ifaceChanged = True else: if self._currIface["ipv4Address"] != newIface["ipv4Address"]: self._log("curr-iface-change-ip").notice("Curr iface changed ip (%s). 
Curr iface %s, new config %s" % (self._name, self._currIfaceName, newIface)) configChanged = True # If iface nas changed, we need to get a new iface to bind if not self._currIfaceName or ifaceChanged: self._getActiveIfaceToBind(newIfaceList) # If we got a new iface we need to restart. # If no iface is active, this whould cause the watchdog to stop the logpusher. self._shouldRestart = True if configChanged: # Curr iface config changed. Need to restart self._shouldRestart = True #----------------------------------------------------------------------------------------------------------------------- def updateNetworkStatus (self, newIfaceList): self._log("update-network-status").debug1("Update network status (%s). New config %s" % (self._name, newIfaceList)) # If we don't have an interface we need to do the whole network update if self._currIfaceName is None: self.updateNetworkConfig(newIfaceList) return # If we do have an iface, check it its status has changed. for ifaceName, iface in newIfaceList.items(): if ifaceName == self._currIfaceName: if self._currIface["ipv4ServiceStatus"] and not iface["ipv4ServiceStatus"]: self._log("iface-service-status-changed").notice("Bind iface %s service status changed from %s to %s" % (self._currIfaceName, self._currIface["ipv4ServiceStatus"], iface["ipv4ServiceStatus"])) if self._forceIfaceToBind: self._log("no-failover").notice("Failover is not allowed. Forced iface is %s" % self._forceIfaceToBind) self.stop() else: prevName = self._currIfaceName self._getActiveIfaceToBind(newIfaceList) self._log("failover").notice("Failover from iface %s to %s" % (prevName, self._currIfaceName)) self._shouldRestart = True # Update the curr iface self._currIface["ipv4ServiceStatus"] = iface["ipv4ServiceStatus"] #----------------------------------------------------------------------------------------------------------------------- def watchdog (self): self._log("watchdog").debug2("Watchdog (%s)" % self._name) if self._shouldRestart: self.stop() self._shouldRestart = False self._log("watchdog-before").debug2("Watchdog (%s). Before configuration lock" % self._name) with self._netConfigurationLock: self._log("watchdog-after").debug2("Watchdog (%s). After configuration lock" % self._name) if self._currIfaceName is None: if self._gotFirstNetworkConfig: self._log("no-net-config-warning").warning("Can't start logpusher (%s). No network interface is available" % self._name) else: self._log("no-net-config-ok").notice("Can't start logpusher (%s). Waiting for network config" % self._name) return if self._logpusher is None or not self._logpusher.isUp(): if not self._gracefullyStopped: self._log("should-start").warning("Logpusher is down (%s). Starting" % self._name) self._start() # Enforce "sent archive" size limit if time.time() - self._lastHistoryDirSizeEnforce > HISTORY_ENFORCE_SIZE_INTERVAL_SEC: self._log("history-enforce-size").debug1("Watchdod (%s). 
Enforce history dir size (%d MB)" % (self._name, self._historyDirMaxSizeMB)) self._historySizeEnforcer.enforceSize() #----------------------------------------------------------------------------------------------------------------------- def _start (self): self._log("starting-logpusher").notice("Starting logpusher (%s)", self._name) if not self._createConfigFile(): return False if self._logpusher is None: self._logpusher = a.infra.subprocess.Subprocess("llnw-logpusher", self._log) if self._logpusher is None: self._log("error-creating-subprocess").warning("Error creating subprocess (%s)" % self._name) return cmd = "%s -d -f %s" % (self._logpusherExecutable, self._configFile) self._logpusher.start(cmd, shell = True, setpgrp = True, stdout = a.infra.subprocess.PIPE, stderr = a.infra.subprocess.STDOUT, bufsize = 1) self._writePidFile() self._startCnt += 1 # Read logpusher stdout in the background self._thread = threading.Thread(target = self._logOutput, args = (self._log, self._logpusher)) self._thread.daemon = True # thread dies with the program self._thread.start() self._gracefullyStopped = False #----------------------------------------------------------------------------------------------------------------------- def stop (self): if not self._logpusher: return if self._logpusher.isUp(): self._log("stoping-logpusher").notice("Stopping logpusher (%s)", self._name) self._logpusher.stop(5) # Timeout 5 seconds self._deletePidFile() #self._logpusher = None self._gracefullyStopped = True #----------------------------------------------------------------------------------------------------------------------- def _createConfigFile (self): try: # Create logpusher's config file f = open(self._configFile, 'w') # We do our own bzip2 # f.write("log %s/*.log bzip2 flock olddir %s type %s\n" % (self._logDir, self._historyDir, self._logType)) f.write("log %s/*.log.bz2 oldtime 1s flock olddir %s type %s\n" % (self._logDir, self._historyDir, self._logType)) f.write("monitorListenAddress localhost\n"); f.write("monitorListenPort %d\n" % self._monitorListenPort); f.write("sourceAddress %s\n" % self._currIface["ipv4Address"]); f.close() self._log("create-config-file").info("logpusher config file created (%s). File %s" % (self._name, self._configFile)) return True except Exception as ex: self._log("create-config-file-error").error("Error creating logpusher config file (%s). File %s. Exception %s" % (self._name, self._configFile, ex)) return False #----------------------------------------------------------------------------------------------------------------------- def _getActiveIfaceToBind(self, ifaceList): newIface = None newIfaceName = None for ifaceName, iface in ifaceList.items(): if self._isIfaceActive(iface): newIface = iface newIfaceName = ifaceName break elif ifaceName == self._forceIfaceToBind: # The current iface is the iface that we are forced to bind to and if is not active... break with self._netConfigurationLock: if newIface: self._currIface = newIface.copy() else: self._currIface = None self._currIfaceName = newIfaceName self._log("get-iface-to-bind").debug1("Got iface %s. 
%s" %(self._currIfaceName, self._currIface)) #----------------------------------------------------------------------------------------------------------------------- def _isIfaceActive(self, iface): enable = iface.get("enable", False) ipv4ServiceStatus = iface.get("ipv4ServiceStatus", False) return enable and ipv4ServiceStatus #----------------------------------------------------------------------------------------------------------------------- def _writePidFile (self): if self._logpusher: pid = self._logpusher.getPid() try: # Create logpusher's pid file f = open(self._pidFile, 'w') f.write("%d" % (pid)) f.close() self._log("create-pid-file").debug1("logpusher pid file created (%s). File %s" % (self._name, self._pidFile)) return True except Exception as ex: self._log("create-pid-file-error").error("Error creating logpusher pid file (%s). File %s. Exception %s" % (self._name, self._pidFile, ex)) return False #----------------------------------------------------------------------------------------------------------------------- def _deletePidFile (self): try: # Delete logpusher's pid file. Indicates proper shutdown os.remove(self._pidFile) self._log("delete-pid-file").debug1("logpusher pid file deleted (%s). File %s" % (self._name, self._pidFile)) return True except Exception as ex: self._log("delete-pid-file-error").error("Error deleting logpusher pid file (%s). File %s. Exception %s" % (self._name, self._pidFile, ex)) return False #----------------------------------------------------------------------------------------------------------------------- def _readPidFile (self): try: with open(self._pidFile, 'r') as f: pid = f.read() try: int_pid = int(pid) except ValueError: self._log("pid-file-error").error("Error reading logpusher pid file (%s). File %s Dosen't contain a pid" % (self._name, self._pidFile)) return 0 return int_pid except Exception as ex: # No pid file self._log("read-pid-file-error").debug1("Error reading logpusher pid file (%s). File %s. Exception %s" % (self._name, self._pidFile, ex)) return 0 #----------------------------------------------------------------------------------------------------------------------- def _logOutput(self, logger, logpusher): while True: try: line = logpusher.stdout.readline() if not line: logger("logpusher-stdout-thread-exit").notice("Logpusher stdout logging thread exit (%s)" % self._name) break logger("logpusher-stdout").notice("%s: %s" % (self._name, line.rstrip())) except IOError, ex: logger("logpusher-stdout-ioerror").warning("%s: %s" % (self._name, ex))