def getTimeSeperatorsBasedOnAvailableMemory( startTime, endTime, clients, fileType, machines ):
    """
        @summary: Returns the time seperators to be used for the transfer
                  in a way that should prevent overloading memory.

        @param startTime: Start time of the transfer to be attempted.
        @param endTime:   End time of the transfer to be attempted.
        @param clients:   List of clients/sources to be transferred.
        @param fileType:  tx or rx.
        @param machines:  Machines on which the clients/sources reside.

        @return: The time seperators.
    """

    # Number of hours covered by the requested transfer.
    nbHours = ( StatsDateLib.getSecondsSinceEpoch( endTime ) - StatsDateLib.getSecondsSinceEpoch( startTime ) ) / StatsDateLib.HOUR

    # One seperator per hour to be transferred, starting at startTime.
    hourlySeperators = [ startTime ]
    hourlySeperators.extend( StatsDateLib.getSeparatorsWithStartTime( startTime = startTime, width = nbHours * StatsDateLib.HOUR, interval = StatsDateLib.HOUR )[:-1] )

    # Collect, hour per hour, the pickle files involved and their total size.
    everyFile   = []
    sizePerHour = []
    for hourlySeperator in hourlySeperators:
        filesForThisHour = PickleMerging.createNonMergedPicklesList( hourlySeperator, machines, fileType, clients )
        everyFile.extend( filesForThisHour )
        sizePerHour.append( MemoryManagement.getTotalSizeListOfFiles( filesForThisHour ) )

    requiredMemory = MemoryManagement.getTotalSizeListOfFiles( everyFile )

    # Never expect more than 25% of the available memory to be usable for pickle loading.
    usableMemory = MemoryManagement.getCurrentFreeMemory( marginOfError = 0.75 )

    if requiredMemory >= usableMemory:
        # Not enough room to treat everything at once : split the timespan.
        return MemoryManagement.getSeperatorsForHourlyTreatments( startTime, endTime, usableMemory, sizePerHour )

    # Enough memory available : a single [start, end] treatment will do.
    return [ startTime, endTime ]
def mergePicklesFromDifferentHours( logger = None , startTime = "2006-07-31 13:00:00",\
                                    endTime = "2006-07-31 19:00:00", client = "satnet",\
                                    machine = "pdsPM", fileType = "tx" ):
    """
        @summary : This method merges entire hourly pickles files together.

        @param logger   : Logger to use for debug output. If None, logging is disabled.
        @param startTime: ISO start of the span to merge (rounded down to the hour below).
        @param endTime  : ISO end of the span to merge.
        @param client   : Client/source whose pickles are merged.
        @param machine  : Machine on which the pickles reside.
        @param fileType : tx or rx.

        @return : A FileStatsCollector holding the concatenated entries.

        @note : This does not support merging part of the data of pickles.
    """

    if logger != None :
        logger.debug( _("Call to mergeHourlyPickles received.") )
        logging = True
    else:
        logging = False

    pickles = []  # One pickle file name per hour of the span.
    entries = {}  # Merged entries, keyed by a running integer index.

    # Width in seconds of the whole span; startTime is then rounded to the hour.
    width = StatsDateLib.getSecondsSinceEpoch( endTime ) - StatsDateLib.getSecondsSinceEpoch( startTime )
    startTime = StatsDateLib.getIsoWithRoundedHours( startTime )

    # Build one seperator per hour to cover the span.
    seperators = [startTime]
    seperators.extend( StatsDateLib.getSeparatorsWithStartTime( startTime = startTime , width=width, interval=60*StatsDateLib.MINUTE )[:-1])

    for seperator in seperators :
        pickles.append( StatsPickler.buildThisHoursFileName( client = client, offset = 0, currentTime = seperator, machine = machine, fileType = fileType ) )

    startingNumberOfEntries = 0  # Next free index in the merged entries dict.
    #print "prior to loading and merging pickles : %s " %( StatsDateLib.getIsoFromEpoch( time.time() ) )
    for pickle in pickles :

        if os.path.isfile( pickle ) :
            tempCollection = CpickleWrapper.load( pickle )
            if tempCollection != None :
                # Append this hour's entries after the ones already merged.
                for i in xrange( len( tempCollection.fileEntries ) ):
                    entries[startingNumberOfEntries + i] = tempCollection.fileEntries[i]
                startingNumberOfEntries = startingNumberOfEntries + len( tempCollection.fileEntries )
            else:
                # NOTE(review): an unloadable pickle aborts the whole process here.
                sys.exit()
        else:
            # Missing hourly pickle : pad with 60 empty (one-per-minute) entries
            # so later hours keep their expected positions.
            emptyEntries = PickleMerging.fillWithEmptyEntries( nbEmptyEntries = 60, entries = {} )
            for i in xrange( 60 ):
                entries[i + startingNumberOfEntries ] = emptyEntries [i]
            startingNumberOfEntries = startingNumberOfEntries + 60

    #print "after the loading and merging og pickles : %s " %( StatsDateLib.getIsoFromEpoch( time.time() ) )

    statsCollection = FileStatsCollector( startTime = startTime , endTime = endTime, interval = StatsDateLib.MINUTE, totalWidth = width, fileEntries = entries,fileType= fileType, logger = logger, logging = logging )

    return statsCollection
def main(): """ @summary: Small test case to see if everything works fine """ statsConfig = StatsConfigParameters() statsConfig.getAllParameters() machineconfig = MachineConfigParameters() machineconfig.getParametersFromMachineConfigurationFile() currentTimeEpochFormat = time.time() - (120 * 60) endTime = StatsDateLib.getIsoWithRoundedHours( StatsDateLib.getIsoFromEpoch(currentTimeEpochFormat)) startTime = StatsDateLib.getIsoWithRoundedHours( StatsDateLib.getIsoFromEpoch(currentTimeEpochFormat - (StatsDateLib.DAY * 7))) print startTime, endTime groupName = statsConfig.groupParameters.groups[0] clients = statsConfig.groupParameters.groupsMembers[groupName] machines = statsConfig.groupParameters.groupsMachines[groupName] fileType = statsConfig.groupParameters.groupFileTypes[groupName] seperators = [startTime] seperators.extend( StatsDateLib.getSeparatorsWithStartTime( startTime=startTime, width=StatsDateLib.DAY * 7, interval=StatsDateLib.HOUR)[:-1]) listOfFiles = PickleMerging.createMergedPicklesList( startTime, endTime, clients, groupName, fileType, machines, seperators) listOfFileSizes = MemoryManagement.getListOfFileSizes(listOfFiles) currentFreeMemory = MemoryManagement.getCurrentFreeMemory(0.55555) if MemoryManagement.getTotalSizeListOfFiles( listOfFiles) > currentFreeMemory: seperators = MemoryManagement.getSeperatorsForHourlyTreatments( startTime, endTime, currentFreeMemory, listOfFileSizes) print seperators else: print "We have %s bytes free and the pickles require %s bytes" % ( currentFreeMemory, getTotalSizeListOfFiles(listOfFiles)) print "we have enough memory to merge all these pickles."
def getTimeSeperatorsBasedOnAvailableMemory( startTime, endTime, clients, fileType, machines ):
    """
        @summary: Returns the time seperators to be used for the transfer
                  in a way that should prevent overloading memory.

        @param startTime: Start time of the transfer to be attempted.
        @param endTime:   End time of the transfer to be attempted.
        @param clients:   List of clients/sources to be transferred.
        @param fileType:  tx or rx.
        @param machines:  Machines on which the clients/sources reside.

        @return: The time seperators.
    """

    width = 0          # Width in hours of the transfer to be attempted.
    seperators = []    # Time seperators representing every hour to be transferred.
    allFiles =[]       # List of all pickle files that will be involved.
    hourlyFiles = []   # List of all files to be handled for a certain hour.
    hourlyFileSizes = []  # Total file size of the files to be handled at each hour.

    totalSizeToloadInMemory = 0.0   # Total size of all the pickle files to load in memory.
    currentlyAvailableMemory = 0.0  # Total currently available memory on the present machine.
    seperatorsBasedOnAvailableMemory = [startTime, endTime]  # Default: suppose we have all the memory we need.

    width = ( StatsDateLib.getSecondsSinceEpoch( endTime ) - StatsDateLib.getSecondsSinceEpoch( startTime ) ) / StatsDateLib.HOUR

    # One seperator per hour of the span, starting at startTime.
    seperators = [ startTime ]
    seperators.extend( StatsDateLib.getSeparatorsWithStartTime( startTime = startTime , width= width*StatsDateLib.HOUR, interval=StatsDateLib.HOUR )[:-1])

    # Accumulate the files involved and their per-hour total sizes.
    for seperator in seperators:
        hourlyFiles = PickleMerging.createNonMergedPicklesList( seperator, machines, fileType, clients )
        allFiles.extend( hourlyFiles )
        hourlyFileSizes.append( MemoryManagement.getTotalSizeListOfFiles( hourlyFiles ) )

    totalSizeToloadInMemory = MemoryManagement.getTotalSizeListOfFiles( allFiles )
    # Never expect more than 25% of the available memory to be usable for pickle loading.
    currentlyAvailableMemory = MemoryManagement.getCurrentFreeMemory( marginOfError = 0.75 )

    if totalSizeToloadInMemory >= currentlyAvailableMemory:
        # Not enough memory for a single treatment : split the span accordingly.
        seperatorsBasedOnAvailableMemory = MemoryManagement.getSeperatorsForHourlyTreatments( startTime, endTime, currentlyAvailableMemory, hourlyFileSizes )

    return seperatorsBasedOnAvailableMemory
def main(): """ @summary: Small test case to see if everything works fine """ statsConfig = StatsConfigParameters() statsConfig.getAllParameters() machineconfig = MachineConfigParameters() machineconfig.getParametersFromMachineConfigurationFile() currentTimeEpochFormat = time.time() -(120*60) endTime = StatsDateLib.getIsoWithRoundedHours( StatsDateLib.getIsoFromEpoch( currentTimeEpochFormat ) ) startTime = StatsDateLib.getIsoWithRoundedHours( StatsDateLib.getIsoFromEpoch( currentTimeEpochFormat -( StatsDateLib.DAY*7 ) ) ) print startTime, endTime groupName = statsConfig.groupParameters.groups[0] clients = statsConfig.groupParameters.groupsMembers[ groupName ] machines = statsConfig.groupParameters.groupsMachines[ groupName ] fileType = statsConfig.groupParameters.groupFileTypes[ groupName ] seperators = [startTime] seperators.extend( StatsDateLib.getSeparatorsWithStartTime( startTime = startTime , width=StatsDateLib.DAY*7, interval=StatsDateLib.HOUR )[:-1]) listOfFiles = PickleMerging.createMergedPicklesList( startTime, endTime, clients, groupName, fileType, machines, seperators ) listOfFileSizes = MemoryManagement.getListOfFileSizes(listOfFiles) currentFreeMemory = MemoryManagement.getCurrentFreeMemory(0.55555) if MemoryManagement.getTotalSizeListOfFiles( listOfFiles ) > currentFreeMemory: seperators = MemoryManagement.getSeperatorsForHourlyTreatments( startTime, endTime, currentFreeMemory, listOfFileSizes ) print seperators else: print "We have %s bytes free and the pickles require %s bytes" %( currentFreeMemory, getTotalSizeListOfFiles( listOfFiles ) ) print "we have enough memory to merge all these pickles."
def mergePicklesFromDifferentSources( logger = None , startTime = "2006-07-31 13:00:00",\
                                      endTime = "2006-07-31 19:00:00", clients = ["someclient"],\
                                      fileType = "tx", machines = [], groupName = "" ):
    """
        @summary : This method allows user to merge pickles coming from numerous
                   machines covering as many hours as wanted, into a single
                   FileStatsCollector entry.

                   Very usefull when creating graphics on a central server
                   with pickle files coming from remote locations.

        @param logger    : Logger to pass along to the mergers; may be None.
        @param startTime : ISO start of the span (rounded down to the hour below).
        @param endTime   : ISO end of the span.
        @param clients   : Clients/sources whose pickles must be merged.
        @param fileType  : tx or rx.
        @param machines  : Machines from which the pickles originate.
        @param groupName : Optional group name used for version management
                           and for naming the final merger.

        @return : The merged FileStatsCollector.
    """

    # Concatenated names used to label the cross-machine / cross-client mergers.
    combinedMachineName = ""
    combinedClientName = ""
    combinedMachineName = combinedMachineName.join( [machine for machine in machines ] )
    combinedClientName = combinedClientName.join( [client for client in clients] )

    # Version management is keyed on the group when one is supplied.
    if groupName !="":
        clientsForVersionManagement = groupName
    else:
        clientsForVersionManagement = clients

    vc = PickleVersionChecker()
    vc.getClientsCurrentFileList( clients )
    vc.getSavedList( user = combinedMachineName, clients = clientsForVersionManagement )

    width = StatsDateLib.getSecondsSinceEpoch( endTime ) - StatsDateLib.getSecondsSinceEpoch( startTime )
    startTime = StatsDateLib.getIsoWithRoundedHours( startTime )

    # One seperator per hour of the span.
    seperators = [startTime]
    seperators.extend( StatsDateLib.getSeparatorsWithStartTime( startTime = startTime , width=width, interval=60*StatsDateLib.MINUTE )[:-1])

    mergedPickleNames = PickleMerging.createMergedPicklesList( startTime = startTime, endTime = endTime, machines = machines,\
                                                               fileType = fileType, clients = clients, groupName = groupName,\
                                                               seperators = seperators ) #Resulting list of the merger.

    for i in xrange( len( mergedPickleNames ) ) : #for every merger needed

        needToMergeSameHoursPickle = False
        pickleNames = PickleMerging.createNonMergedPicklesList( currentTime = seperators[i], machines = machines, fileType = fileType, clients = clients )

        if not os.path.isfile( mergedPickleNames[i] ):
            needToMergeSameHoursPickle = True
        else:
            for pickle in pickleNames : #Verify every pickle implicated in merger.
                # if for some reason pickle has changed since last time
                if vc.isDifferentFile( file = pickle, user = combinedMachineName, clients = clientsForVersionManagement ) == True :
                    needToMergeSameHoursPickle = True
                    break

        if needToMergeSameHoursPickle == True :#First time or one element has changed
            PickleMerging.mergePicklesFromSameHour( logger = logger , pickleNames = pickleNames , clientName = combinedClientName,\
                                                    combinedMachineName = combinedMachineName, currentTime = seperators[i],\
                                                    mergedPickleName = mergedPickleNames[i], fileType = fileType )
            # Record the versions just merged so unchanged hours are skipped next time.
            for pickle in pickleNames :
                vc.updateFileInList( file = pickle )
            vc.saveList( user = combinedMachineName, clients = clientsForVersionManagement )

    # Once all machines have merges the necessary pickles we merge all pickles
    # into a single file stats entry.
    if groupName !="":
        nameToUseForMerger = groupName
    else:
        nameToUseForMerger = ""
        nameToUseForMerger = nameToUseForMerger.join( [ client for client in clients] )

    newFSC = PickleMerging.mergePicklesFromDifferentHours( logger = logger , startTime = startTime, endTime = endTime, client = nameToUseForMerger,\
                                                           machine = combinedMachineName,fileType = fileType )

    return newFSC
def mergePicklesFromDifferentSources( logger = None , startTime = "2006-07-31 13:00:00",\
                                      endTime = "2006-07-31 19:00:00", clients = ["someclient"],\
                                      fileType = "tx", machines = [], groupName = "" ):
    """
        @summary : This method allows user to merge pickles coming from numerous
                   machines covering as many hours as wanted, into a single
                   FileStatsCollector entry.

                   Very usefull when creating graphics on a central server
                   with pickle files coming from remote locations.

        @param logger    : Logger to pass along to the mergers; may be None.
        @param startTime : ISO start of the span (rounded down to the hour below).
        @param endTime   : ISO end of the span.
        @param clients   : Clients/sources whose pickles must be merged.
        @param fileType  : tx or rx.
        @param machines  : Machines from which the pickles originate.
        @param groupName : Optional group name used for version management
                           and for naming the final merger.

        @return : The merged FileStatsCollector.
    """

    # Concatenated names used to label the cross-machine / cross-client mergers.
    combinedMachineName = ""
    combinedClientName = ""
    combinedMachineName = combinedMachineName.join( [machine for machine in machines])
    combinedClientName = combinedClientName.join( [client for client in clients])

    # Version management is keyed on the group when one is supplied.
    if groupName != "":
        clientsForVersionManagement = groupName
    else:
        clientsForVersionManagement = clients

    vc = PickleVersionChecker()
    vc.getClientsCurrentFileList(clients)
    vc.getSavedList(user=combinedMachineName, clients=clientsForVersionManagement)

    width = StatsDateLib.getSecondsSinceEpoch( endTime) - StatsDateLib.getSecondsSinceEpoch(startTime)
    startTime = StatsDateLib.getIsoWithRoundedHours(startTime)

    # One seperator per hour of the span.
    seperators = [startTime]
    seperators.extend( StatsDateLib.getSeparatorsWithStartTime(startTime=startTime, width=width, interval=60 * StatsDateLib.MINUTE)[:-1])

    mergedPickleNames = PickleMerging.createMergedPicklesList( startTime = startTime, endTime = endTime, machines = machines,\
                                                               fileType = fileType, clients = clients, groupName = groupName,\
                                                               seperators = seperators ) #Resulting list of the merger.

    for i in xrange(len(mergedPickleNames)): #for every merger needed

        needToMergeSameHoursPickle = False
        pickleNames = PickleMerging.createNonMergedPicklesList( currentTime=seperators[i], machines=machines, fileType=fileType, clients=clients)

        if not os.path.isfile(mergedPickleNames[i]):
            needToMergeSameHoursPickle = True
        else:
            for pickle in pickleNames: #Verify every pickle implicated in merger.
                # if for some reason pickle has changed since last time
                if vc.isDifferentFile( file=pickle, user=combinedMachineName, clients=clientsForVersionManagement) == True:
                    needToMergeSameHoursPickle = True
                    break

        if needToMergeSameHoursPickle == True: #First time or one element has changed
            PickleMerging.mergePicklesFromSameHour( logger = logger , pickleNames = pickleNames , clientName = combinedClientName,\
                                                    combinedMachineName = combinedMachineName, currentTime = seperators[i],\
                                                    mergedPickleName = mergedPickleNames[i], fileType = fileType )
            # Record the versions just merged so unchanged hours are skipped next time.
            for pickle in pickleNames:
                vc.updateFileInList(file=pickle)
            vc.saveList(user=combinedMachineName, clients=clientsForVersionManagement)

    # Once all machines have merges the necessary pickles we merge all pickles
    # into a single file stats entry.
    if groupName != "":
        nameToUseForMerger = groupName
    else:
        nameToUseForMerger = ""
        nameToUseForMerger = nameToUseForMerger.join( [client for client in clients])

    newFSC = PickleMerging.mergePicklesFromDifferentHours( logger = logger , startTime = startTime, endTime = endTime, client = nameToUseForMerger,\
                                                           machine = combinedMachineName,fileType = fileType )

    return newFSC
def mergePicklesFromDifferentHours( logger = None , startTime = "2006-07-31 13:00:00",\
                                    endTime = "2006-07-31 19:00:00", client = "satnet",\
                                    machine = "pdsPM", fileType = "tx" ):
    """
        @summary : This method merges entire hourly pickles files together.

        @param logger   : Logger to use for debug output. If None, logging is disabled.
        @param startTime: ISO start of the span to merge (rounded down to the hour below).
        @param endTime  : ISO end of the span to merge.
        @param client   : Client/source whose pickles are merged.
        @param machine  : Machine on which the pickles reside.
        @param fileType : tx or rx.

        @return : A FileStatsCollector holding the concatenated entries.

        @note : This does not support merging part of the data of pickles.
    """

    if logger != None:
        logger.debug(_("Call to mergeHourlyPickles received."))
        logging = True
    else:
        logging = False

    pickles = []  # One pickle file name per hour of the span.
    entries = {}  # Merged entries, keyed by a running integer index.

    # Width in seconds of the whole span; startTime is then rounded to the hour.
    width = StatsDateLib.getSecondsSinceEpoch( endTime) - StatsDateLib.getSecondsSinceEpoch(startTime)
    startTime = StatsDateLib.getIsoWithRoundedHours(startTime)

    # Build one seperator per hour to cover the span.
    seperators = [startTime]
    seperators.extend( StatsDateLib.getSeparatorsWithStartTime(startTime=startTime, width=width, interval=60 * StatsDateLib.MINUTE)[:-1])

    for seperator in seperators:
        pickles.append( StatsPickler.buildThisHoursFileName(client=client, offset=0, currentTime=seperator, machine=machine, fileType=fileType))

    startingNumberOfEntries = 0  # Next free index in the merged entries dict.
    #print "prior to loading and merging pickles : %s " %( StatsDateLib.getIsoFromEpoch( time.time() ) )
    for pickle in pickles:

        if os.path.isfile(pickle):
            tempCollection = CpickleWrapper.load(pickle)
            if tempCollection != None:
                # Append this hour's entries after the ones already merged.
                for i in xrange(len(tempCollection.fileEntries)):
                    entries[startingNumberOfEntries + i] = tempCollection.fileEntries[i]
                startingNumberOfEntries = startingNumberOfEntries + len( tempCollection.fileEntries)
            else:
                # NOTE(review): an unloadable pickle aborts the whole process here.
                sys.exit()
        else:
            # Missing hourly pickle : pad with 60 empty (one-per-minute) entries
            # so later hours keep their expected positions.
            emptyEntries = PickleMerging.fillWithEmptyEntries( nbEmptyEntries=60, entries={})
            for i in xrange(60):
                entries[i + startingNumberOfEntries] = emptyEntries[i]
            startingNumberOfEntries = startingNumberOfEntries + 60

    #print "after the loading and merging og pickles : %s " %( StatsDateLib.getIsoFromEpoch( time.time() ) )

    statsCollection = FileStatsCollector(startTime=startTime, endTime=endTime, interval=StatsDateLib.MINUTE, totalWidth=width, fileEntries=entries, fileType=fileType, logger=logger, logging=logging)

    return statsCollection
def updateHourlyPickles( infos, paths, logger = None ):
    """
        @summary : This method is to be used when hourly pickling is done.
                   -1 pickle per hour per client.

                   This method needs will update the pickles by collecting data
                   from the time of the last pickle up to the current date.
                   (System time or the one specified by the user.)

                   If for some reason data wasnt collected for one or more hour
                   since last pickle, pickles for the missing hours will be
                   created and filled with data.

                   If no entries are found for this client in the pickled-times
                   file, we take for granted that this is a new client. In that
                   case data will be collected from the top of the hour up to the
                   time of the call.

                   If new client has been producing data before the day of the
                   first call, user can specify a different time than system time
                   to specify the first day to pickle. He can then call this
                   method with the current system time, and data between first
                   day and current time will be collected so that pickling can
                   continue like the other clients can.

        @param infos  : Object carrying clients, startTimes, endTime, machine,
                        fileType, types, interval, currentDate, collectUpToNow.
        @param paths  : Paths object forwarded to setLastUpdate.
        @param logger : Optional logger for warnings.
    """

    sp = StatsPickler( logger = logger )

    pathToLogFiles = GeneralStatsLibraryMethods.getPathToLogFiles( LOCAL_MACHINE, infos.machine )

    for i in range( len (infos.clients) ) :

        sp.client = infos.clients[i]

        # Seconds between the (hour-rounded) last pickle time and the end time.
        width = StatsDateLib.getSecondsSinceEpoch(infos.endTime) - StatsDateLib.getSecondsSinceEpoch( StatsDateLib.getIsoWithRoundedHours(infos.startTimes[i] ) )

        if width > StatsDateLib.HOUR :#In case pickling didnt happen for a few hours for some reason...

            hours = [infos.startTimes[i]]
            hours.extend( StatsDateLib.getSeparatorsWithStartTime( infos.startTimes[i], interval = StatsDateLib.HOUR, width = width ))

            for j in range( len(hours)-1 ): #Covers hours where no pickling was done.

                startOfTheHour = StatsDateLib.getIsoWithRoundedHours( hours[j] )
                startTime = startOfTheHour
                # End of this catch-up slice = start of the next hour, rounded.
                endTime = StatsDateLib.getIsoFromEpoch( StatsDateLib.getSecondsSinceEpoch( StatsDateLib.getIsoWithRoundedHours(hours[j+1] ) ))
                #print " client : %s startTime : %s endTime : %s" %(infos.clients[i], startTime, endTime )

                if startTime >= endTime and logger != None :
                    # Best-effort warning only : a failing logger must not stop collection.
                    try:
                        logger.warning( _("Startime used in updateHourlyPickles was greater or equal to end time.") )
                    except:
                        pass

                sp.pickleName = StatsPickler.buildThisHoursFileName( client = infos.clients[i], currentTime = startOfTheHour, machine = infos.machine, fileType = infos.fileType )

                sp.collectStats( types = infos.types, startTime = startTime , endTime = endTime, interval = infos.interval * StatsDateLib.MINUTE,\
                                 directory = pathToLogFiles, fileType = infos.fileType )

        else:
            # Less than an hour to catch up : collect in a single pass.
            startTime = infos.startTimes[i]
            endTime = infos.endTime
            startOfTheHour = StatsDateLib.getIsoWithRoundedHours( infos.startTimes[i] )
            #print " client : %s startTime : %s endTime : %s" %(infos.clients[i], startTime, endTime )

            if startTime >= endTime and logger != None :#to be removed
                # Best-effort warning only : a failing logger must not stop collection.
                try:
                    logger.warning( _("Startime used in updateHourlyPickles was greater or equal to end time.") )
                except:
                    pass

            sp.pickleName = StatsPickler.buildThisHoursFileName( client = infos.clients[i], currentTime = startOfTheHour, machine = infos.machine, fileType = infos.fileType )

            sp.collectStats( infos.types, startTime = startTime, endTime = endTime, interval = infos.interval * StatsDateLib.MINUTE, directory = pathToLogFiles, fileType = infos.fileType )

        # Remember how far this client has been pickled.
        setLastUpdate( machine = infos.machine, client = infos.clients[i], fileType = infos.fileType, currentDate = infos.currentDate, paths = paths, collectUpToNow = infos.collectUpToNow )
def __init__( self, files = None, fileType = "tx", statsTypes = None, startTime = '2005-08-30 20:06:59',\
              endTime = '2005-08-30 20:06:59', interval=1*MINUTE, totalWidth = HOUR, firstFilledEntry = 0,\
              lastFilledEntry = 0, maxLatency = 15, fileEntries = None, logger = None, logging =True ):
    """
        Constructor. All values can be set from the constructor by the user
        but recommend usage is to set sourceFile and statsType. The class
        contains other methods to set the other values properly.

        Constructor receives date in an iso format wich is conveniant for
        users but transforms it in a seconds since epoch format for ease of
        use during the program.

        Precondition : Interval should be smaller than width !
    """

    global _
    _ = self.getTranslatorForModule( CURRENT_MODULE_ABS_PATH )

    # Avoid the shared-mutable-default pitfall for fileEntries.
    if fileEntries is None :
        fileEntries = {}

    self.files = files or []                  # Source files we will use.
    self.fileType = fileType                  # Type of files. tx or rx.
    self.statsTypes = statsTypes or []        # List of types we need to manage.
    self.fileEntries = fileEntries or {}      # List of all entries wich are parsed using time seperators.
    self.startTime = startTime                # Beginning of the timespan used to collect stats.
    self.endTime = endTime                    # End of said timespan.
    self.interval = interval                  # Interval at wich we separate stats entries.
    self.totalWidth = totalWidth              # Used to build time seperators.
    self.maxLatency = maxLatency              # Acceptable limit for a latency.
    self.firstFilledEntry = firstFilledEntry  # First entry for wich we calculated mean, max, etc.
    self.lastFilledEntry = lastFilledEntry    # Last entry we filled with data.
    self.lastPositionRead = 0                 # Last read position in the last file read.
    self.firstLineOfLastFileRead = ""         # First line of the last file read.
    self.loggerName = 'fileStatsCollector'    # Name of the logger if none is specified.
    self.logger = logger                      # Logger.
    self.logging = logging                    # Whether or not to enable logging.

    # Default stats types depend on the file type when none were supplied.
    if self.statsTypes == []:
        if self.fileType == "tx":
            self.statsTypes = ["latency", "errors","bytecount"]
        else:
            self.statsTypes = [ "errors","bytecount"]

    # One seperator per interval over the total width; each pair of
    # consecutive seperators delimits one entry ("bucket").
    timeSeperators = [ startTime ]
    timeSeperators.extend( StatsDateLib.getSeparatorsWithStartTime( startTime, self.totalWidth, self.interval ) )
    self.timeSeperators = timeSeperators
    self.nbEntries = len ( self.timeSeperators ) -1   # Nb of entries or "buckets".

    if self.logging == True:
        if self.logger is None: # Enable logging
            self.logger = Logger( STATSPATHS.STATSLOGGING + 'stats_' + self.loggerName + '.log.notb', 'INFO', 'TX' + self.loggerName, bytes = True )
            self.logger = self.logger.getLogger()

    if self.fileEntries == {}:
        self.createEmptyEntries()   # Create all empty buckets right away.

    # Sorting needs to be done to make sure first file we read is the oldest,
    # thus makes sure that if we seek the last read position we do it in the
    # right file.
    self.files.sort()

    # .log file is always newest, so move it to the end of the reading order.
    if len( self.files ) > 1 and files[0].endswith("log"):
        firstItem = self.files[ 0 ]
        remainingList = self.files[ 1: ]
        self.files = remainingList
        self.files.append( firstItem )