def sendBillingInfoRecordsToGratia(self):
    """
    Public entry point that drives the dCache-transfer reporting.

    Querying begins at the later of the last checkpoint and a cutoff
    lying _maxAge days in the past (rounded down to the hour).  It then
    advances interval by interval until the interval start reaches the
    latest allowed time: 75 minutes before now, or the end time supplied
    by TestContainer when running as a test.

    If not summarizing: each non-empty batch obtained through _execute
    is handed to _processResults and checkpointed right away at the end
    of its interval.

    If summarizing: batches are collapsed as they come in, and are sent
    and checkpointed only once the next interval would end after the
    current summary boundary; the checkpoint is then that boundary.

    The loop also stops, after closing the cursor and the connection,
    as soon as the stop file exists.
    """
    self._log.debug("sendBillingInfoRecordsToGratia")

    # Age cutoff: _maxAge days back, truncated to the top of the hour.
    oldest = datetime.datetime.now() - datetime.timedelta(days=self._maxAge)
    oldest = oldest.replace(minute=0, second=0, microsecond=0)

    # Stay 75 minutes behind the clock so that only complete intervals are
    # queried; a test run takes its end time from TestContainer instead.
    latest_allowed = datetime.datetime.now() - datetime.timedelta(minutes=75)
    if TestContainer.isTest():
        latest_allowed = TestContainer.getEndDateTime()

    # Resume from the checkpoint unless it is older than the cutoff.
    interval_start = max(self._BIcheckpoint.lastDateStamp(), oldest)
    self._log.info("Starting queries at time %s." % interval_start)

    aggregator = TimeBinRange.DictRecordAggregator(DCACHE_AGG_FIELDS,
                                                   DCACHE_SUM_FIELDS)

    summary_deadline = self._determineNextEndtime(interval_start,
                                                  summary=True)
    if self._summarize:
        self._log.debug("Next summary send time: %s." % summary_deadline)

    pending = []
    fetched = 0
    interval_end = self._determineNextEndtime(interval_start)

    # Keep going until the interval start has caught up to latest_allowed.
    while interval_start < latest_allowed:
        assert interval_start < interval_end
        self._log.debug(
            "sendBillingInfoRecordsToGratia: Processing starting at %s."
            % interval_start)

        # _execute also reports where the following query has to begin;
        # the assertion further down checks that this moved forward.
        following_start, rows = self._execute(interval_start, interval_end,
                                              self._maxSelect)
        pending += rows
        fetched += len(rows)
        if self._summarize:
            # Fold the new rows into the partial summary
            pending = Collapse.collapse(pending, aggregator)

        assert following_start > interval_start
        following_end = self._determineNextEndtime(following_start)

        if pending:
            if not self._summarize:
                # Not summarizing: send and checkpoint on every pass.
                fetched = 0
                self._BIcheckpoint.createPending(interval_end, '')
                self._processResults(pending)
                self._BIcheckpoint.commit()
                # Restore the starting range once a batch is smaller than
                # a quarter of _maxSelect.
                if (self._range < STARTING_RANGE
                        and 4 * len(pending) < self._maxSelect):
                    self._range = STARTING_RANGE
                pending = []
            elif following_end > summary_deadline:
                # Summarizing: send only when the summary boundary would
                # be crossed by the next interval.
                collapsed = fetched - len(pending)
                if collapsed:
                    ratio = float(fetched) / float(len(pending))
                    self._log.info(
                        "Aggregated %i of %i records for time "
                        "interval ending in %s. %.1fx reduction."
                        % (collapsed, fetched, summary_deadline, ratio))
                else:
                    self._log.debug(
                        "Unable to aggregate any of %i records" % fetched)
                fetched = 0
                self._BIcheckpoint.createPending(summary_deadline, '')
                self._processResults(pending)
                self._BIcheckpoint.commit()
                pending = []
                self._range = STARTING_RANGE

        summary_deadline = self._determineNextEndtime(following_start,
                                                      summary=True)
        interval_start, interval_end = following_start, following_end

        # Leave the loop when the stop file has been created.
        if os.path.exists(self._stopFileName):
            # Neha - 03/17/2011
            # Nothing to commit here: only selects were issued, no inserts
            # or updates.
            self._cur.close()
            self._connection.close()
            break
global recordsToSend if (not TEST): return log.info("Send to gratia:") dump(log, createStatistics(recordsToSend)) log.info("Generated:") dump(log, createStatistics(BillingRecSimulator.sqlTableContent)) def dump(log, (overall, initiator, errorcode, totalRecords)): log.info("Overall %s" % overall) log.info("initiator %s" % initiator) log.info("errorcode %s" % errorcode) log.info("num records %s" % totalRecords) if __name__ == "__main__": recordsToSend = BillingRecSimulator.generateTableContent() print "Pre aggregation" print createStatistics(recordsToSend) recordsToSend = Collapse.collapse( recordsToSend, TimeBinRange.DictRecordAggregator( ['initiator', 'client', 'protocol', 'errorcode', 'isnew'], ['njobs', 'transfersize', 'connectiontime'])) print "Post Aggregation" print createStatistics(recordsToSend)
return sum def dumpStatistics(log): global recordsToSend if ( not TEST ): return log.info("Send to gratia:") dump(log,createStatistics(recordsToSend)) log.info("Generated:") dump(log,createStatistics(BillingRecSimulator.sqlTableContent)) def dump(log,(overall,initiator,errorcode,totalRecords)): log.info("Overall %s" % overall) log.info("initiator %s"% initiator) log.info("errorcode %s" % errorcode) log.info("num records %s" % totalRecords) if __name__ == "__main__": recordsToSend = BillingRecSimulator.generateTableContent() print "Pre aggregation" print createStatistics(recordsToSend) recordsToSend = Collapse.collapse(recordsToSend,TimeBinRange.DictRecordAggregator(['initiator','client', 'protocol','errorcode','isnew' ],['njobs','transfersize','connectiontime'])) print "Post Aggregation" print createStatistics(recordsToSend)
def sendBillingInfoRecordsToGratia(self):
    """
    Public entry point that drives the dCache-transfer reporting.

    Querying begins at the later of the last checkpoint and a cutoff
    lying _maxAge days in the past (rounded down to the hour).  It then
    advances interval by interval until the interval start reaches the
    latest allowed time: 75 minutes before now, or the end time supplied
    by TestContainer when running as a test.

    If not summarizing: each non-empty batch obtained through _execute
    is handed to _processResults and checkpointed right away at the end
    of its interval.

    If summarizing: batches are collapsed as they come in, and are sent
    and checkpointed only once the next interval would end after the
    current summary boundary; the checkpoint is then that boundary.

    The loop also stops, after closing the cursor and the connection,
    as soon as the stop file exists.
    """
    self._log.debug("sendBillingInfoRecordsToGratia")

    # Age cutoff: _maxAge days back, truncated to the top of the hour.
    oldest = datetime.datetime.now() - datetime.timedelta(days=self._maxAge)
    oldest = oldest.replace(minute=0, second=0, microsecond=0)

    # Stay 75 minutes behind the clock so that only complete intervals are
    # queried; a test run takes its end time from TestContainer instead.
    latest_allowed = datetime.datetime.now() - datetime.timedelta(minutes=75)
    if TestContainer.isTest():
        latest_allowed = TestContainer.getEndDateTime()

    # Resume from the checkpoint unless it is older than the cutoff.
    interval_start = max(self._BIcheckpoint.lastDateStamp(), oldest)
    self._log.info("Starting queries at time %s." % interval_start)

    aggregator = TimeBinRange.DictRecordAggregator(DCACHE_AGG_FIELDS,
                                                   DCACHE_SUM_FIELDS)

    summary_deadline = self._determineNextEndtime(interval_start,
                                                  summary=True)
    if self._summarize:
        self._log.debug("Next summary send time: %s." % summary_deadline)

    pending = []
    fetched = 0
    interval_end = self._determineNextEndtime(interval_start)

    # Keep going until the interval start has caught up to latest_allowed.
    while interval_start < latest_allowed:
        assert interval_start < interval_end
        self._log.debug(
            "sendBillingInfoRecordsToGratia: Processing starting at %s."
            % interval_start)

        # _execute also reports where the following query has to begin;
        # the assertion further down checks that this moved forward.
        following_start, rows = self._execute(interval_start, interval_end,
                                              self._maxSelect)
        pending += rows
        fetched += len(rows)
        if self._summarize:
            # Fold the new rows into the partial summary
            pending = Collapse.collapse(pending, aggregator)

        assert following_start > interval_start
        following_end = self._determineNextEndtime(following_start)

        if pending:
            if not self._summarize:
                # Not summarizing: send and checkpoint on every pass.
                fetched = 0
                self._BIcheckpoint.createPending(interval_end, '')
                self._processResults(pending)
                self._BIcheckpoint.commit()
                # Restore the starting range once a batch is smaller than
                # a quarter of _maxSelect.
                if (self._range < STARTING_RANGE
                        and 4 * len(pending) < self._maxSelect):
                    self._range = STARTING_RANGE
                pending = []
            elif following_end > summary_deadline:
                # Summarizing: send only when the summary boundary would
                # be crossed by the next interval.
                collapsed = fetched - len(pending)
                if collapsed:
                    ratio = float(fetched) / float(len(pending))
                    self._log.info(
                        "Aggregated %i of %i records for time "
                        "interval ending in %s. %.1fx reduction."
                        % (collapsed, fetched, summary_deadline, ratio))
                else:
                    self._log.debug(
                        "Unable to aggregate any of %i records" % fetched)
                fetched = 0
                self._BIcheckpoint.createPending(summary_deadline, '')
                self._processResults(pending)
                self._BIcheckpoint.commit()
                pending = []
                self._range = STARTING_RANGE

        summary_deadline = self._determineNextEndtime(following_start,
                                                      summary=True)
        interval_start, interval_end = following_start, following_end

        # Leave the loop when the stop file has been created.
        if os.path.exists(self._stopFileName):
            # Neha - 03/17/2011
            # Nothing to commit here: only selects were issued, no inserts
            # or updates.
            self._cur.close()
            self._connection.close()
            break