Example #1
    def _execute(self, starttime, endtime, maxSelect):
        """
        Execute the select command against the Billing DB and return the
        results (possibly summarized).

        It is guaranteed that this function will return an endtime greater
        than the starttime, but by how much is not guaranteed.

        Note on the time returned as the first part of the tuple:
        We guarantee two things:
           a) the returned time is strictly greater than starttime;
           b) we return *all* records in the interval [starttime, returned time).
        We do not guarantee that the returned time == the endtime parameter.
        Thus it is suitable to use as the start time of the next select query.
        To do this, we reduce the range until it reaches 1 second or the
        query returns fewer than maxSelect results.  If the interval is one
        second and it still returns maxSelect results, then we extend the limit
        of the query until all records fit.

        @param starttime: Datetime object for the start of the query interval.
        @param endtime: Datetime object for the end of the query interval.
        @param maxSelect: The maximum number of rows to select
        @return: Tuple containing a time that is greater than all the
           returned records, and the results themselves.
        """
        assert starttime < endtime
        if (maxSelect > MAX_SELECT) and ((endtime-starttime).seconds <= \
                MIN_RANGE):
            raise Exception("Fatal error - more than %i transfers in %i" \
                " second(s)." % (MAX_SELECT,(endtime-starttime).seconds))
        datestr = str(starttime)
        datestr_end = str(endtime)

        # Query the database.  If it takes more than MAX_QUERY_TIME_SECS, then
        # have the probe self-destruct.
        query = BILLINGDB_SELECT_CMD % (datestr, datestr_end, datestr,
            datestr_end, maxSelect)
        self._log.debug('_sendToGratia: will execute ' + query)
        select_time = -time.time()
        if not TestContainer.isTest():
            self._cur.execute(query)
            result = self._cur.fetchall()
        else:
            result = BillingRecSimulator.execute(query)
        select_time += time.time()
        if select_time > MAX_QUERY_TIME_SECS:
            raise Exception("Postgres query took %i seconds, more than " \
                "the maximum allowable of %i; this is a sign the DB is " \
                "not properly optimized!" % (int(select_time),
                MAX_QUERY_TIME_SECS))
        self._log.debug("BillingDB query finished in %.02f seconds and " \
            "returned %i records." % (select_time, len(result)))

        if not result:
            self._log.debug("No results from %s to %s." % (starttime, endtime))
            return endtime, result
        # dCache sometimes returns a negative transfer size; when this happens,
        # it also tosses up a complete garbage duration
        filtered_result = []
        for row in result:
            row = dict(row)
            #print row
            if row['transfersize'] < 0:
                row['transfersize'] = 0
                row['connectiontime'] = 0
            filtered_result.append(row)
        result = filtered_result

        # If we hit our limit, there's no telling how many identical records
        # there are on the final millisecond; we must re-query with a smaller
        # interval or a higher limit on the select.
        if len(result) == maxSelect:
            diff = endtime - starttime
            interval = diff.days*86400 + diff.seconds
            # Ensure that self._range is such that we always end up on a
            # minute boundary (eventually).  Whenever we decrease the interval
            # size, it is guaranteed to be a multiple of what's left of the
            # interval to the next minute.  I.e., the transitions are:
            #   60s ->  30s
            #   30s ->  15s (which can only happen at :30s)
            #   15s ->   5s (which can only happen at :15s :30s or :45s)
            #    5s ->   1s
            if interval > 60:
                new_interval = 60
            elif interval > 30:
                new_interval = 30
            elif interval > 15:
                new_interval = 15
            elif interval > 5:
                new_interval = 5
            else:
                new_interval = 1
            new_endtime = starttime + datetime.timedelta(0, new_interval)
            # Guard against the DST jump by making sure new_endtime > starttime.
            if (interval == new_interval) or (new_interval == 0) or \
                (new_endtime <= starttime):
                self._log.warning("Limit hit; increasing from %i to %i." % \
                    (maxSelect, maxSelect*2))
                endtime, result = self._execute(starttime, endtime, maxSelect*2)
                assert endtime > starttime
                return endtime, result
            else:
                self._log.warning("Limit hit; decreasing time interval from %i" \
                   " to %i." % (interval, new_interval))
                self._range = new_interval
                endtime, result = self._execute(starttime, new_endtime,
                    maxSelect)
                assert endtime > starttime
                return endtime, result

        return endtime, result
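
The comment ladder above (60s -> 30s -> 15s -> 5s -> 1s) is the heart of the
re-query strategy: each step picks a window that evenly divides the time left
to the next minute boundary, so repeated reductions always converge to a
1-second window. A minimal standalone sketch of that schedule (the name
next_interval is ours, not the probe's):

def next_interval(interval):
    # Pick the largest rung of the 60/30/15/5/1-second ladder that is
    # strictly smaller than the current window; 1 second is the floor.
    for step in (60, 30, 15, 5):
        if interval > step:
            return step
    return 1

# Walking the ladder from a 10-minute window prints 60, 30, 15, 5, 1.
# Once the window is 1 second and still full, _execute doubles maxSelect
# instead of shrinking further.
interval = 600
while interval > 1:
    interval = next_interval(interval)
    print(interval)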
Example #2
def dumpStatistics(log):
    global recordsToSend
    if not TEST:
        return
    log.info("Send to gratia:")
    dump(log, createStatistics(recordsToSend))

    log.info("Generated:")
    dump(log, createStatistics(BillingRecSimulator.sqlTableContent))


def dump(log, (overall, initiator, errorcode, totalRecords)):
    log.info("Overall %s" % overall)
    log.info("initiator %s" % initiator)
    log.info("errorcode %s" % errorcode)
    log.info("num records %s" % totalRecords)


if __name__ == "__main__":

    recordsToSend = BillingRecSimulator.generateTableContent()
    print "Pre aggregation"
    print createStatistics(recordsToSend)

    recordsToSend = Collapse.collapse(
        recordsToSend,
        TimeBinRange.DictRecordAggregator(
            ['initiator', 'client', 'protocol', 'errorcode', 'isnew'],
            ['njobs', 'transfersize', 'connectiontime']))
    print "Post Aggregation"
    print createStatistics(recordsToSend)
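
Collapse.collapse with a DictRecordAggregator merges records that agree on the
key fields ('initiator', 'client', 'protocol', 'errorcode', 'isnew') by
summing the value fields ('njobs', 'transfersize', 'connectiontime'), which is
why the post-aggregation statistics report the same totals over fewer rows. A
rough sketch of that grouping idea, assuming plain-dict records (the real
aggregator presumably also respects time bins, as the TimeBinRange name
suggests; this sketch ignores them):

def collapse_records(records, key_fields, sum_fields):
    # Group on the key fields; sum the value fields within each group.
    merged = {}
    for rec in records:
        key = tuple(rec[f] for f in key_fields)
        if key in merged:
            for f in sum_fields:
                merged[key][f] += rec[f]
        else:
            merged[key] = dict(rec)  # copy so the input rows stay untouched
    return list(merged.values())

# Two rows sharing (initiator, errorcode) collapse into one with
# njobs=2 and transfersize=350.
rows = [
    {'initiator': 'door0', 'errorcode': 0, 'njobs': 1, 'transfersize': 100},
    {'initiator': 'door0', 'errorcode': 0, 'njobs': 1, 'transfersize': 250},
]
print(collapse_records(rows, ['initiator', 'errorcode'],
                       ['njobs', 'transfersize']))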