def testPickle(self):
        h = Histogram()
        letters = list(string.letters) + ["-"]

        for i in xrange(0, 100):
            key = ""
            for x in xrange(0, 10):
                key += random.choice(letters)
            assert len(key) > 0

            h.put(key, delta=random.randint(1, 10))
            assert h[key] > 0
        ## FOR

        # Serialize
        import pickle

        p = pickle.dumps(h, -1)
        assert p

        # Deserialize
        clone = pickle.loads(p)
        assert clone

        for key in h.keys():
            self.assertEqual(h[key], clone[key])
        ## FOR
        self.assertEqual(h.getSampleCount(), clone.getSampleCount())
        self.assertEqual(sorted(h.getMinCountKeys()), sorted(clone.getMinCountKeys()))
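The test above touches only a handful of Histogram methods. For reference, here is a minimal sketch of that interface, inferred purely from the calls the test makes; the project's real Histogram class is defined elsewhere and may differ.

# Minimal sketch of the Histogram interface assumed by testPickle above
# (inferred from the test; NOT the project's actual implementation).
class Histogram(object):
    def __init__(self):
        self.data = {}

    def put(self, key, delta=1):
        # Increment the sample count for key by delta
        self.data[key] = self.data.get(key, 0) + delta

    def __getitem__(self, key):
        return self.data.get(key, 0)

    def keys(self):
        return self.data.keys()

    def getSampleCount(self):
        # Total number of samples across all keys
        return sum(self.data.values())

    def getMinCountKeys(self):
        # All keys tied for the smallest count
        if not self.data:
            return []
        min_cnt = min(self.data.values())
        return [k for k, cnt in self.data.items() if cnt == min_cnt]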
Example #2
    for sess in metadata_db.Session.fetch():
        for op in sess["operations"]:
            QUERY_COUNTS.put(op["query_hash"])
            if not op["query_hash"] in QUERY_HASH_XREF:
                QUERY_HASH_XREF[op["query_hash"]] = []
            QUERY_HASH_XREF[op["query_hash"]].append(op)
            QUERY_COLLECTION_COUNTS.put(op["collection"])
        ## FOR
    ## FOR

    LOG.info("Total # of Unique Queries: %d", len(QUERY_COUNTS.values()))
    TOTAL_DB_SIZE = sum(col_info["data_size"] for col_info in colls.itervalues())
    LOG.debug("Estimated Total Database Size: %d", TOTAL_DB_SIZE)
    TOTAL_QUERY_COUNT = QUERY_COLLECTION_COUNTS.getSampleCount()
    LOG.debug("Total # of Queries: %d", TOTAL_QUERY_COUNT)

    # HACK: Fix collections
    for col_name, col_info in colls.iteritems():
        num_queries = QUERY_COLLECTION_COUNTS.get(col_name)
        col_info["workload_queries"] = num_queries
        col_info["workload_percent"] = num_queries / float(TOTAL_QUERY_COUNT)
        try:
            col_info.save()
        except Exception as ex:
            LOG.warn(ex)
    ## FOR

    ## ----------------------------------------------
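The snippet above depends on the surrounding script's metadata catalog and Histogram. As a self-contained illustration, the sketch below reproduces the workload_percent arithmetic with plain dicts and shows a defaultdict variant of the QUERY_HASH_XREF construction; the sample values and lowercase names are made up.

# Self-contained sketch of the workload metrics computed above, with plain
# dicts standing in for the metadata catalog and Histogram (values invented).
from collections import defaultdict

query_collection_counts = {"users": 30, "orders": 70}
total_query_count = float(sum(query_collection_counts.values()))

colls = {"users": {"data_size": 1024}, "orders": {"data_size": 4096}}
for col_name, col_info in colls.items():
    col_info["workload_queries"] = query_collection_counts[col_name]
    col_info["workload_percent"] = query_collection_counts[col_name] / total_query_count
# colls["orders"]["workload_percent"] is now 0.7

# The QUERY_HASH_XREF loop can also be written with a defaultdict, which
# removes the explicit membership check:
ops = [{"query_hash": "abc1", "collection": "users"}]
query_hash_xref = defaultdict(list)
for op in ops:
    query_hash_xref[op["query_hash"]].append(op)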
Example #3
class Results:
    
    def __init__(self, config=None):
        self.start = None
        self.stop = None
        self.txn_id = 0
        self.opCount = 0
        self.completed = [ ] # (txnName, timestamp)
        self.txn_counters = Histogram()
        self.txn_times = { }
        self.running = { }
        self.config = config
        
    def startBenchmark(self):
        """Mark the benchmark as having been started"""
        assert self.start is None
        LOG.debug("Starting benchmark statistics collection")
        self.start = time.time()
        return self.start
        
    def stopBenchmark(self):
        """Mark the benchmark as having been stopped"""
        assert self.start is not None
        assert self.stop is None
        LOG.debug("Stopping benchmark statistics collection")
        self.stop = time.time()
        
    def startTransaction(self, txn):
        self.txn_id += 1
        id = self.txn_id
        self.running[id] = (txn, time.time())
        return id
        
    def abortTransaction(self, id):
        """Abort a transaction and discard its times"""
        assert id in self.running
        txn_name, txn_start = self.running[id]
        del self.running[id]
        
    def stopTransaction(self, id, opCount, latencies=None):
        """Record that the benchmark completed an invocation of the given transaction"""
        if latencies is None:
            latencies = []  # avoid sharing one mutable default list across calls
        assert id in self.running
        
        timestamp = time.time()
        
        txn_name, txn_start = self.running[id]
        del self.running[id]
        self.completed.append((txn_name, timestamp, latencies))
        
        duration = timestamp - txn_start
        total_time = self.txn_times.get(txn_name, 0)
        self.txn_times[txn_name] = total_time + duration
        
        # OpCount
        if opCount is not None:
            self.opCount += opCount
        else:
            LOG.debug("No operation count was provided for %s" % txn_name)
            
        
        # Txn Counter Histogram
        self.txn_counters.put(txn_name)
        assert self.txn_counters[txn_name] > 0
        
        if LOG.isEnabledFor(logging.DEBUG):
            LOG.debug("Completed %s in %f sec" % (txn_name, duration))
    ## DEF

    @staticmethod
    def show_table(title, headers, table, line_width):
        """Render rows as a fixed-width text table; returns (output, line_width)."""
        cols_width = [len(header) for header in headers]
        for row in table:
            row_width = 0
            for i in range(len(headers)):
                if len(row[i]) > cols_width[i]:
                    cols_width[i] = len(row[i])
                row_width += cols_width[i]
            row_width += 4 * (len(headers) - 1)
            if row_width > line_width:
                line_width = row_width
        output = ("%s\n" % ("=" * line_width))
        output += ("%s\n" % title)
        output += ("%s\n" % ("-" * line_width))
        for i in range(len(headers)):
            header = headers[i]
            output += ("%s%s" % (header, " " * (cols_width[i] - len(header))))
            if i != len(headers) - 1:
                output += " " * 4
        output += "\n"
        for row in table:
            for i in range(len(headers)):
                cell = row[i]
                output += ("%s%s" % (cell, " " * (cols_width[i] - len(cell))))
                if i != len(headers) - 1:
                    output += " " * 4
            output += "\n"
        output += ("%s\n" % ("-" * line_width))
        return output, line_width

    def show_latencies(self, line_width):
        """Render the latency percentile report and, if configured, the slowest operations."""
        latencies = []
        output = ""
        for txn_stats in self.completed:
            latencies.extend(txn_stats[2])
        if len(latencies) > 0:
            latencies = sorted(latencies, key=itemgetter(0))
            percents = [0.1, 0.2, 0.5, 0.8, 0.9, 0.999]
            latency_table = []
            slowest_ops = []
            for percent in percents:
                index = int(math.floor(percent * len(latencies)))
                percent_str = "%0.1f%%" % (percent * 100)
                millis_sec_str = "%0.4f" % (latencies[index][0])
                latency_table.append((percent_str, millis_sec_str))
            latency_headers = ["Queries(%)", "Latency(ms)"]
            output, line_width = \
                Results.show_table("Latency Report", latency_headers, latency_table, line_width)
            if self.config is not None and self.config["default"]["slow_ops_num"] > 0:
                num_ops = self.config["default"]["slow_ops_num"]
                slowest_ops_headers = ["#", "Latency(ms)", "Session Id", "Operation Id", "Type", "Collection", "Predicates"]
                for i in range(num_ops):
                    if i < len(latencies):
                        slowest_ops.append([
                            "%d" % i,
                            "%0.4f" % (latencies[len(latencies) - i - 1][0]),
                            str(latencies[len(latencies) - i - 1][1]),
                            str(latencies[len(latencies) - i - 1][2]),
                            latencies[len(latencies) - i - 1][3],
                            latencies[len(latencies) - i - 1][4],
                            json.dumps(latencies[len(latencies) - i - 1][5])
                        ])
                slowest_ops_output, line_width = \
                    Results.show_table("Top %d Slowest Operations" % num_ops, slowest_ops_headers, slowest_ops, line_width)
                output += ("\n%s" % slowest_ops_output)
        return output

    def append(self, r):
        """Merge the counters and timings from another Results instance into this one."""
        self.opCount += r.opCount
        for txn_name in r.txn_counters.keys():
            self.txn_counters.put(txn_name, delta=r.txn_counters[txn_name])
            
            orig_time = self.txn_times.get(txn_name, 0)
            self.txn_times[txn_name] = orig_time + r.txn_times[txn_name]
            
            #LOG.info("resOps="+str(r.opCount))
            #LOG.debug("%s [cnt=%d, time=%d]" % (txn_name, self.txn_counters[txn_name], self.txn_times[txn_name]))
        ## HACK
        if isinstance(r.completed, list):
            self.completed.extend(r.completed)
        if not self.start:
            self.start = r.start
        else:
            self.start = min(self.start, r.start)
        if not self.stop:
            self.stop = r.stop
        else:
            self.stop = max(self.stop, r.stop)
    ## DEF
            
    def __str__(self):
        return self.show()
        
    def show(self, load_time=None):
        if self.start is None:
            msg = "Attempting to get benchmark results before the benchmark was started"
            LOG.warn(msg)
            raise Exception(msg)
        if self.stop is None:
            duration = time.time() - self.start
        else:
            duration = self.stop - self.start
        
        col_width = 18
        total_width = (col_width*4)+2
        f = "\n  " + (("%-" + str(col_width) + "s")*4)
        line = "-"*total_width

        ret = u"" + "="*total_width + "\n"
        if load_time is not None:
            ret += "Data Loading Time: %d seconds\n\n" % (load_time)
        
        ret += "Execution Results after %d seconds\n%s" % (duration, line)
        ret += f % ("", "Executed", u"Total Time (ms)", "Rate") 
        total_time = duration
        total_cnt = self.txn_counters.getSampleCount()
        #total_running_time = 0
        
        for txn in sorted(self.txn_counters.keys()):
            txn_time = self.txn_times[txn]
            txn_cnt = "%6d - %4.1f%%" % (self.txn_counters[txn], (self.txn_counters[txn] / float(total_cnt))*100)
            #rate = u"%.02f txn/s" % (self.txn_counters[txn] / total_time)
            #total_running_time += txn_time
            rate = u"%.02f op/s" % (self.opCount / total_time)
            ret += f % (txn, txn_cnt, str(txn_time * 1000), rate)
            
            #LOG.info("totalOps="+str(self.totalOps))
            # total_time += txn_time
        ret += "\n" + ("-"*total_width)
        
        rate = 0
        if total_time > 0:
            rate = total_cnt / float(total_time)
            # TXN RATE rate = total_cnt / float(total_time)
        #total_rate = "%.02f txn/s" % rate
        total_rate = "%.02f op/s" % rate
        #total_rate = str(rate)
        ret += f % ("TOTAL", str(total_cnt), str(total_time*1000), total_rate)

        return ("%s\n%s" % (ret, self.show_latencies(total_width))).encode('utf-8')
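A hypothetical end-to-end use of this class, assuming the module-level imports and LOG that the snippet relies on are in place; the transaction name and operation count are made up.

# Hypothetical driver showing the Results lifecycle: start the benchmark,
# time one transaction, then render the report.
results = Results()
results.startBenchmark()
txn_id = results.startTransaction("NewOrder")
# ... execute the transaction against the system under test ...
results.stopTransaction(txn_id, opCount=5, latencies=[])
results.stopBenchmark()
print(results.show())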
Example #4
class Results:
    def __init__(self):
        self.start = None
        self.stop = None
        self.txn_id = 0
        self.opCount = 0
        self.completed = []  # (txnName, timestamp)
        self.txn_counters = Histogram()
        self.txn_times = {}
        self.running = {}

    def startBenchmark(self):
        """Mark the benchmark as having been started"""
        assert self.start is None
        LOG.debug("Starting benchmark statistics collection")
        self.start = time.time()
        return self.start

    def stopBenchmark(self):
        """Mark the benchmark as having been stopped"""
        assert self.start is not None
        assert self.stop is None
        LOG.debug("Stopping benchmark statistics collection")
        self.stop = time.time()

    def startTransaction(self, txn):
        self.txn_id += 1
        id = self.txn_id
        self.running[id] = (txn, time.time())
        return id

    def abortTransaction(self, id):
        """Abort a transaction and discard its times"""
        assert id in self.running
        txn_name, txn_start = self.running[id]
        del self.running[id]

    def stopTransaction(self, id, opCount):
        """Record that the benchmark completed an invocation of the given transaction"""
        assert id in self.running

        timestamp = time.time()

        txn_name, txn_start = self.running[id]
        del self.running[id]
        self.completed.append((txn_name, timestamp))

        duration = timestamp - txn_start
        total_time = self.txn_times.get(txn_name, 0)
        self.txn_times[txn_name] = total_time + duration

        # OpCount
        if opCount is not None:
            self.opCount += opCount
        else:
            LOG.debug("No operation count was provided for %s" % txn_name)

        # Txn Counter Histogram
        self.txn_counters.put(txn_name)
        assert self.txn_counters[txn_name] > 0

        if LOG.isEnabledFor(logging.DEBUG):
            LOG.debug("Completed %s in %f sec" % (txn_name, duration))

    ## DEF

    def append(self, r):
        """Merge the counters and timings from another Results instance into this one."""
        self.opCount += r.opCount
        for txn_name in r.txn_counters.keys():
            self.txn_counters.put(txn_name, delta=r.txn_counters[txn_name])

            orig_time = self.txn_times.get(txn_name, 0)
            self.txn_times[txn_name] = orig_time + r.txn_times[txn_name]

            #LOG.info("resOps="+str(r.opCount))
            #LOG.debug("%s [cnt=%d, time=%d]" % (txn_name, self.txn_counters[txn_name], self.txn_times[txn_name]))
        ## HACK
        if isinstance(r.completed, list):
            self.completed.extend(r.completed)
        if not self.start:
            self.start = r.start
        else:
            self.start = min(self.start, r.start)
        if not self.stop:
            self.stop = r.stop
        else:
            self.stop = max(self.stop, r.stop)

    ## DEF

    def __str__(self):
        return self.show()

    def show(self, load_time=None):
        if self.start is None:
            msg = "Attempting to get benchmark results before the benchmark was started"
            LOG.warn(msg)
            raise Exception(msg)
        if self.stop is None:
            duration = time.time() - self.start
        else:
            duration = self.stop - self.start

        col_width = 18
        total_width = (col_width * 4) + 2
        f = "\n  " + (("%-" + str(col_width) + "s") * 4)
        line = "-" * total_width

        ret = u"" + "=" * total_width + "\n"
        if load_time is not None:
            ret += "Data Loading Time: %d seconds\n\n" % (load_time)

        ret += "Execution Results after %d seconds\n%s" % (duration, line)
        ret += f % ("", "Executed", u"Total Time (ms)", "Rate")
        total_time = duration
        total_cnt = self.txn_counters.getSampleCount()
        #total_running_time = 0

        for txn in sorted(self.txn_counters.keys()):
            txn_time = self.txn_times[txn]
            txn_cnt = "%6d - %4.1f%%" % (
                self.txn_counters[txn],
                (self.txn_counters[txn] / float(total_cnt)) * 100)
            rate = u"%.02f txn/s" % ((self.txn_counters[txn] / total_time))
            #total_running_time +=txn_time
            #rate = u"%.02f op/s" % ((self.txn_counters[txn] / total_time))
            #rate = u"%.02f op/s" % ((self.opCount / total_time))
            ret += f % (txn, txn_cnt, str(txn_time * 1000), rate)

            #LOG.info("totalOps="+str(self.totalOps))
            # total_time += txn_time
        ret += "\n" + ("-" * total_width)

        rate = 0
        if total_time > 0:
            rate = total_cnt / float(total_time)
            # TXN RATE rate = total_cnt / float(total_time)
        #total_rate = "%.02f txn/s" % rate
        total_rate = "%.02f op/s" % rate
        #total_rate = str(rate)
        ret += f % ("TOTAL", str(total_cnt), str(
            total_time * 1000), total_rate)

        return (ret.encode('utf-8'))
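When a benchmark runs with multiple workers, append() lets a coordinator fold the per-worker results into a single report. A hypothetical merge loop, again assuming the module context above; make_worker_result is a stand-in for real worker output.

# Hypothetical coordinator-side merge: fold per-worker Results into a total.
def make_worker_result():
    r = Results()
    r.startBenchmark()
    txn_id = r.startTransaction("NewOrder")
    r.stopTransaction(txn_id, opCount=5)
    r.stopBenchmark()
    return r

total = Results()
for worker_result in [make_worker_result(), make_worker_result()]:
    total.append(worker_result)
print(total.show())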
Example #5
        raise Exception("No collections were found in metadata catalog")
    
    for sess in metadata_db.Session.fetch():
        for op in sess["operations"]:
            QUERY_COUNTS.put(op["query_hash"])
            if not op["query_hash"] in QUERY_HASH_XREF:
                QUERY_HASH_XREF[op["query_hash"]] = [ ]
            QUERY_HASH_XREF[op["query_hash"]].append(op)
            QUERY_COLLECTION_COUNTS.put(op["collection"])
        ## FOR
    ## FOR

    LOG.info("Total # of Unique Queries: %d", len(QUERY_COUNTS.values()))
    TOTAL_DB_SIZE = sum(col_info["data_size"] for col_info in colls.itervalues())
    LOG.debug("Estimated Total Database Size: %d", TOTAL_DB_SIZE)
    TOTAL_QUERY_COUNT = QUERY_COLLECTION_COUNTS.getSampleCount()
    LOG.debug("Total # of Queries: %d", TOTAL_QUERY_COUNT)
    
    # HACK: Fix collections
    for col_name, col_info in colls.iteritems():
        num_queries = QUERY_COLLECTION_COUNTS.get(col_name)
        col_info["workload_queries"] = num_queries
        col_info["workload_percent"] = num_queries / float(TOTAL_QUERY_COUNT)
        try:
            col_info.save()
        except Exception as ex:
            LOG.warn(ex)
    ## FOR
    
    ## ----------------------------------------------
    ## DUMP DATABASE SCHEMA