示例#1
0
    def __init__(self, maxCardinality, error_rate):
        """Set up an empty LogLog sketch.

        maxCardinality
            upper bound on the cardinality this sketch is meant to count; it
            is handed to compute_wordsize to obtain the width of each bucket
        error_rate
            target error of the cardinality estimate, strictly between 0 and 1

        Raises ValueError when error_rate is not strictly between 0 and 1, or
        when maxCardinality is not greater than 0.
        """
        rate_in_range = 0 < error_rate < 1
        if not rate_in_range:
            raise ValueError("Error_Rate must be between 0 and 1.")
        cardinality_positive = maxCardinality > 0
        if not cardinality_positive:
            raise ValueError("maxCardinality must be > 0")

        self._maxCardinality = maxCardinality

        # k = round(log2((1.30 / error_rate)**2)), so that 1.30 / sqrt(2**k)
        # comes out close to the requested error rate.
        scaled = 1.30 / error_rate
        self._k = int(round(log(pow(scaled, 2), 2)))
        # m = 2**k buckets
        self._bucketNumber = 1 << self._k

        self._bucketSize = compute_wordsize(self._maxCardinality)

        # M(1) ... M(m) = 0: one bitarray of _bucketSize bits per bucket,
        # with every bit cleared.
        buckets = []
        for _ in xrange(self._bucketNumber):
            register = bitarray(self._bucketSize)
            register.setall(False)
            buckets.append(register)
        self._bucketList = buckets

        self.__name = "LogLog"
示例#2
0
    def __init__(self, maxCardinality, error_rate):
        """Set up an empty HyperLogLog sketch.

        maxCardinality
            upper bound on the cardinality this sketch is meant to count; it
            is handed to compute_wordsize and the result kept as _bucketSize
        error_rate
            target error of the cardinality estimate, strictly between 0 and 1

        Raises ValueError when error_rate is not strictly between 0 and 1, or
        when maxCardinality is not greater than 0.
        """
        # NOTE(review): presumably the constants __getALPHA picks from for
        # 16, 32 and 64 buckets -- confirm against that method.
        self.__ALPHA16 = 0.673
        self.__ALPHA32 = 0.697
        self.__ALPHA64 = 0.709

        rate_in_range = 0 < error_rate < 1
        if not rate_in_range:
            raise ValueError("Error_Rate must be between 0 and 1.")
        cardinality_positive = maxCardinality > 0
        if not cardinality_positive:
            raise ValueError("maxCardinality must be > 0")

        self._maxCardinality = maxCardinality

        # k = round(log2((1.04 / error_rate)**2)), so that 1.04 / sqrt(2**k)
        # comes out close to the requested error rate.
        scaled = 1.04 / error_rate
        self._k = int(round(log(pow(scaled, 2), 2)))
        # m = 2**k buckets
        self._bucketNumber = 1 << self._k

        self._bucketSize = compute_wordsize(self._maxCardinality)

        # M(1) ... M(m) = 0: every bucket starts out as the integer 0.
        self._bucketList = [0] * self._bucketNumber

        self.__name = "HyperLogLog"
        self._alpha = self.__getALPHA(self._bucketNumber)