Exemplo n.º 1
0
 def add(self, uuid):
     """Register ``uuid`` in the HyperLogLog registers.

     Falsy keys (``None``, empty string, ...) are ignored.  Otherwise the key
     is hashed, the lowest ``self.b`` bits of the hash pick a register in
     ``self.M``, and that register is raised to the value returned by
     ``self._get_rho`` for the remaining bits if that value is larger.
     """
     if not uuid:
         return
     try:
         digest = smhasher.murmur3_x64_64(uuid)
     except UnicodeEncodeError:
         # Hashing the key as-is failed on encoding: retry with the key
         # encoded to ASCII, silently dropping characters that do not fit.
         digest = smhasher.murmur3_x64_64(uuid.encode('ascii', 'ignore'))
     # The lowest b bits of the hash index the register to update.
     register = digest & ((1 << self.b) - 1)
     # The rest of the hash (with those b bits shifted out) is ranked.
     remainder = digest >> self.b
     current = self.M[register]
     rank = self._get_rho(remainder, self.bitcount_arr)
     self.M[register] = max(current, rank)
Exemplo n.º 2
0
 def Hit(self, e):
     """Record a hit on entry ``e`` and move it to the top activity level.

     ``hits`` and ``requests`` are bumped on every call.  The entry is only
     processed further when ``hash(e.key) % RLIMIT < R``; otherwise
     ``Rignored`` is incremented and the method returns early.  For sampled
     entries the PDF is updated, the entry leaves its current activity
     bucket, ``ageentries()`` runs if the last bucket would reach
     ``binsize``, and the entry is placed in the ``MAX_ACTIVITY`` bucket
     with ``e.activity = TOP_VAL``.
     """
     self.hits += 1
     self.requests += 1
     digest = smhasher.murmur3_x64_64(e.key)
     sampled = (digest % self.RLIMIT) < self.R
     if not sampled:
         self.Rignored += 1
         return
     self.Rpassed += 1

     bucket = self.map_activity(e.activity)
     self.binsizesum += self.activecount[bucket]
     # The PDF has to be updated before any activity count is modified.
     self.update_pdf(bucket)
     if self.R == 1:
         # Consistency check, only performed when R == 1.
         assert self.activecount[bucket] >= 1
     self.activecount[bucket] -= 1

     # Count the entry back in when it was just taken out of the
     # MAX_ACTIVITY bucket (presumably activecount[-1] is that bucket --
     # TODO confirm).
     reentering = 1 if bucket == self.MAX_ACTIVITY else 0
     if self.activecount[-1] + reentering >= self.binsize:
         self.ageentries()

     self.activecount[self.MAX_ACTIVITY] += 1
     e.activity = self.TOP_VAL
     if self.R == 1:
         # The PDF mass must match the number of hits seen so far.
         assert abs(self.hits - sum(self.pdf.values())) < 1.0e-5
Exemplo n.º 3
0
 def Hit(self, e):
     """Account for a hit on entry ``e`` and promote it to ``TOP_VAL``.

     Always increments ``hits`` and ``requests``.  Entries whose key hash
     fails the sampling test (``hash % RLIMIT < R``) only increment
     ``Rignored``.  Sampled entries increment ``Rpassed``, feed the PDF,
     are removed from their current activity bucket and re-added to the
     ``MAX_ACTIVITY`` bucket, ageing the entries first when the last bucket
     would otherwise reach ``binsize``.
     """
     self.hits += 1
     self.requests += 1
     key_hash = smhasher.murmur3_x64_64(e.key)
     if not ((key_hash % self.RLIMIT) < self.R):
         # Not part of the sample: count it and stop here.
         self.Rignored += 1
         return
     self.Rpassed += 1

     level = self.map_activity(e.activity)
     self.binsizesum += self.activecount[level]
     # Update the PDF first: it must see the counts before they change.
     self.update_pdf(level)
     if self.R == 1:
         assert self.activecount[level] >= 1
     self.activecount[level] -= 1

     # One extra is added to the last bucket's count when the entry came
     # from the MAX_ACTIVITY bucket itself.
     extra = 1 if level == self.MAX_ACTIVITY else 0
     if self.activecount[-1] + extra >= self.binsize:
         self.ageentries()

     self.activecount[self.MAX_ACTIVITY] += 1
     e.activity = self.TOP_VAL
     if self.R == 1:
         # Only checked when R == 1: total PDF mass equals the hit count.
         assert abs(self.hits - sum(self.pdf.values())) < 1.0e-5
Exemplo n.º 4
0
 def increment(self, element: object) -> None:
     """Fold ``element`` into the per-bucket maxima.

     The element's ``str()`` form is UTF-8 encoded and hashed, the hash is
     reduced modulo ``self.hash_function_upper_bound``, and the bucket
     chosen by ``self.get_bucket_id`` keeps the larger of its current value
     and ``self.count_leading_zeros`` of the reduced hash.
     """
     encoded = str(element).encode('utf-8')
     reduced = smhasher.murmur3_x64_64(encoded) % self.hash_function_upper_bound
     bucket = self.get_bucket_id(reduced)
     buckets = self.max_hash_value_by_buckets
     previous = buckets[bucket]
     buckets[bucket] = max(previous, self.count_leading_zeros(reduced))
Exemplo n.º 5
0
 def Evict(self, key):
     """Remember an evicted ``key`` in the first filter, if it is sampled.

     Only keys whose hash is a multiple of ``self.R`` are considered.  When
     the first filter's counter, scaled by ``R``, has reached
     ``capacity_per_filter`` the filters are rotated first.  The key is then
     added to the first filter (and its counter bumped) unless the filter
     already reports it.
     """
     digest = smhasher.murmur3_x64_64(key)
     if (digest % self.R) != 0:
         return
     if self.R * self.counters[self.first] >= self.capacity_per_filter:
         self.rotateFilters()
     # self.first is read again here, after the possible rotation.
     if self.cfs[self.first].check(key):
         return
     self.cfs[self.first].add(key, 0)
     self.counters[self.first] += 1
Exemplo n.º 6
0
 def Evict(self, key):
     """Insert a sampled, evicted ``key`` into the first filter.

     Keys with ``hash % R != 0`` are skipped.  If ``R`` times the first
     filter's counter is at least ``capacity_per_filter``, ``rotateFilters``
     is called before the insertion.  A key the first filter already
     reports is not inserted again and does not change the counter.
     """
     key_hash = smhasher.murmur3_x64_64(key)
     not_sampled = (key_hash % self.R) != 0
     if not_sampled:
         return
     first_is_full = (
         self.R * self.counters[self.first] >= self.capacity_per_filter)
     if first_is_full:
         self.rotateFilters()
     already_present = self.cfs[self.first].check(key)
     if not already_present:
         self.cfs[self.first].add(key, 0)
         self.counters[self.first] += 1
Exemplo n.º 7
0
 def Set(self, e):
     """Place a sampled entry ``e`` at the top activity level.

     The entry is processed only when ``hash(e.key) % RLIMIT < R``; in that
     case ``Rpassed`` is incremented, ``e.activity`` becomes ``TOP_VAL`` and
     the ``MAX_ACTIVITY`` bucket count grows by one.  Unsampled entries
     return immediately; ``Rignored`` is deliberately left untouched here
     (its increment was disabled in the original code).
     """
     digest = smhasher.murmur3_x64_64(e.key)
     sampled = (digest % self.RLIMIT) < self.R
     if not sampled:
         return
     self.Rpassed += 1
     e.activity = self.TOP_VAL
     self.activecount[self.MAX_ACTIVITY] += 1
Exemplo n.º 8
0
 def Set(self, e):
     """Mark entry ``e`` as freshly set if its key falls in the sample.

     Sampling test: ``hash(e.key) % RLIMIT < R``.  On success ``Rpassed``
     and the ``MAX_ACTIVITY`` bucket count are incremented and
     ``e.activity`` is set to ``TOP_VAL``.  On failure nothing is recorded:
     the ``Rignored`` increment is commented out in the original, so that
     counter is not touched by this method.
     """
     key_hash = smhasher.murmur3_x64_64(e.key)
     if not ((key_hash % self.RLIMIT) < self.R):
         return
     self.Rpassed += 1
     e.activity = self.TOP_VAL
     self.activecount[self.MAX_ACTIVITY] += 1
Exemplo n.º 9
0
 def _make_hashfuncs(key):
     """Return ``nbr_slices`` hash values for ``key``, each modulo ``nbr_bits``.

     ``text_type`` keys are UTF-8 encoded; anything else is passed through
     ``str()``.  The hashes are chained: the first uses seed 0 and every
     later one is seeded with the previous (unreduced) hash.
     ``text_type``, ``nbr_slices`` and ``nbr_bits`` come from the
     enclosing scope.
     """
     key = key.encode('utf-8') if isinstance(key, text_type) else str(key)
     positions = []
     seed = 0
     for _ in range(nbr_slices):
         digest = smhasher.murmur3_x64_64(key, seed)
         positions.append(digest % nbr_bits)
         # The full digest seeds the next round (a zero digest seeds with 0).
         seed = digest or 0
     return positions
Exemplo n.º 10
0
 def Evict(self, e):
     """Remove a sampled entry ``e`` from its activity bucket.

     Entries failing the sampling test (``hash(e.key) % RLIMIT < R``) only
     increment ``Rignored``.  Sampled entries increment ``Rpassed`` and
     decrement the count of the bucket ``map_activity(e.activity)`` maps to.
     When ``R == 1`` the count is asserted to stay non-negative.
     """
     digest = smhasher.murmur3_x64_64(e.key)
     sampled = (digest % self.RLIMIT) < self.R
     if not sampled:
         self.Rignored += 1
         return
     self.Rpassed += 1
     bucket = self.map_activity(e.activity)
     self.activecount[bucket] -= 1
     if self.R == 1:
         assert self.activecount[bucket] >= 0
Exemplo n.º 11
0
 def Evict(self, e):
     """Drop entry ``e`` from the activity counts when its key is sampled.

     The key hash modulo ``RLIMIT`` is compared against ``R``: below ``R``
     the entry counts as passed and the bucket given by
     ``map_activity(e.activity)`` loses one entry; otherwise only
     ``Rignored`` is incremented.  With ``R == 1`` an assertion checks that
     the bucket count did not go negative.
     """
     key_hash = smhasher.murmur3_x64_64(e.key)
     if not ((key_hash % self.RLIMIT) < self.R):
         self.Rignored += 1
         return
     self.Rpassed += 1
     level = self.map_activity(e.activity)
     self.activecount[level] -= 1
     if self.R == 1:
         # Consistency check, only performed when R == 1.
         assert self.activecount[level] >= 0
Exemplo n.º 12
0
    def Miss(self, key):
        """Account for a miss on ``key`` using the three rotating filters.

        Keys whose hash is not a multiple of ``self.R`` only increment
        ``Rignored``.  For sampled keys the filters are probed in rotation
        order starting at ``self.first``; ``start``/``end`` accumulate the
        ``R``-scaled counters so that ``[start, end)`` is the range covered by
        the filter that reports the key.  A reported key adds a (possibly
        scaled) amount to ``ghosthits`` and spreads ``val`` over
        ``self.pdf[start:min(int(1.5 * capacity), end)]``; an unreported key
        increments ``ghostmisses``.

        Returns:
            ``True`` if a filter reported the key, ``False`` if none did, and
            ``None`` when the key was not sampled.
        """
        if (smhasher.murmur3_x64_64(key) % self.R) != 0:
            self.Rignored += 1
            return
        self.Rpassed += 1

        found = False
        start = 0
        end = 0
        base = self.first
        for index in [(base + step) % 3 for step in range(3)]:
            scaled = self.R * self.counters[index]
            end += scaled
            if self.cfs[index].check(key):
                found = True
                break
            start += scaled

        if not found:
            self.ghostmisses += 1
            return found

        if end <= self.capacity:
            # The whole range lies within capacity: full credit.
            credit = self.R * (1.0 - FPP_RATE)
        elif start < self.capacity:
            # The range straddles capacity.  The original comment says values
            # in [capacity, ..., 1.5*capacity] are overestimated, hence the
            # scale-down.
            # NOTE(review): the factor is (end - capacity) / (end - start),
            # i.e. the share of the range ABOVE capacity -- confirm that
            # (capacity - start) was not intended.
            credit = self.R * (1.0 - FPP_RATE) * (end - self.capacity +
                                                  0.0) / (end - start + 0.0)
            assert credit >= 0
        else:
            # Hit at a stack distance greater than the capacity.
            credit = 0
        self.ghosthits += credit

        val = self.R * (1.0 / (end - start)) * (1.0 - FPP_RATE)
        upper = min(int(1.5 * self.capacity), end)
        for i in xrange(start, upper):
            self.pdf[i] += val
        return found
Exemplo n.º 13
0
    def Miss(self, key):
        """Process a miss on ``key`` against the rotating filters.

        Unsampled keys (``hash % R != 0``) increment ``Rignored`` and yield
        ``None``.  Sampled keys increment ``Rpassed`` and are looked up in the
        three filters, starting from ``self.first``.  While searching,
        ``start`` and ``end`` sum the ``R``-scaled counters of the filters
        visited, so the reporting filter covers ``[start, end)``.  A reported
        key contributes to ``ghosthits`` and to ``self.pdf`` over that range
        (capped at ``int(1.5 * capacity)``); otherwise ``ghostmisses`` is
        incremented.

        Returns:
            ``True``/``False`` for whether any filter reported the key, or
            ``None`` if the key was not sampled.
        """
        key_hash = smhasher.murmur3_x64_64(key)
        if (key_hash % self.R) != 0:
            self.Rignored += 1
            return
        self.Rpassed += 1

        found = False
        start = 0
        end = 0
        origin = self.first
        for offset in range(3):
            index = (origin + offset) % 3
            span = self.R * self.counters[index]
            end += span
            if self.cfs[index].check(key):
                found = True
                break
            start += span

        if not found:
            self.ghostmisses += 1
            return found

        if end > self.capacity and start < self.capacity:
            # Range crosses the capacity boundary.  Per the original comment,
            # values in [capacity, ..., 1.5*capacity] are overestimated, so
            # the contribution is scaled down.
            # NOTE(review): the scale factor is the fraction of the range
            # beyond capacity, (end - capacity) / (end - start); verify this
            # is the intended fraction.
            gain = self.R * (1.0 - FPP_RATE) * (end - self.capacity + 0.0) / (end - start + 0.0)
            assert gain >= 0
        elif end > self.capacity:
            # Hit at a stack distance greater than the capacity.
            gain = 0
        else:
            gain = self.R * (1.0 - FPP_RATE)
        self.ghosthits += gain

        val = self.R * (1.0 / (end - start)) * (1.0 - FPP_RATE)
        limit = min(int(1.5 * self.capacity), end)
        for i in xrange(start, limit):
            self.pdf[i] += val
        return found
Exemplo n.º 14
0
	def hashval(self, message):
		"""Return the ``murmur3_x64_64`` hash of ``message`` divided by ``INT_64_MAX``."""
		# NOTE(review): presumably meant to normalise the hash to a fraction;
		# under Python 2 without ``from __future__ import division`` this ``/``
		# floors when both operands are ints -- confirm.
		return smhasher.murmur3_x64_64(message) / INT_64_MAX
Exemplo n.º 15
0
import sys
import smhasher

# Hash 100 generated strings with a fixed seed and print which of four
# buckets (0-3) each one falls into.
random_str = "random_str_"
for num in range(0, 100):
    new_str = random_str + str(num)
    # abs() is applied before the modulo, so the bucket is derived from the
    # hash's magnitude.
    val = abs(smhasher.murmur3_x64_64(new_str, 0x1234ABCD)) % 4
    # print(...) with a single argument produces the same output on Python 2
    # and, unlike the ``print val`` statement, is also valid on Python 3.
    print(val)
Exemplo n.º 16
0
def shard_from_id(id):
    """Map ``id`` to one of two shard names, ``'shard1'`` or ``'shard2'``.

    The shard is chosen by the parity of the ``murmur3_x64_64`` hash of
    ``id``: an even hash gives ``'shard1'``, an odd hash gives ``'shard2'``.
    """
    parity = smhasher.murmur3_x64_64(id) % 2
    return "shard{}".format(parity + 1)