Exemplo n.º 1
0
class BloomFilter():
    """Probabilistic membership test backed by a ``BitSet``.

    Each value is mapped by every configured hash function to a bit index
    (the hash modulo the filter's bit capacity).  Adding a value sets those
    bits; a membership test succeeds only when all of them are set, so a
    positive answer may be a false positive caused by other values.
    """

    def __init__(self, *hash_functions, **kwds):
        """Create an empty filter.

        @param hash_functions: Callables mapping a value to an integer.
            When none are given, ``object_hash``, ``object_repr_hash``,
            ``object_str_hash`` and ``object_id`` are used.
        @param max_size: In bytes (keyword argument, default 1024).  It is
            rounded up to a whole multiple of the bitset's ``itemsize``.
        """
        self.__bitset = BitSet()

        if not hash_functions:
            hash_functions = (object_hash, object_repr_hash, object_str_hash, object_id)
        self.__hash_functions = hash_functions

        max_size = kwds.get("max_size", 1024)
        itemsize = self.__bitset.itemsize
        # Round the byte budget up to whole bitset items, then convert the
        # resulting byte count to a bit count (<< 3 multiplies by 8).
        self.__max_bits = int(itemsize * ceil(float(max_size) / itemsize)) << 3

    def extend(self, values):
        """Add every element of the iterable ``values`` to the filter."""
        # An explicit loop is required: map() is lazy on Python 3, so using
        # it only for its side effects would never call add().
        for value in values:
            self.add(value)

    def __get_bit_indexes_of(self, value):
        """Return a generator of the bit indexes ``value`` hashes to."""
        return (hf(value) % self.__max_bits for hf in self.__hash_functions)

    def add(self, value):
        """Set, in the underlying bitset, every bit ``value`` hashes to."""
        indexes_to_set = self.__get_bit_indexes_of(value)
        self.__bitset.set_indexes(indexes_to_set)

    def __contains__(self, value):
        """Return True when every bit ``value`` hashes to is set."""
        indexes_to_get = self.__get_bit_indexes_of(value)
        # all() stops at the first unset bit, so later hashes are skipped.
        return all(index in self.__bitset for index in indexes_to_get)

    def __len__(self):
        """Return the bitset length divided by 8.

        NOTE(review): presumably ``len(BitSet)`` is a bit count, making this
        a size in bytes (as ``__repr__`` labels it) -- confirm.
        """
        return len(self.__bitset) >> 3

    def __repr__(self):
        """Return a description showing the size and the raw bitset."""
        return "BloomFilter (%s bytes): %s" % (len(self), self.__bitset)
Exemplo n.º 2
0
 def __init__(self, *hash_functions, **kwds):
     """Set up an empty filter.

     @param hash_functions: Callables used to hash values; when none are
         passed, object_hash, object_repr_hash, object_str_hash and
         object_id are used.
     @param max_size: In bytes (keyword argument, default 1024); rounded
         up to a whole multiple of the bitset's itemsize.
     """
     self.__bitset = BitSet()

     # An empty *args tuple is falsy, so the defaults are only looked up
     # when the caller supplied no hash functions.
     self.__hash_functions = hash_functions or (
         object_hash, object_repr_hash, object_str_hash, object_id)

     requested_bytes = float(kwds.get("max_size", 1024))
     itemsize = self.__bitset.itemsize
     whole_items = ceil(requested_bytes / itemsize)
     # Bytes actually reserved, converted to bits (<< 3 multiplies by 8).
     self.__max_bits = int(itemsize * whole_items) << 3
Exemplo n.º 3
0
def do_assignment(repeats=10, size=5000, m=16):
    """Run the dominance experiment several times and log the median.

    Builds ``repeats`` sets via ``BitSet(size, m)``, calls
    ``find_dominated()`` on each and records the length of its ``no_dom``
    attribute.  Each count is printed as it is produced; the median of all
    counts is then printed and appended to ``generated.txt``.

    Args:
        repeats: Number of sets to generate.
        size: First argument passed to ``BitSet`` (reported as "Set size").
        m: Second argument passed to ``BitSet`` (reported as
            "Bits in a string").
    """
    print("Number of non-dominated BitStrings: ")
    maximals = []
    for _ in range(repeats):
        candidates = BitSet(size, m)
        candidates.find_dominated()
        maximals.append(len(candidates.no_dom))
        # Flush so progress is visible while the remaining runs execute.
        print(maximals[-1], sep=' ', end=' ', flush=True)
    print()

    med = median(maximals)
    print("Approximation of E[M_n]: ", med)

    template = (
        "Bits in a string: {}\n"
        "Set size: {}\n"
        "Number of sets: {}\n"
        "Approximation of E[M_n]: {}\n"
        "----\n"
    )

    # Append, so results of earlier runs are kept in the same file.
    with open("generated.txt", "a+") as out:
        out.write(template.format(m, size, repeats, med))
Exemplo n.º 4
0
    def load_name_index(cls, bits):
        """Parse a name-index stream into an upper-cased name -> index map.

        Reads, in order: version, signature, age and GUID; the byte size of
        the string buffer (which is skipped); the declared entry count and
        the maximum index; then the "present" and "deleted" bitsets.  For
        each set bit in "present" an (offset, index) pair is read and the
        C string at that offset inside the string buffer is looked up.

        Args:
            bits: Stream reader exposing ``read_int32()``, ``read_guid()``,
                ``read_cstring()`` and a settable ``position``.

        Returns:
            Tuple ``(names, version, signature, age, guid)``.

        Raises:
            PdbDebugException: If the deleted bitset is not empty, or the
                number of entries read differs from the declared count.
        """
        version = bits.read_int32()      # bytes 0..3
        signature = bits.read_int32()    # bytes 4..7
        age = bits.read_int32()          # bytes 8..11
        guid = bits.read_guid()          # bytes 12..27

        # NOTE: the version is returned but not validated; a check against
        # 20000404 exists only as disabled code in the original source.

        # bytes 28..31: size in bytes of the string buffer that follows.
        strings_size = bits.read_int32()
        strings_start = bits.position
        # Jump over the string buffer; names are fetched from it on demand.
        bits.position += strings_size

        # Map index header.
        expected = bits.read_int32()     # n+0..3 hash size
        max_ni = bits.read_int32()       # n+4..7 maximum ni

        present = BitSet(bits)
        deleted = BitSet(bits)
        if not deleted.is_empty():
            raise PdbDebugException('Unsupported PDB deleted bitset is not empty.')

        names = {}  # string -> int
        found = 0
        for slot in range(max_ni):
            if not present.is_set(slot):
                continue
            name_offset = bits.read_int32()
            name_index = bits.read_int32()

            # Temporarily seek into the string buffer, then come back so
            # the next (offset, index) pair is read from the right place.
            resume_at = bits.position
            bits.position = strings_start + name_offset
            name = bits.read_cstring()
            bits.position = resume_at

            names[name.upper()] = name_index
            found += 1

        if found != expected:
            raise PdbDebugException('Count mismatch. (%u != %u)' % (found, expected))
        return names, version, signature, age, guid
Exemplo n.º 5
0
    def load_name_index(cls, bits):
        """Read the name index from ``bits`` and return it with its header.

        The header fields (version, signature, age, GUID) are read first,
        followed by the string buffer size.  The buffer itself is skipped,
        and the entry table after it is decoded: each slot marked in the
        "present" bitset contributes one (string offset, index) pair.

        Args:
            bits: Reader providing ``read_int32()``, ``read_guid()``,
                ``read_cstring()`` and an assignable ``position``.

        Returns:
            ``(result, ver, sig, age, guid)`` where ``result`` maps each
            upper-cased name to its index.

        Raises:
            PdbDebugException: When the "deleted" bitset has any bit set or
                the entry count does not match the declared one.
        """
        header_ver = bits.read_int32()   # 0..3   Version
        header_sig = bits.read_int32()   # 4..7   Signature
        header_age = bits.read_int32()   # 8..11  Age
        header_guid = bits.read_guid()   # 12..27 GUID

        # The version value is passed back to the caller unchecked.

        # String buffer: remember where it starts, then step past it.
        buffer_len = bits.read_int32()   # 28..31 Bytes of Strings
        buffer_begin = bits.position
        after_buffer = bits.position + buffer_len
        bits.position = after_buffer

        # Map index.
        declared = bits.read_int32()     # n+0..3 hash size.
        upper = bits.read_int32()        # n+4..7 maximum ni.

        present = BitSet(bits)
        deleted = BitSet(bits)
        if not deleted.is_empty():
            raise PdbDebugException(
                'Unsupported PDB deleted bitset is not empty.')

        mapping = {}  # string -> int
        seen = 0
        for candidate in range(upper):
            if not present.is_set(candidate):
                continue
            string_at = bits.read_int32()
            value = bits.read_int32()

            # Look the name up in the string buffer, then restore the
            # cursor so the entry table keeps being read sequentially.
            bookmark = bits.position
            bits.position = buffer_begin + string_at
            key = bits.read_cstring().upper()
            bits.position = bookmark

            mapping[key] = value
            seen += 1

        if seen != declared:
            raise PdbDebugException(
                'Count mismatch. (%u != %u)' % (seen, declared))
        return mapping, header_ver, header_sig, header_age, header_guid
Exemplo n.º 6
0
def one_set():
    """Build a default ``BitSet``, run ``find_dominated()`` and print it."""
    sample = BitSet()
    sample.find_dominated()
    # print() renders its argument with str(), so no explicit conversion
    # is needed.
    print(sample)
Exemplo n.º 7
0
 def dotest(l):
     """Set each index in ``l`` on a fresh BitSet and check get_bits().

     ``l`` is sorted in place first; the assertion compares the bitset's
     ``get_bits()`` result against that sorted list.
     """
     l.sort()
     bitset = BitSet()
     for position in l:
         bitset[position] = 1
     self.assertEqual(bitset.get_bits(), l)
Exemplo n.º 8
0
 def find_matches(self, inverted_index):
     """Return a BitSet holding the doc ids that match this field/value.

     The ids come from ``inverted_index.get_doc_ids_with`` called with
     this object's ``_field_id`` and ``_value``.
     """
     matches = BitSet()
     doc_ids = inverted_index.get_doc_ids_with(self._field_id, self._value)
     matches.set_indexes(doc_ids)
     return matches