def __init__(self, config, genHashes):
    """Store the filter sizes and the k pairs of hash coefficients.

    Args:
        config (dictionary): configuration data for this hashing object.
            'k', 'n' and 'N' are always read; 'a' and 'b' are read only
            when genHashes is false.
        genHashes (boolean): whether or not to generate new hash
            coefficients (Task 2 provides hash seeds/coefficients, Tasks 1
            and 3 require you to make them yourself)
    """
    self.k = config['k']
    self.n = config['n']
    self.N = config['N']
    # Values returned by util.findNextPrime for n and N respectively.
    self.Prime = util.findNextPrime(self.n)
    self.NPrime = util.findNextPrime(self.N)

    if not genHashes:
        # Task 2: the coefficients are supplied in the config dictionary.
        self.a = config['a']
        self.b = config['b']
        return

    # The generator is not re-seeded here (no random.seed call is made).
    # Each (a, b) pair is drawn together so the order of randint calls is
    # a0, b0, a1, b1, ... .
    drawn = [
        (random.randint(1, self.n), random.randint(0, self.n))
        for _ in range(self.k)
    ]
    self.a = [pair[0] for pair in drawn]
    self.b = [pair[1] for pair in drawn]
def __init__(self, config):
    """Build a bloom filter and its hash object from *config*.

    Args:
        config (dictionary): Configuration of this bloom filter
            config['N'] (int) universe size
            config['m'] (int) number of elements to add to filter
            config['n'] (int) number of bits in bloom filter storage array
            config['k'] (int) number of hash functions to use
            config['task'] (int) the task this bloom filter is to perform (1,2,3)
            config['type'] (int) type of hash function (1, 2, -1==unknown type)
            if type 1 hash :
                config['seeds'] (list of k ints) : seed values for k hash
                    functions for type 1 hash function
            if type 2 hash :
                config['a'] (list of k ints) : a coefficients for k hash
                    functions for type 2 hash function
                config['b'] (list of k ints) : b coefficients for k hash
                    functions for type 2 hash function

    Side effects:
        config is modified in place: config['n'] is overwritten with an int
        (replaced by util.findNextPrime(n + 1) when util.checkIfPrime
        rejects it), and config['P'] is set the same way from config['N'].

    Raises:
        ValueError: if config['type'] is neither 1 nor 2.
    """
    # task this bloom filter is performing
    self.task = config['task']
    # Tasks 1 and 3 require generated seeds/coefficients for the hashes;
    # task 2 uses the ones provided in config, so only task 2 skips
    # generation.
    genHashes = (self.task != 2)
    # type of hash for this bloom filter
    self.type = config['type']

    # make sure the storage size handed to the hash object is prime
    n = int(config['n'])
    config['n'] = n if util.checkIfPrime(n) else util.findNextPrime(n + 1)

    # same treatment for the universe size, stored under a separate key
    P = int(config['N'])
    config['P'] = P if util.checkIfPrime(P) else util.findNextPrime(P + 1)

    if self.type == 1:
        self.hashFunc = HashType1(config, genHashes)
    elif self.type == 2:
        self.hashFunc = HashType2(config, genHashes)
    # elif(self.type == 3): #add your own hashes
    else:
        message = ('BloomFilter for task ' + str(self.task)
                   + ' ctor : Unknown Hash type : ' + str(self.type))
        print(message)
        # Without a hash object there is no size to allocate; fail here
        # with a clear error instead of an AttributeError further down.
        raise ValueError(message)

    # storage array: one zero-initialised slot per bit, sized by the hash object
    bf_size = self.hashFunc.n
    self.bf_arr = [0] * bf_size
def __init__(self, config, genHashes, c):
    """Store the sizes and the a/b hash coefficients for this hashing object.

    Args:
        config (dictionary): contains configuration data for this hashing
            object. 'k', 'N' and 'task' are always read; 'm' is read for
            task 3 and 'n' otherwise; 'genSeed' is read when genHashes is
            true, 'a' and 'b' when it is false.
        genHashes (boolean): whether or not to generate new hash
            coefficients (Task 2 provides hash seeds/coefficients, Tasks 1
            and 3 require you to make them yourself)
        c: multiplier applied to config['m'] to obtain n when
            config['task'] is 3; not used for other tasks.
    """
    self.k = config['k']
    self.N = config['N']
    self.P = util.findNextPrime(self.N)
    # Task 3 sizes the filter from c and m; other tasks take n from config.
    self.n = c * config['m'] if config['task'] == 3 else config['n']

    if not genHashes:
        # Task 2: use the coefficients that are in the config dictionary.
        self.a = config['a']
        self.b = config['b']
        return

    # Seed with the value generated at config load.
    random.seed(config['genSeed'])
    # NOTE(review): the low/high/size keywords match numpy.random.randint,
    # not the stdlib random.randint - confirm which `random` this file
    # imports.
    draw_args = dict(low=0, high=(self.n - 1), size=self.k)
    self.a = random.randint(**draw_args)
    self.b = random.randint(**draw_args)
def __init__(self, config, genHashes):
    """Store the filter sizes and the k pairs of hash coefficients.

    Args:
        config (dictionary): configuration data for this hashing object.
            'k', 'n' and 'N' are always read; 'a' and 'b' are read only
            when genHashes is false.
        genHashes (boolean): when true, k new (a, b) coefficient pairs are
            drawn with random.randint; when false (task 2) they are taken
            from config.
    """
    self.k = config['k']
    self.n = config['n']
    self.N = config['N']
    # Values returned by util.findNextPrime for n and N respectively.
    self.prime = util.findNextPrime(self.n)
    self.NPrime = util.findNextPrime(self.N)

    if not genHashes:
        # Task 2: use the coefficients that are in the config dictionary.
        self.a = config['a']
        self.b = config['b']
        return

    # The generator is not re-seeded here (no random.seed call is made).
    # a and b are drawn pairwise so the randint call order stays
    # a0, b0, a1, b1, ... .
    pairs = [
        (random.randint(1, self.n - 1), random.randint(0, self.n - 1))
        for _ in range(self.k)
    ]
    self.a = [first for first, _ in pairs]
    self.b = [second for _, second in pairs]
def computeFalsePositive(configData, numTrials, k, c, hashType):
    """Average the measured false-positive ratio over several trials.

    For each trial a fresh BloomFilter is built from configData, the first
    configData['m'] values of the input file are added, and every remaining
    value is probed with contains(). The per-trial ratio is
    (number of positive probes) / m.

    Args:
        configData (dictionary): bloom filter configuration. Modified in
            place: 'k', 'type' and 'n' are overwritten before the trials
            start. 'm' and 'inFileName' are read.
        numTrials (int): number of independent filters to build and test.
        k (int): number of hash functions, stored in configData['k'].
        c: multiplier; configData['n'] is set to
            util.findNextPrime(c * configData['m']).
        hashType (int): hash type, stored in configData['type'].

    Returns:
        float: mean of the per-trial ratios.

    Raises:
        ZeroDivisionError: if numTrials is 0.
    """
    configData['k'] = k
    configData['type'] = hashType
    configData['n'] = util.findNextPrime(c * configData['m'])

    numAdded = configData['m']
    # The input file is the same for every trial, so read it once up front
    # rather than once per trial.
    bfInputData = util.readIntFileDat(configData['inFileName'])
    numValues = len(bfInputData)

    sumFalsePositive = 0
    for _ in range(numTrials):
        # initialize bloom filter
        bf = BloomFilter(configData)

        # add data to bloom filter
        for idx in range(numAdded):
            bf.add(bfInputData[idx])

        # test false positive: count positive probes among the values that
        # were not added above
        falsePositive = sum(
            1 for idx in range(numAdded, numValues)
            if bf.contains(bfInputData[idx])
        )

        # NOTE(review): the count is divided by m, not by the number of
        # probed values; the two agree only when the file holds exactly
        # 2 * m values - confirm against the input data format.
        sumFalsePositive += falsePositive / float(numAdded)

    return sumFalsePositive / numTrials