def task2(configData):
    """Run task 2: feed the input integers through a bloom filter.

    Reads the integers named by configData['inFileName'], hands them to
    testBF together with a freshly built BloomFilter and configData['m'],
    writes the returned result list to configData['outFileName'] and passes
    it to util.compareResults.

    Args:
        configData: configuration dictionary; the keys 'inFileName', 'm'
            and 'outFileName' are read here and the whole dictionary is
            forwarded to BloomFilter and util.compareResults.

    Returns:
        None.  Returns early (after printing a message) when the input
        file yields no values.
    """
    # build the bloom filter from the supplied configuration
    bloom = BloomFilter(configData)

    # the input is a list of integers; per the assignment notes the first
    # configData['m'] of them are inserted into the filter and all of them
    # are tested for membership
    values = util.readIntFileDat(configData['inFileName'])
    count = len(values)
    if count == 0:
        print('No Data to add to bloom filter')
        return
    print('bfInputData has ' + str(count) + ' elements')

    # testBF is expected to perform both the insertions and the membership tests
    results = testBF(values, bloom, configData['m'])

    # persist the results, then check them against the validation data
    # for this hash function
    util.writeFileDat(configData['outFileName'], results)
    util.compareResults(results, configData)

    print('Task 2 complete')
def task3(configData):
    """Run repeated bloom filter trials for a range of hash-function counts.

    A pool of distinct random integers is generated and split into values
    to insert (``added``) and values held back (``witheld``).  For every k
    derived from ``c``, ``configData`` is overridden with that k, its list
    of random seeds, ``n``, ``m``, a fixed ``genSeed`` and hash ``type`` 1.
    Ten bloom filters are then built and run through ``testBF``; each
    result list is written to configData['outFileName'] and passed to
    ``util.compareResults``.  The chosen parameters are printed at the end.

    Args:
        configData: configuration dictionary.  The keys 'k', 'seeds', 'n',
            'm', 'genSeed' and 'type' are overwritten in place;
            'outFileName' is read.

    Returns:
        None.
    """
    # NOTE task 3 requires remaking the bloom filter multiple times to
    # perform the appropriate trials; this is done here by changing the
    # config dictionary to hold the values of k and n (filter size) for
    # each trial.
    # REMEMBER for type 2 hashes n must be prime.  util.findNextPrime(n) is
    # provided to find the next largest prime value of some integer.
    c = 10  # 10 or 15
    const_lst = [0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]
    k_list = [int(const * c) for const in const_lst]

    n = 1917017
    # float() forces true division so the rounding is not defeated by
    # Python 2 integer division (which would truncate before round() runs)
    m = int(round(float(n) / c))
    z = m + m  # number of distinct elements wanted in total

    dataSet = set()
    maxVal = 2147483647  # == 2**31-1
    # generate a batch of randoms per iteration, faster than 1 at a time;
    # the batch is never allowed to be empty or the loop could not progress
    mT = max(1, m // 100)
    while len(dataSet) < z:
        dataSet.update(np.random.randint(0, maxVal, mT))

    # materialise the set once so both slices come from the same ordering
    values = list(dataSet)
    added = values[:m]    # the first m values
    witheld = values[m:]  # everything else (no element skipped in between)

    # one list of k random parameters per entry of k_list; the seeds are
    # used below, the a / b lists are only printed at the end of this
    # function (no type 2 hash trial is implemented here)
    listof_seedlist = [[random.randint(0, n) for _ in range(k)] for k in k_list]
    listof_alist = [[random.randint(1, n) for _ in range(k)] for k in k_list]
    listof_blist = [[random.randint(0, n) for _ in range(k)] for k in k_list]

    # hash function 1
    listof_trail_hf1_list = []
    for k1, seeds in zip(k_list, listof_seedlist):
        # override parameters for this value of k
        configData['k'] = k1
        configData['seeds'] = seeds
        configData['n'] = n
        configData['m'] = m
        configData['genSeed'] = 1994958112
        configData['type'] = 1

        trail_hf1_list = []
        for _ in range(10):
            bf = BloomFilter(configData)
            # NOTE(review): task2 calls testBF with three arguments; confirm
            # that testBF accepts this four-argument form.
            outputResList = testBF(added, witheld, bf, configData['m'])
            util.writeFileDat(configData['outFileName'], outputResList)
            # load appropriate validation data list for this hash function
            # and compare to results
            util.compareResults(outputResList, configData)
            # placeholder: no per-trial statistic is computed yet
            trail_hf1_list.append('a')
        listof_trail_hf1_list.append(trail_hf1_list)

    # print(x) with a single argument behaves the same on Python 2 and 3
    print(c)
    print(k_list)
    print(m)
    print(n)
    print(listof_seedlist)
    print(listof_alist)
    print(listof_blist)
    print('Task 3 complete')