Exemplo n.º 1
0
def task2(configData):
    """Run task 2: build a bloom filter, exercise it, and validate the output.

    Reads the integer list stored in the file named by
    configData['inFileName'], passes it to testBF together with a freshly
    constructed BloomFilter and configData['m'], writes the returned result
    list to configData['outFileName'], and finally hands the results to
    util.compareResults.

    Returns None.  When the input list is empty a message is printed and the
    function returns without writing or comparing anything.
    """
    # the filter is constructed before the input is read, as in the assignment skeleton
    bloom = BloomFilter(configData)

    # Per the assignment notes, the first configData['m'] of these values are
    # to be inserted and all of them tested for membership; that work is
    # delegated to testBF below.
    inputValues = util.readIntFileDat(configData['inFileName'])
    valueCount = len(inputValues)
    if valueCount == 0:
        print('No Data to add to bloom filter')
        return
    print('bfInputData has ' + str(valueCount) + ' elements')

    # insert elements and test membership
    results = testBF(inputValues, bloom, configData['m'])

    # persist the results, then compare them against the validation data
    util.writeFileDat(configData['outFileName'], results)
    util.compareResults(results, configData)

    print('Task 2 complete')
Exemplo n.º 2
0
def _draw_unique_ints(count, max_val, batch_size):
    """Return a list of at least `count` distinct random ints in [0, max_val).

    Values are drawn `batch_size` at a time with np.random.randint and
    de-duplicated through a set, so the result may hold slightly more than
    `count` values and its order is the set's iteration order.
    """
    unique = set()
    while len(unique) < count:
        unique.update(np.random.randint(0, max_val, batch_size))
    return list(unique)


def _random_int_lists(sizes, low, high):
    """Return one list of random ints in [low, high] for each entry of `sizes`."""
    return [[random.randint(low, high) for _ in range(size)] for size in sizes]


def task3(configData):
    """Run the task 3 trials for the type 1 hash over a range of k values.

    For each k derived from c, the bloom filter settings in `configData`
    ('k', 'seeds', 'n', 'm', 'genSeed', 'type') are overridden IN PLACE and
    ten trials are run.  Each trial builds a new BloomFilter, calls testBF
    with the inserted and withheld value lists, writes the results to
    configData['outFileName'] and passes them to util.compareResults.

    The experiment parameters and the generated seed / a / b lists are
    printed at the end.  Returns None.
    """
    # Assignment notes:
    # task 3 requires remaking the bloom filter multiple times to perform the
    # appropriate trials.  This necessitates either a new bloom filter
    # constructor or changing the config dictionary to hold the appropriate
    # values for k and n (filter size) based on the c value.
    # REMEMBER for type 2 hashes n must be prime; util.findNextPrime(n) is
    # provided to find the next largest prime value of some integer.

    c = 10  # 10 or 15
    const_lst = [0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]
    k_list = [int(const * c) for const in const_lst]

    n = 1917017
    # float() forces true division; with Python 2 integer division the
    # round() would otherwise have nothing left to round.
    m = int(round(float(n) / c))
    z = m + m  # total distinct values needed: m to insert plus m to withhold

    maxVal = 2147483647  # == 2**31-1
    # draw in batches (faster than one at a time); never let the batch size
    # reach 0, which would make the generation loop spin forever
    mT = max(1, m // 100)
    pool = _draw_unique_ints(z, maxVal, mT)
    added = pool[:m]    # values inserted into the filter
    witheld = pool[m:]  # every remaining value; none of them is in `added`

    # One parameter list per k.  The generation order (all seeds, then all a,
    # then all b) is kept so a seeded `random` yields the same values.
    listof_seedlist = _random_int_lists(k_list, 0, n)
    listof_alist = _random_int_lists(k_list, 1, n)
    listof_blist = _random_int_lists(k_list, 0, n)

    # override parameters
    # configData['k'] configData['seeds'] configData['a'] configData['b']
    # configData['n'] configData['m'], configData['N']
    # configData['type'] configData['genSeed'] genHashes = False

    # TODO: placeholder for per-trial results; only 'a' markers are stored
    # and the list is not used after the loop.
    listof_trail_hf1_list = []

    # hash function 1
    for k1, seeds in zip(k_list, listof_seedlist):
        configData['k'] = k1
        configData['seeds'] = seeds
        configData['n'] = n
        configData['m'] = m
        configData['genSeed'] = 1994958112
        configData['type'] = 1
        trail_hf1_list = []
        for _ in range(10):
            bf = BloomFilter(configData)
            # NOTE(review): task2 calls testBF with three arguments
            # (data, bf, m); this call passes four.  Confirm the testBF
            # signature supports both forms.
            outputResList = testBF(added, witheld, bf, configData['m'])
            util.writeFileDat(configData['outFileName'], outputResList)
            # load appropriate validation data list for this hash function and compare to results
            util.compareResults(outputResList, configData)
            trail_hf1_list.append('a')
        listof_trail_hf1_list.append(trail_hf1_list)

    # single-argument print(...) prints the same under Python 2 and Python 3
    print(c)
    print(k_list)
    print(m)
    print(n)
    print(listof_seedlist)
    print(listof_alist)
    print(listof_blist)

    print('Task 3 complete')