Пример #1
0
def getDataStructureEcoli(ds):
    """Return a new container instance selected by name.

    Args:
        ds: Name of the structure to build. "HashSet" and "BloomFilter"
            select those classes; every other value yields a CountingFilter.

    Returns:
        A fresh HashSet, or a BloomFilter / CountingFilter constructed with
        ECOLI_GENOME_AVERAGE_SIZE and FPR (both defined outside this block).
    """
    if ds == "HashSet":
        return HashSet()
    if ds == "BloomFilter":
        return BloomFilter(ECOLI_GENOME_AVERAGE_SIZE, FPR)
    # Any other name falls back to the counting filter.
    return CountingFilter(ECOLI_GENOME_AVERAGE_SIZE, FPR)
    def test_insert_contains(self):
        """Inserted keys are always found; absent keys only as false positives.

        Inserts the strings "0".."999" in shuffled order, checks that every
        one of them is reported as present, then probes "-1".."-1000" (never
        inserted) and requires the number of hits to stay below
        ``n * (p + buffer)``.

        ``n``, ``p`` and ``buffer`` are defined outside this block --
        presumably the filter capacity, the target false-positive rate and a
        tolerance margin (TODO confirm).
        """
        s = CountingFilter(n, p)

        # Insert the keys in a random order.
        keys = list(range(1000))
        random.shuffle(keys)
        for key in keys:
            s.insert(str(key))

        # If the filter actually contains an element, it must always say so.
        for i in range(1000):
            self.assertTrue(s.contains(str(i)))

        # Keys that were never inserted can only show up as false positives.
        errors = sum(1 for i in range(-1, -1001, -1) if s.contains(str(i)))
        print(errors)
        # assertLess reports both operands on failure, unlike assertTrue(a < b).
        self.assertLess(errors, n * (p + buffer))
    def test_intersection1(self):
        """Intersecting filters built from disjoint key sets leaves few hits.

        ``a`` receives "0".."999" and ``b`` receives "1000".."1999".  After
        ``a.intersection(b)`` (the return value is ignored, so the test relies
        on ``a`` being updated in place) every key in "0".."1999" is probed
        and the number of hits must stay below ``n * 0.01``.
        """
        # A smaller false-positive rate than p keeps spurious intersects rare.
        reduced_fpr = p / 50
        a = CountingFilter(n, reduced_fpr)
        b = CountingFilter(n, reduced_fpr)

        for key in range(1000):
            a.insert(str(key))
        for key in range(1000, 2000):
            b.insert(str(key))

        a.intersection(b)

        # Nothing was inserted into both filters, so any key still reported
        # as present is an artefact of hash collisions.
        errors = sum(1 for key in range(2000) if a.contains(str(key)))

        print("number of intersections is", errors)
        self.assertTrue(errors < n * 0.01)
    def test_union2(self):
        """Union of a filter with one holding a subset of its keys.

        ``a`` receives "0".."999" and ``b`` receives "500".."999".  After
        ``a.union(b)`` every key "0".."999" must be reported as present, and
        probing "-1".."-1000" (never inserted) must produce fewer than
        ``n * (fpr + buffer)`` hits, where ``fpr`` comes from
        ``self.get_false_positive_rate``.
        """
        a = CountingFilter(n, p)
        b = CountingFilter(n, p)

        for key in range(1000):
            a.insert(str(key))
        for key in range(500, 1000):
            b.insert(str(key))

        a.union(b)

        # Every key that went into either filter must be found in the union.
        for key in range(1000):
            self.assertTrue(a.contains(str(key)))

        # Negative keys were never inserted; any hit is a false positive.
        errors = sum(1 for k in range(1, 1001) if a.contains(str(-k)))
        print("number of errors is", errors)

        bit_size = a.getBitSize()
        hash_count = a.getHashCount()
        fpr = self.get_false_positive_rate(n, bit_size, hash_count)
        print("false positive rate is", fpr)
        self.assertTrue(errors < n * (fpr + buffer))
 def test_check_Union(self):
     """Union is expected to add up the counts of a shared key.

     The key "1034" is inserted twice into each of two filters; after
     ``a.union(b)`` the test expects ``a.howMany("1034")`` to be 4.
     """
     a = CountingFilter(n, p)
     a.insert(str(1034))
     a.insert(str(1034))
     b = CountingFilter(n, p)
     b.insert(str(1034))
     b.insert(str(1034))
     a.union(b)
     ret = a.howMany(str(1034))
     # assertEqual shows the actual count on failure instead of "False is not true".
     self.assertEqual(ret, 4)
    def test_check_if_in(self):
        """Counts survive insertion and an intersection with an equal filter.

        The key "1034" is inserted twice into ``a`` and ``howMany`` must
        report 2.  A second filter ``b`` gets the same two insertions; after
        ``a.intersection(b)`` the count reported by ``a`` must still be 2.
        """
        a = CountingFilter(n, p)
        a.insert(str(1034))
        a.insert(str(1034))
        ret = a.howMany(str(1034))
        # assertEqual reports the actual count on failure, unlike assertTrue(x == y).
        self.assertEqual(ret, 2)

        b = CountingFilter(n, p)
        b.insert(str(1034))
        b.insert(str(1034))
        a.intersection(b)

        ret2 = a.howMany(str(1034))
        self.assertEqual(ret2, 2)
    def test_intersection3(self):
        """Chained intersection of three nested key ranges.

        ``a`` receives "0".."999", ``b`` receives "500".."999" and ``c``
        receives "600".."699".  ``b`` is intersected with ``c`` and then ``a``
        with ``b``, so only "600".."699" were inserted into all three.  Those
        keys must all be reported as present; hits below 600 and from 700 up
        are tolerated only as false positives, each range bounded by
        ``n * (fpr + buffer)``.
        """
        a = CountingFilter(n, p)
        b = CountingFilter(n, p)
        c = CountingFilter(n, p)

        for key in range(1000):
            a.insert(str(key))
        for key in range(500, 1000):
            b.insert(str(key))
        for key in range(600, 700):
            c.insert(str(key))

        b.intersection(c)
        a.intersection(b)

        # Only 100 elements are expected in the final intersection.
        bit_size = a.getBitSize()
        hash_count = a.getHashCount()
        fpr = self.get_false_positive_rate(100, bit_size, hash_count)
        limit = n * (fpr + buffer)

        # Below the common range: only false positives may appear.
        low_hits = sum(1 for key in range(600) if a.contains(str(key)))
        self.assertTrue(low_hits < limit)

        # Inside the common range: every key must be found.
        for key in range(600, 700):
            self.assertTrue(a.contains(str(key)))

        # Above the common range: again only false positives may appear.
        high_hits = sum(1 for key in range(700, 1000) if a.contains(str(key)))
        self.assertTrue(high_hits < limit)
    def test_intersection2(self):
        """Intersection of a filter with one holding the upper half of its keys.

        ``a`` receives "0".."999" and ``b`` receives "500".."999".  After
        ``a.intersection(b)`` the keys "0".."499" may only appear as false
        positives (fewer than ``n * (fpr + buffer)`` hits), while every key
        "500".."999" must still be reported as present.
        """
        a = CountingFilter(n, p)
        b = CountingFilter(n, p)

        for key in range(1000):
            a.insert(str(key))
        for key in range(500, 1000):
            b.insert(str(key))

        a.intersection(b)

        # Keys 0-499 were only inserted into `a`, so hits here are false positives.
        errors = sum(1 for key in range(500) if a.contains(str(key)))
        print("intersect2 errors", errors)

        # Only 500 elements are expected in the intersection.
        bit_size = a.getBitSize()
        hash_count = a.getHashCount()
        fpr = self.get_false_positive_rate(500, bit_size, hash_count)
        print("intersect2 fpr", fpr)
        self.assertTrue(errors < n * (fpr + buffer))

        # Keys 500-999 were inserted into both filters and must all be found.
        for key in range(500, 1000):
            self.assertTrue(a.contains(str(key)))