Пример #1
0
    def test_fc_inverted_logic_automatic(self):
        """Inverted logic is selected automatically and survives a round trip.

        A fresh cascade reports ``invertedLogic`` as ``None``. Once it is
        initialized with the helper's set (requested with ``num_set=50_000``)
        as ``include`` and the helper's iterator (``num_iterator=100``),
        materialized into a set, as ``exclude``, the flag must be true. The
        cascade must then verify, serialize to exactly 1055 bytes, and reload
        into an equal cascade that verifies as well.
        """
        def fresh_inputs():
            # Inputs are rebuilt before every use rather than shared.
            return get_serial_iterator_and_set(num_iterator=100,
                                               num_set=50_000)

        cascade = filtercascade.FilterCascade(min_filter_length=1024)
        self.assertEqual(None, cascade.invertedLogic)

        # Initializing with the large set included should flip the logic.
        serial_iter, big_set = fresh_inputs()
        cascade.initialize(include=big_set, exclude=set(serial_iter))
        self.assertTrue(cascade.invertedLogic)

        serial_iter, big_set = fresh_inputs()
        cascade.verify(include=big_set, exclude=serial_iter)

        sink = MockFile()
        cascade.tofile(sink)
        self.assertEqual(len(sink.data), 1055)

        reloaded = filtercascade.FilterCascade.from_buf(sink)
        self.assertFilterCascadeEqual(cascade, reloaded)

        serial_iter, big_set = fresh_inputs()
        reloaded.verify(include=big_set, exclude=serial_iter)
Пример #2
0
 def test_fc_version_1_with_salt(self):
     """Constructing a ``version=1`` cascade with a salt raises ValueError."""
     sha256 = filtercascade.fileformats.HashAlgorithm.SHA256
     with self.assertRaises(ValueError):
         filtercascade.FilterCascade(
             version=1,
             salt=b"happiness",
             defaultHashAlg=sha256,
         )
Пример #3
0
    def test_fc_inverted_logic_iterators(self):
        """Initializing with the large set as ``include`` must be rejected.

        A default cascade reports ``invertedLogic`` as falsy. Passing the
        helper's set (``num_set=50_000``) as ``include`` and its iterator
        (``num_iterator=100``) directly as ``exclude`` is expected to raise
        ``InvertedLogicException``.
        """
        cascade = filtercascade.FilterCascade()
        self.assertFalse(cascade.invertedLogic)

        serial_iter, big_set = get_serial_iterator_and_set(
            num_iterator=100, num_set=50_000)
        expected_error = filtercascade.InvertedLogicException
        with self.assertRaises(expected_error):
            cascade.initialize(include=big_set, exclude=serial_iter)
Пример #4
0
    def test_fc_input_formats(self):
        """str, bytes and custom-object inputs must build equal cascades.

        Three cascades are initialized with "A"/"D" given as ``str``, as
        ``bytes``, and as ``SimpleToByteClass`` instances built from
        ``ord("A")``/``ord("D")``. Both custom objects must report
        ``method_called``, and the str cascade must equal the other two.
        """
        from_text = filtercascade.FilterCascade([])
        from_text.initialize(include=["A"], exclude=["D"])

        from_bytes = filtercascade.FilterCascade([])
        from_bytes.initialize(include=[b"A"], exclude=[b"D"])

        included_obj = SimpleToByteClass(ord("A"))
        excluded_obj = SimpleToByteClass(ord("D"))
        from_objects = filtercascade.FilterCascade([])
        from_objects.initialize(include=[included_obj],
                                exclude=[excluded_obj])

        for wrapper in (included_obj, excluded_obj):
            self.assertTrue(wrapper.method_called)

        for other in (from_bytes, from_objects):
            self.assertFilterCascadeEqual(from_text, other)
Пример #5
0
    def test_expected_error_rates(self):
        """``set_crlite_error_rates`` returns and stores the computed rates.

        For ``include_len=50`` and ``exclude_len=1_000`` the first rate must
        be about 0.0353 (to three places) and the second exactly 0.5, and the
        returned value must equal the cascade's ``error_rates``. Swapping the
        two lengths must raise ``InvertedLogicException``.
        """
        cascade = filtercascade.FilterCascade()
        rates = cascade.set_crlite_error_rates(include_len=50,
                                               exclude_len=1_000)
        self.assertAlmostEqual(rates[0], 0.0353, places=3)
        self.assertEqual(rates[1], 0.5)
        self.assertEqual(rates, cascade.error_rates)

        expected_error = filtercascade.InvertedLogicException
        with self.assertRaises(expected_error):
            cascade.set_crlite_error_rates(include_len=1_000, exclude_len=50)
Пример #6
0
    def test_fc_serial_deserial(self):
        """A cascade written with ``tofile`` reloads equal via ``from_buf``."""
        original = filtercascade.FilterCascade()
        original.initialize(include=["D"], exclude=["A", "B", "C"])

        sink = MockFile()
        original.tofile(sink)

        restored = filtercascade.FilterCascade.from_buf(sink)
        self.assertFilterCascadeEqual(original, restored)
Пример #7
0
 def test_inverted_logic_erroneous_error_rate(self):
     """A first-layer error rate far above 1 is rejected at construction.

     With 1000 blocked items and a single non-blocked one, the first rate
     works out to 1000 / sqrt(2), roughly 707, so building the cascade is
     expected to raise ``InvalidErrorRateException``.
     """
     blocked = [str(number) for number in range(1000)]
     not_blocked = ["one_not_blocked_item"]
     first_rate = len(blocked) / (math.sqrt(2) * len(not_blocked))
     with self.assertRaises(filtercascade.InvalidErrorRateException):
         filtercascade.FilterCascade(
             error_rates=[first_rate, 0.5],
             salt=b"VERY_PREDICTABLE",
             defaultHashAlg=filtercascade.fileformats.HashAlgorithm.SHA256,
         )
Пример #8
0
    def test_fc_load_version_2(self):
        """A ``version=2`` cascade must round-trip through its file form."""
        cascade = filtercascade.FilterCascade([], version=2)
        valid_serials, revoked_serials = get_serial_sets(num_valid=10,
                                                         num_revoked=1)
        cascade.initialize(include=revoked_serials, exclude=valid_serials)

        sink = MockFile()
        cascade.tofile(sink)

        reloaded = filtercascade.FilterCascade.from_buf(sink)
        self.assertFilterCascadeEqual(cascade, reloaded)
Пример #9
0
    def test_fc_iterable(self):
        """Check the layer count and layer sizes for a large input.

        The cascade is initialized from ``get_serial_sets(num_valid=500_000,
        num_revoked=3_000)`` with the revoked values included and the valid
        ones excluded; it must end up with exactly three filters of the
        sizes pinned below.
        """
        expected_sizes = (81272, 14400, 14400)
        cascade = filtercascade.FilterCascade([])

        valid_serials, revoked_serials = get_serial_sets(num_valid=500_000,
                                                         num_revoked=3_000)
        cascade.initialize(include=revoked_serials, exclude=valid_serials)

        self.assertEqual(len(cascade.filters), len(expected_sizes))
        for layer, wanted in enumerate(expected_sizes):
            self.assertEqual(cascade.filters[layer].size, wanted)
    def test_fc_serial_deserial(self):
        """Each filter layer must survive serialization and reloading.

        A cascade is initialized, written to a ``MockFile`` with ``tofile``
        and read back with ``from_buf``. The reloaded cascade must have the
        same number of layers, and each pair of corresponding layers must
        satisfy ``assertBloomerEqual``.
        """
        f1 = filtercascade.FilterCascade([])
        f1.initialize(include=["A", "B", "C"], exclude=["D"])

        h = MockFile()
        f1.tofile(h)

        f2 = filtercascade.FilterCascade.from_buf(h)

        # Compare the layer counts first: pairing the layers alone would not
        # notice extra layers in the reloaded cascade.
        self.assertEqual(len(f1.filters), len(f2.filters))
        for written, loaded in zip(f1.filters, f2.filters):
            self.assertBloomerEqual(written, loaded)
Пример #11
0
    def test_fc_load_version_2(self):
        """A ``version=2`` cascade must round-trip through its file form."""
        cascade = filtercascade.FilterCascade(version=2)
        serial_iter, tiny_set = get_serial_iterator_and_set(
            num_iterator=10, num_set=1)
        cascade.initialize(include=tiny_set, exclude=serial_iter)

        sink = MockFile()
        cascade.tofile(sink)

        reloaded = filtercascade.FilterCascade.from_buf(sink)
        self.assertFilterCascadeEqual(cascade, reloaded)
Пример #12
0
    def test_fc_small_filter_length(self):
        """A cascade built with ``min_filter_length=8`` must round-trip.

        After initialization from ``get_serial_iterator_and_set(
        num_iterator=5_000, num_set=100)`` the serialized cascade must be
        exactly 280 bytes long and reload into an equal cascade.
        """
        cascade = filtercascade.FilterCascade(min_filter_length=8)

        serial_iter, tiny_set = get_serial_iterator_and_set(
            num_iterator=5_000, num_set=100)
        cascade.initialize(include=tiny_set, exclude=serial_iter)

        sink = MockFile()
        cascade.tofile(sink)
        self.assertEqual(len(sink.data), 280)

        reloaded = filtercascade.FilterCascade.from_buf(sink)
        self.assertFilterCascadeEqual(cascade, reloaded)
Пример #13
0
    def test_sha256_with_salt(self):
        """Pin layer count, bit count and file size of a salted cascade.

        The cascade is created with SHA256 and the salt ``b"happiness"``.
        After initialization it must have one filter and a ``bitCount()`` of
        81272, and its serialized form must be 10183 bytes long.
        """
        cascade = filtercascade.FilterCascade(
            [],
            salt=b"happiness",
            hashAlg=filtercascade.HashAlgorithm.SHA256,
        )

        valid_serials, revoked_serials = get_serial_sets(num_valid=10,
                                                         num_revoked=1)
        cascade.initialize(include=revoked_serials, exclude=valid_serials)

        self.assertEqual(len(cascade.filters), 1)
        self.assertEqual(cascade.bitCount(), 81272)

        sink = MockFile()
        cascade.tofile(sink)
        self.assertEqual(len(sink.data), 10183)
Пример #14
0
    def test_fc_iterable(self):
        """Check layer count and sizes when ``exclude`` is a partly used stream.

        The first 3,000 values from ``predictable_serial_gen(500_000)`` are
        sliced off as the included set, and the same iterable, now advanced
        past them, is passed as ``exclude``. The cascade must end up with
        exactly three filters of the sizes pinned below.
        """
        expected_sizes = (81272, 14400, 14400)
        cascade = filtercascade.FilterCascade([])

        serial_stream = predictable_serial_gen(500_000)
        # The included values have to be disjoint from the excluded ones, so
        # take them off the front of the stream and exclude what is left.
        revoked = set(islice(serial_stream, 3_000))

        cascade.initialize(include=revoked, exclude=serial_stream)

        self.assertEqual(len(cascade.filters), len(expected_sizes))
        for layer, wanted in enumerate(expected_sizes):
            self.assertEqual(cascade.filters[layer].size, wanted)
Пример #15
0
    def test_verify_failure(self):
        """
        This test cheats, changing the corpus of data out from under the Bloom
        filter. Not every such change would raise an AssertionError,
        particularly on these small data-sets.

        The replacement corpus is built before entering ``assertRaises`` so
        that only ``verify`` itself can satisfy the expected AssertionError.
        """
        fc = filtercascade.FilterCascade([])

        valid, revoked = get_serial_sets(num_valid=10, num_revoked=1)
        fc.initialize(include=revoked, exclude=valid)

        # Same helper, but asking for two revoked entries instead of one.
        valid2, revoked2 = get_serial_sets(num_valid=10, num_revoked=2)
        with self.assertRaises(AssertionError):
            fc.verify(include=revoked2, exclude=valid2)
Пример #16
0
    def test_fc_load_version_2_with_salt(self):
        """A salted ``version=2`` SHA256 cascade must round-trip unchanged."""
        cascade = filtercascade.FilterCascade(
            [],
            hashAlg=filtercascade.HashAlgorithm.SHA256,
            salt=b"nacl",
            version=2,
        )
        valid_serials, revoked_serials = get_serial_sets(num_valid=10,
                                                         num_revoked=1)
        cascade.initialize(include=revoked_serials, exclude=valid_serials)

        sink = MockFile()
        cascade.tofile(sink)

        reloaded = filtercascade.FilterCascade.from_buf(sink)
        self.assertFilterCascadeEqual(cascade, reloaded)
Пример #17
0
    def test_fc_inverted_logic_explicit(self):
        """An explicit ``invertedLogic=True`` is kept and serialized.

        The cascade is initialized and verified with inputs from
        ``get_serial_iterator_and_set(num_iterator=2, num_set=2)``. The flag
        must be true after initialization and on the cascade reloaded from
        the serialized form, which must also equal the original.
        """
        def fresh_inputs():
            # Inputs are rebuilt before every use rather than shared.
            return get_serial_iterator_and_set(num_iterator=2, num_set=2)

        cascade = filtercascade.FilterCascade(invertedLogic=True)
        serial_iter, tiny_set = fresh_inputs()
        cascade.initialize(include=tiny_set, exclude=set(serial_iter))
        self.assertTrue(cascade.invertedLogic)

        serial_iter, tiny_set = fresh_inputs()
        cascade.verify(include=tiny_set, exclude=serial_iter)

        sink = MockFile()
        cascade.tofile(sink)
        reloaded = filtercascade.FilterCascade.from_buf(sink)
        self.assertTrue(reloaded.invertedLogic)
        self.assertFilterCascadeEqual(cascade, reloaded)
Пример #18
0
    def test_verify_failure(self):
        """
        This test cheats, changing the corpus of data out from under the Bloom
        filter. Not every such change would raise an AssertionError,
        particularly on these small data-sets.

        The replacement corpus is built before entering ``assertRaises`` so
        that only ``verify`` itself can satisfy the expected AssertionError.
        """
        fc = filtercascade.FilterCascade()

        iterator, small_set = get_serial_iterator_and_set(num_iterator=10,
                                                          num_set=1)
        fc.initialize(include=small_set, exclude=iterator)

        # Same helper, but asking for a two-element set instead of one.
        iterator2, small_set2 = get_serial_iterator_and_set(
            num_iterator=10, num_set=2)
        with self.assertRaises(AssertionError):
            fc.verify(include=small_set2, exclude=iterator2)
Пример #19
0
    def test_fc_load_version_2_with_salt(self):
        """A salted ``version=2`` SHA256 cascade must round-trip unchanged.

        After initialization the cascade must also report ``invertedLogic``
        as falsy.
        """
        sha256 = filtercascade.fileformats.HashAlgorithm.SHA256
        cascade = filtercascade.FilterCascade(
            defaultHashAlg=sha256,
            salt=b"nacl",
            version=2,
        )
        serial_iter, tiny_set = get_serial_iterator_and_set(
            num_iterator=10, num_set=1)
        cascade.initialize(include=tiny_set, exclude=serial_iter)
        self.assertFalse(cascade.invertedLogic)

        sink = MockFile()
        cascade.tofile(sink)

        reloaded = filtercascade.FilterCascade.from_buf(sink)
        self.assertFilterCascadeEqual(cascade, reloaded)
Пример #20
0
    def test_sha256_with_salt(self):
        """Pin layer count, bit count and file size of a salted cascade.

        The cascade is created with SHA256 and the salt ``b"happiness"``.
        After initialization it must have one filter and a ``bitCount()`` of
        8128, and its serialized form must be 1039 bytes long.
        """
        sha256 = filtercascade.fileformats.HashAlgorithm.SHA256
        cascade = filtercascade.FilterCascade(defaultHashAlg=sha256,
                                              salt=b"happiness")

        serial_iter, tiny_set = get_serial_iterator_and_set(
            num_iterator=10, num_set=1)
        cascade.initialize(include=tiny_set, exclude=serial_iter)

        self.assertEqual(len(cascade.filters), 1)
        self.assertEqual(cascade.bitCount(), 8128)

        sink = MockFile()
        cascade.tofile(sink)
        self.assertEqual(len(sink.data), 1039)
Пример #21
0
    def verify_minimum_sets(self, *, hashAlg):
        """Pin the sizes of a minimal cascade built with ``hashAlg``.

        The cascade must have one filter, a ``bitCount()`` of 81272 and a
        serialized length of 10174 bytes; the cascade reloaded from that
        serialized form must verify against freshly built inputs.

        Args:
            hashAlg: Hash algorithm forwarded to ``FilterCascade``.
        """
        def fresh_sets():
            # Inputs are rebuilt before every use rather than shared.
            return get_serial_sets(num_valid=10, num_revoked=1)

        cascade = filtercascade.FilterCascade([], hashAlg=hashAlg)

        valid_serials, revoked_serials = fresh_sets()
        cascade.initialize(include=revoked_serials, exclude=valid_serials)

        self.assertEqual(len(cascade.filters), 1)
        self.assertEqual(cascade.bitCount(), 81272)

        sink = MockFile()
        cascade.tofile(sink)
        self.assertEqual(len(sink.data), 10174)

        reloaded = filtercascade.FilterCascade.from_buf(sink)
        valid_again, revoked_again = fresh_sets()
        reloaded.verify(include=revoked_again, exclude=valid_again)
Пример #22
0
    def test_fc_iterable(self):
        """Pin layer sizes and serialized length for a large input.

        The cascade is initialized from ``get_serial_iterator_and_set(
        num_iterator=500_000, num_set=3_000)`` and must stay non-inverted,
        have the ten layer sizes pinned below, and serialize to a
        ``MockFile`` whose ``len()`` is 6843.
        """
        expected_sizes = [
            26824, 10624, 2184, 5208, 1440, 1872, 1440, 1440, 1440, 1440,
        ]
        cascade = filtercascade.FilterCascade(filters=[])

        serial_iter, tiny_set = get_serial_iterator_and_set(
            num_iterator=500_000, num_set=3_000)
        cascade.initialize(include=tiny_set, exclude=serial_iter)
        self.assertFalse(cascade.invertedLogic)

        self.assertEqual(len(cascade.filters), 10)
        actual_sizes = [layer.size for layer in cascade.filters]
        self.assertEqual(actual_sizes, expected_sizes)

        sink = MockFile()
        cascade.tofile(sink)
        self.assertEqual(len(sink), 6843)
Пример #23
0
    def verify_minimum_sets(self, *, hashAlg):
        """Pin the sizes of a minimal cascade built with ``hashAlg``.

        The cascade must have one filter, a ``bitCount()`` of 8128 and a
        serialized length of 1030 bytes; the cascade reloaded from that
        serialized form must verify against freshly built inputs.

        Args:
            hashAlg: Passed to ``FilterCascade`` as ``defaultHashAlg``.
        """
        def fresh_inputs():
            # Inputs are rebuilt before every use rather than shared.
            return get_serial_iterator_and_set(num_iterator=10, num_set=1)

        cascade = filtercascade.FilterCascade(defaultHashAlg=hashAlg)

        serial_iter, tiny_set = fresh_inputs()
        cascade.initialize(include=tiny_set, exclude=serial_iter)

        self.assertEqual(len(cascade.filters), 1)
        self.assertEqual(cascade.bitCount(), 8128)

        sink = MockFile()
        cascade.tofile(sink)
        self.assertEqual(len(sink.data), 1030)

        reloaded = filtercascade.FilterCascade.from_buf(sink)
        iter_again, set_again = fresh_inputs()
        reloaded.verify(include=set_again, exclude=iter_again)
Пример #24
0
    def test_increased_false_positive_rate_in_deeper_layer(self):
        """Pin the shape of a cascade built with explicit error rates.

        The blocked items are the strings "1" through "999" and the
        non-blocked items are their negated counterparts. Both lists have the
        same length, so the first rate comes out at about 1 / sqrt(2); the
        second is 0.5. The cascade must verify against its own inputs and
        end up with 6 filters and a ``bitCount()`` of 7992.
        """
        numbers = range(1, 1000)
        not_blocked = [str(-n) for n in numbers]
        blocked = [str(n) for n in numbers]
        first_rate = len(blocked) / (math.sqrt(2) * len(not_blocked))

        cascade = filtercascade.FilterCascade(
            error_rates=[first_rate, 0.5],
            defaultHashAlg=filtercascade.fileformats.HashAlgorithm.SHA256,
            salt=b"VERY_PREDICTABLE",
        )
        cascade.initialize(include=blocked, exclude=not_blocked)
        cascade.verify(include=blocked, exclude=not_blocked)

        self.assertEqual(len(cascade.filters), 6)
        self.assertEqual(cascade.bitCount(), 7992)
Пример #25
0
    def test_set_error_rates(self):
        """``set_error_rates`` rejects malformed lists and accepts a valid one.

        An empty list must raise ``ValueError``. Each list in
        ``rejected_rates`` below must raise ``InvalidErrorRateException``.
        ``[0.99, 0.01]`` must be accepted without raising.
        """
        cascade = filtercascade.FilterCascade()
        with self.assertRaises(ValueError):
            cascade.set_error_rates([])

        rejected_rates = (
            [-1],
            [1.1],
            [0],
            [0, 0.25, 0.9],
            [0.25, 0.9, 1.0],
            [0.25, 0.9, 940],
        )
        for rates in rejected_rates:
            with self.assertRaises(filtercascade.InvalidErrorRateException):
                cascade.set_error_rates(rates)

        cascade.set_error_rates([0.99, 0.01])
Пример #26
0
    def test_fc_standard_logic_disk_layout(self):
        """Pin the leading serialized bytes of a non-inverted cascade.

        A SHA256 cascade with the one-byte salt ``b"a"`` is initialized and
        must report ``invertedLogic`` as falsy. Specific offsets of the
        serialized data are then checked; the trailing comments name the
        field each offset is taken to hold.
        """
        sha256 = filtercascade.fileformats.HashAlgorithm.SHA256
        cascade = filtercascade.FilterCascade(defaultHashAlg=sha256, salt=b"a")
        serial_iter, tiny_set = get_serial_iterator_and_set(
            num_iterator=50_000, num_set=100)

        # Including the small set must leave the logic non-inverted.
        cascade.initialize(include=tiny_set, exclude=serial_iter)
        self.assertFalse(cascade.invertedLogic)

        sink = MockFile()
        cascade.tofile(sink)
        self.assertEqual(sink.data[0:1], b"\x02")
        self.assertEqual(sink.data[2], 0)  # inverted
        self.assertEqual(sink.data[3], 1)  # salt_len
        self.assertEqual(sink.data[4], ord("a"))  # salt
        self.assertEqual(sink.data[5],
                         filtercascade.fileformats.HashAlgorithm.SHA256)
Пример #27
0
    def test_fc_heterogenous_hash_algorithms(self):
        """Loading a cascade whose layers mix hash algorithms must fail.

        Two ``Bloomer`` layers are built by hand, level 1 with SHA256 and
        level 2 with MURMUR3. The cascade is written out, and ``from_buf`` is
        expected to raise ``ValueError`` when reading it back.
        """
        algorithms = filtercascade.fileformats.HashAlgorithm
        sha_layer = filtercascade.Bloomer(
            size=32, nHashFuncs=6, level=1, hashAlg=algorithms.SHA256)
        murmur_layer = filtercascade.Bloomer(
            size=32, nHashFuncs=1, level=2, hashAlg=algorithms.MURMUR3)
        cascade = filtercascade.FilterCascade(
            filters=[sha_layer, murmur_layer])

        sink = MockFile()
        cascade.tofile(sink)

        with self.assertRaises(ValueError):
            filtercascade.FilterCascade.from_buf(sink)
Пример #28
0
        m = hashlib.sha256()
        m.update(counter.to_bytes(4, byteorder="big"))
        yield m.hexdigest()


def store(fc, path):
    """Serialize ``fc`` to ``path``, replacing any file already there.

    Args:
        fc: Object exposing ``tofile(handle)``; it is handed a binary
            write handle.
        path: Destination supporting ``exists()`` and ``unlink()`` and
            accepted by ``open``.
    """
    already_present = path.exists()
    if already_present:
        # Remove the old file before a new one is written in its place.
        path.unlink()
    with open(path, "wb") as sink:
        fc.tofile(sink)


# Shared corpus of predictable serials; every cascade below excludes it
# together with the extra value b"other".
large_set = set(predictable_serial_gen(100_000))

# SHA256 cascade with a salt, written out through store().
v2_sha256_with_salt = filtercascade.FilterCascade(
    [],
    defaultHashAlg=filtercascade.fileformats.HashAlgorithm.SHA256,
    salt=b"nacl",
)
v2_sha256_with_salt.initialize(
    include=[b"this", b"that"],
    exclude=large_set | {b"other"},
)
store(v2_sha256_with_salt, Path("test_v2_sha256_salt_mlbf"))

# SHA256 cascade without a salt, written out through store().
v2_sha256 = filtercascade.FilterCascade(
    [],
    defaultHashAlg=filtercascade.fileformats.HashAlgorithm.SHA256,
)
v2_sha256.initialize(
    include=[b"this", b"that"],
    exclude=large_set | {b"other"},
)
store(v2_sha256, Path("test_v2_sha256_mlbf"))

# MURMUR3 cascade without a salt.
v2_murmur = filtercascade.FilterCascade(
    [],
    defaultHashAlg=filtercascade.fileformats.HashAlgorithm.MURMUR3,
)
v2_murmur.initialize(
    include=[b"this", b"that"],
    exclude=large_set | {b"other"},
)
Пример #29
0
 def test_fc_include_not_list(self):
     """``initialize`` raises TypeError when ``include`` is not a list.

     Both arguments are the direct results of ``predictable_serial_gen(1)``.
     """
     cascade = filtercascade.FilterCascade([])
     with self.assertRaises(TypeError):
         cascade.initialize(
             include=predictable_serial_gen(1),
             exclude=predictable_serial_gen(1),
         )
Пример #30
0
 def test_fc_exclude_must_be_iterable(self):
     """``initialize`` must raise TypeError for a non-iterable ``exclude``.

     The bare integer is handed straight to ``initialize``. Wrapping it in
     ``list(...)`` would raise TypeError while the argument is being built,
     so the test would pass without ``initialize`` ever being called.
     """
     f = filtercascade.FilterCascade([])
     with self.assertRaises(TypeError):
         f.initialize(include=[], exclude=1)