Пример #1
0
    def test_insert_then_test(self):
        """Index fields 1 and 2 of the fixture and probe both Bloom filters.

        Checks the mapping returned by ``create_index`` (one ``.bfindex``
        path per requested field), then reloads each index from disk and
        verifies that every filter contains only the words of its own field.
        """
        result = create_index(
            '/tmp/fake.csv',  # input filename
            self.test_file,   # file-like object
            0.0001,           # error rate
            1,                # skip lines
            [1, 2],           # fields
            ',',              # delimiter
            False)            # recursive domain
        self.assertEqual(
            {'/tmp/fake.csv.2.bfindex': 6,
             '/tmp/fake.csv.1.bfindex': 5},
            result)

        # Open the index files in ``with`` blocks so the handles are closed
        # deterministically instead of leaking until garbage collection.
        # NOTE(review): assumes BloomFilter.fromfile reads the whole filter
        # eagerly and does not keep using the file object - confirm.
        with open('/tmp/fake.csv.1.bfindex', 'rb') as index_file:
            b1 = BloomFilter.fromfile(index_file)
        with open('/tmp/fake.csv.2.bfindex', 'rb') as index_file:
            b2 = BloomFilter.fromfile(index_file)

        # One line is skipped on input (presumably the header row), so the
        # column titles must not have been indexed.
        self.assertNotIn('FieldA', b1)
        self.assertNotIn('FieldB', b2)

        # Words expected in the field-1 index only.
        for word in ('apple', 'banana', 'orange', 'pear', 'pineapple'):
            self.assertIn(word, b1)
            self.assertNotIn(word, b2)

        # Words expected in the field-2 index only.
        for word in ('carrot', 'potato', 'leek', 'cauliflower', 'bean'):
            self.assertIn(word, b2)
            self.assertNotIn(word, b1)
Пример #2
0
    def test_insert_then_test(self):
        """Build indexes for fields 1 and 2, then check filter membership.

        The mapping returned by ``create_index`` is compared against the
        expected ``.bfindex`` paths and values, after which both index files
        are loaded back and probed with words from each field.
        """
        result = create_index(
            '/tmp/fake.csv',  # input filename
            self.test_file,  # file-like object
            0.0001,  # error rate
            1,  # skip lines
            [1, 2],  # fields
            ',',  # delimiter
            False)  # recursive domain
        self.assertEqual(
            {
                '/tmp/fake.csv.2.bfindex': 6,
                '/tmp/fake.csv.1.bfindex': 5
            }, result)

        # Context managers guarantee the index files are closed even when a
        # later assertion fails; the bare open() calls leaked both handles.
        # NOTE(review): assumes BloomFilter.fromfile fully loads the filter
        # before returning - confirm against the BloomFilter implementation.
        with open('/tmp/fake.csv.1.bfindex', 'rb') as first_index:
            b1 = BloomFilter.fromfile(first_index)
        with open('/tmp/fake.csv.2.bfindex', 'rb') as second_index:
            b2 = BloomFilter.fromfile(second_index)

        # The first input line is skipped (presumably the header), so the
        # column names should be absent from the filters.
        self.assertNotIn('FieldA', b1)
        self.assertNotIn('FieldB', b2)

        # Field-1 words belong to b1 and must not show up in b2.
        for word in ('apple', 'banana', 'orange', 'pear', 'pineapple'):
            self.assertIn(word, b1)
            self.assertNotIn(word, b2)

        # Field-2 words belong to b2 and must not show up in b1.
        for word in ('carrot', 'potato', 'leek', 'cauliflower', 'bean'):
            self.assertIn(word, b2)
            self.assertNotIn(word, b1)
Пример #3
0
 def test_higher_field_than_column_count(self):
     """Expect InvalidArgument when field 4 is requested from the fixture.

     Per the test name, field 4 lies beyond the fixture's column count.
     """
     with self.assertRaises(InvalidArgument):
         create_index(
             '/tmp/fake.csv',  # input filename
             self.test_file,   # file-like object
             0.0001,           # error rate
             1,                # skip lines
             [4],              # fields
             ',',              # delimiter
             False)            # recursive domain
Пример #4
0
 def test_higher_field_than_column_count(self):
     """create_index must raise InvalidArgument for field 4.

     Per the test name, the requested field index is higher than the
     number of columns in the fixture.
     """
     index_args = (
         '/tmp/fake.csv',  # input filename
         self.test_file,  # file-like object
         0.0001,  # error rate
         1,  # skip lines
         [4],  # fields
         ',',  # delimiter
         False,  # recursive domain
     )
     # assertRaises invokes the callable itself, so no lambda is needed.
     self.assertRaises(InvalidArgument, create_index, *index_args)
Пример #5
0
    def test_recursive_domains(self):
        """Index field 3 with the recursive-domain flag on and probe the filter.

        Verifies the mapping returned by ``create_index`` and then checks
        that the reloaded filter contains each listed host name together
        with its shorter parent suffixes.
        """
        result = create_index(
            '/tmp/fake.csv',  # input filename
            self.test_file,  # file-like object
            0.0001,  # error rate
            1,  # skip lines
            [3],  # fields
            ',',  # delimiter
            True)  # recursive domain
        self.assertEqual({'/tmp/fake.csv.3.bfindex': 9}, result)

        # Close the index file deterministically; the bare open() call
        # leaked the handle.
        # NOTE(review): assumes BloomFilter.fromfile reads the whole filter
        # eagerly and does not keep using the file object - confirm.
        with open('/tmp/fake.csv.3.bfindex', 'rb') as index_file:
            b = BloomFilter.fromfile(index_file)

        for word in ('subdomain.yahoo.com', 'yahoo.com', 'com',
                     'example.domain.com', 'domain.com', 'www.google.co.uk',
                     'google.co.uk', 'co.uk', 'uk'):
            self.assertIn(word, b)
Пример #6
0
    def test_recursive_domains(self):
        """Check that recursive-domain indexing of field 3 stores all suffixes.

        The returned mapping is compared against the expected ``.bfindex``
        entry, then the index is loaded back and every full host name and
        each of its listed parent suffixes is looked up in the filter.
        """
        result = create_index(
            '/tmp/fake.csv',  # input filename
            self.test_file,   # file-like object
            0.0001,           # error rate
            1,                # skip lines
            [3],              # fields
            ',',              # delimiter
            True)             # recursive domain
        self.assertEqual(
            {'/tmp/fake.csv.3.bfindex': 9},
            result)

        # A context manager closes the index file even if loading fails;
        # the original never closed the handle it opened.
        # NOTE(review): assumes BloomFilter.fromfile fully loads the filter
        # before returning - confirm against the BloomFilter implementation.
        with open('/tmp/fake.csv.3.bfindex', 'rb') as index_file:
            b = BloomFilter.fromfile(index_file)

        expected_domains = (
            'subdomain.yahoo.com', 'yahoo.com', 'com',
            'example.domain.com', 'domain.com', 'www.google.co.uk',
            'google.co.uk', 'co.uk', 'uk',
        )
        for word in expected_domains:
            self.assertIn(word, b)