def test_insert_then_test(self):
    """Index fields 1 and 2 of the fixture and verify both Bloom filters.

    Checks the mapping returned by ``create_index`` and then reloads the
    two ``.bfindex`` files it names, asserting that the words expected in
    ``.1.bfindex`` are absent from ``.2.bfindex`` and vice versa.
    """
    result = create_index(
        '/tmp/fake.csv',  # input filename
        self.test_file,   # file-like object
        0.0001,           # error rate
        1,                # skip lines
        [1, 2],           # fields
        ',',              # delimiter
        False)            # recursive domain
    self.assertEqual(
        {'/tmp/fake.csv.2.bfindex': 6,
         '/tmp/fake.csv.1.bfindex': 5},
        result)

    # Open the index files in ``with`` blocks so the handles are closed even
    # when an assertion fails (previously both file objects were leaked).
    # The filters are only used while their files are still open.
    with open('/tmp/fake.csv.1.bfindex', 'rb') as first_index:
        b1 = BloomFilter.fromfile(first_index)
        with open('/tmp/fake.csv.2.bfindex', 'rb') as second_index:
            b2 = BloomFilter.fromfile(second_index)

            # Presumably the header names, dropped by the skip-lines
            # argument above -- confirm against the fixture.
            self.assertNotIn('FieldA', b1)
            self.assertNotIn('FieldB', b2)

            for word in ('apple', 'banana', 'orange', 'pear', 'pineapple'):
                self.assertIn(word, b1)
                self.assertNotIn(word, b2)

            for word in ('carrot', 'potato', 'leek', 'cauliflower', 'bean'):
                self.assertIn(word, b2)
                self.assertNotIn(word, b1)
def test_insert_then_test(self):
    """Index fields 1 and 2 of the fixture and verify both Bloom filters.

    Checks the mapping returned by ``create_index`` and then reloads the
    two ``.bfindex`` files it names, asserting that the words expected in
    ``.1.bfindex`` are absent from ``.2.bfindex`` and vice versa.
    """
    result = create_index(
        '/tmp/fake.csv',  # input filename
        self.test_file,   # file-like object
        0.0001,           # error rate
        1,                # skip lines
        [1, 2],           # fields
        ',',              # delimiter
        False)            # recursive domain
    self.assertEqual(
        {
            '/tmp/fake.csv.2.bfindex': 6,
            '/tmp/fake.csv.1.bfindex': 5
        },
        result)

    # Open the index files in ``with`` blocks so the handles are closed even
    # when an assertion fails (previously both file objects were leaked).
    # The filters are only used while their files are still open.
    with open('/tmp/fake.csv.1.bfindex', 'rb') as first_index:
        b1 = BloomFilter.fromfile(first_index)
        with open('/tmp/fake.csv.2.bfindex', 'rb') as second_index:
            b2 = BloomFilter.fromfile(second_index)

            # Presumably the header names, dropped by the skip-lines
            # argument above -- confirm against the fixture.
            self.assertNotIn('FieldA', b1)
            self.assertNotIn('FieldB', b2)

            for word in ('apple', 'banana', 'orange', 'pear', 'pineapple'):
                self.assertIn(word, b1)
                self.assertNotIn(word, b2)

            for word in ('carrot', 'potato', 'leek', 'cauliflower', 'bean'):
                self.assertIn(word, b2)
                self.assertNotIn(word, b1)
def test_higher_field_than_column_count(self):
    """Requesting field 4 must make ``create_index`` raise InvalidArgument.

    Per the test name, 4 is beyond the number of columns in the fixture.
    """
    # assertRaises invokes the callable itself with the trailing arguments.
    self.assertRaises(
        InvalidArgument,
        create_index,
        '/tmp/fake.csv',  # input filename
        self.test_file,   # file-like object
        0.0001,           # error rate
        1,                # skip lines
        [4],              # fields
        ',',              # delimiter
        False)            # recursive domain
def test_recursive_domains(self):
    """Index field 3 with the recursive-domain flag and check the filter.

    After indexing, the reloaded ``.3.bfindex`` filter must contain each
    full host name listed below as well as its shorter dot-separated
    suffixes (e.g. ``subdomain.yahoo.com``, ``yahoo.com`` and ``com``).
    """
    result = create_index(
        '/tmp/fake.csv',  # input filename
        self.test_file,   # file-like object
        0.0001,           # error rate
        1,                # skip lines
        [3],              # fields
        ',',              # delimiter
        True)             # recursive domain
    self.assertEqual({'/tmp/fake.csv.3.bfindex': 9}, result)

    expected_domains = (
        'subdomain.yahoo.com', 'yahoo.com', 'com',
        'example.domain.com', 'domain.com',
        'www.google.co.uk', 'google.co.uk', 'co.uk', 'uk',
    )

    # Open the index in a ``with`` block so the handle is closed even when
    # an assertion fails (previously the file object was leaked). The
    # filter is only used while its file is still open.
    with open('/tmp/fake.csv.3.bfindex', 'rb') as index_file:
        b = BloomFilter.fromfile(index_file)
        for word in expected_domains:
            self.assertIn(word, b)
def test_recursive_domains(self):
    """Index field 3 with the recursive-domain flag and check the filter.

    After indexing, the reloaded ``.3.bfindex`` filter must contain each
    full host name listed below as well as its shorter dot-separated
    suffixes (e.g. ``subdomain.yahoo.com``, ``yahoo.com`` and ``com``).
    """
    result = create_index(
        '/tmp/fake.csv',  # input filename
        self.test_file,   # file-like object
        0.0001,           # error rate
        1,                # skip lines
        [3],              # fields
        ',',              # delimiter
        True)             # recursive domain
    self.assertEqual(
        {'/tmp/fake.csv.3.bfindex': 9},
        result)

    expected_domains = (
        'subdomain.yahoo.com', 'yahoo.com', 'com',
        'example.domain.com', 'domain.com',
        'www.google.co.uk', 'google.co.uk', 'co.uk', 'uk',
    )

    # Open the index in a ``with`` block so the handle is closed even when
    # an assertion fails (previously the file object was leaked). The
    # filter is only used while its file is still open.
    with open('/tmp/fake.csv.3.bfindex', 'rb') as index_file:
        b = BloomFilter.fromfile(index_file)
        for word in expected_domains:
            self.assertIn(word, b)