def setUp(self):
    """Prepare a scratch directory tree and a Node with a bloom-filter hook.

    Paths created/used beneath a fresh temporary directory:
      <tmp>/node         -- created here and passed to Node as its directory
      <tmp>/bloomfilter  -- path passed to FileBloomFilter (capacity 10)

    Stores ``self.dir_``, ``self.node_dir`` and ``self.node`` for the tests.
    """
    tmp_root = tempfile.mkdtemp()
    self.dir_ = tmp_root

    node_path = os.path.join(tmp_root, 'node')
    self.node_dir = node_path
    os.mkdir(node_path)

    filter_path = os.path.join(tmp_root, 'bloomfilter')
    exists_hook = FileBloomFilter(filter_path, 10)
    self.node = Node(node_path, verify_exists_hook=exists_hook)
def __init__(self, working_dir, capacity, false_positive_rate=0.01):
    """Set up a deduper backed by a FileBloomFilter in ``working_dir``.

    :param working_dir: forwarded to the base-class initializer; the
        filter's file is placed under ``self.working_dir`` (presumably
        set by the base class -- confirm against its definition).
    :param capacity: capacity handed to the FileBloomFilter.
    :param false_positive_rate: forwarded to the FileBloomFilter
        (defaults to 0.01).
    """
    super(FileBloomFilterDeduper, self).__init__(working_dir)

    status_path = os.path.join(self.working_dir,
                               BLOOM_FILETER_STATUS_FILENAME)
    bloom = FileBloomFilter(
        status_path, capacity, false_positive_rate=false_positive_rate)
    self.filter = bloom

    # Starts out live; this block only ever sets the flag to False.
    self.is_shutdown = False
def create_bloom_filter_hook(bloom_filter_file, job):
    """Create the FileBloomFilter for ``job``, sized from the job's size.

    The capacity is derived from ``job.context.job.size``:

    * size > 0 and ``bloom_filter_file`` does not exist yet: ``size * 10``
    * size > 0 and ``bloom_filter_file`` already exists:     ``size * 2``
    * size <= 0: ``UNLIMIT_BLOOM_FILTER_CAPACITY``

    :param bloom_filter_file: path of the file backing the filter.
    :param job: object exposing ``context.job.size``.
    :return: a ``FileBloomFilter`` over ``bloom_filter_file``.
    """
    size = job.context.job.size

    if size > 0:
        # 10x the job size for a file that does not exist yet, 2x when the
        # file is already on disk.
        factor = 2 if os.path.exists(bloom_filter_file) else 10
        bloom_filter_size = size * factor
    else:
        # A non-positive size must not be multiplied into a zero/negative
        # capacity; use the fallback capacity whether or not the file
        # exists (previously only the existing-file branch did this).
        bloom_filter_size = UNLIMIT_BLOOM_FILTER_CAPACITY

    return FileBloomFilter(bloom_filter_file, bloom_filter_size)
def _init_bloom_filter(self):
    """Build the FileBloomFilter stored at ``<self.root>/bloomfilter``.

    Capacity rules, with ``size = self.job.context.job.size`` and a
    bundle factor of 1000 when ``self.job.is_bundle`` is truthy (else 1):

    * size > 0, file missing:  ``size * 10 * factor``
    * size > 0, file present:  ``size * 2 * factor``
    * size <= 0:               ``UNLIMIT_BLOOM_FILTER_CAPACITY``
    """
    job_size = self.job.context.job.size
    bundle_factor = 1000 if self.job.is_bundle else 1
    filter_path = os.path.join(self.root, 'bloomfilter')
    already_on_disk = os.path.exists(filter_path)

    if job_size > 0:
        growth = 2 if already_on_disk else 10
        capacity = job_size * growth * bundle_factor
    else:
        # Non-positive size: same fallback whether or not the file exists.
        capacity = UNLIMIT_BLOOM_FILTER_CAPACITY

    return FileBloomFilter(filter_path, capacity)
def testPutGet(self):
    """Check put() results before and after reopening the bloom filter."""
    first = str(12345)
    second = str(67890)
    third = str(13579)
    filter_path = os.path.join(self.dir_, 'bloomfilter')

    # A new item is echoed back; putting it again returns ''.
    self.assertEqual(self.node.put(first), first)
    self.assertEqual(self.node.put(first), '')

    # For a list, only the item not put before is returned.
    self.assertEqual(self.node.put([first, second]), [second])

    # After shutdown the bloomfilter file must be non-empty.
    self.node.shutdown()
    self.assertGreater(os.path.getsize(filter_path), 0)

    # A new Node over the same directory and filter file must still
    # treat the earlier items as already seen.
    reopened_hook = FileBloomFilter(filter_path, 5)
    self.node = Node(self.node_dir, verify_exists_hook=reopened_hook)
    self.assertEqual(self.node.put([first, second, third]), [third])
def __init__(self, sync_file, capacity):
    """Wrap a FileBloomFilter built from ``sync_file`` and ``capacity``.

    :param sync_file: first positional argument for FileBloomFilter.
    :param capacity: second positional argument for FileBloomFilter.
    """
    bloom = FileBloomFilter(sync_file, capacity)
    self.filter = bloom