Exemplo n.º 1
0
 def setUp(self):
     """Prepare a scratch directory tree and a ``Node`` for the test.

     Creates a temporary directory (kept in ``self.dir_``), a ``node``
     sub-directory inside it (``self.node_dir``), and a ``Node`` rooted at
     that sub-directory. The node's ``verify_exists_hook`` is a
     ``FileBloomFilter`` stored at ``<self.dir_>/bloomfilter`` and built
     with 10 as its second argument.
     """
     scratch_root = tempfile.mkdtemp()
     self.dir_ = scratch_root

     self.node_dir = os.path.join(scratch_root, 'node')
     os.mkdir(self.node_dir)

     # The filter file sits next to the node directory, not inside it.
     filter_path = os.path.join(scratch_root, 'bloomfilter')
     exists_hook = FileBloomFilter(filter_path, 10)
     self.node = Node(self.node_dir, verify_exists_hook=exists_hook)
Exemplo n.º 2
0
    def __init__(self, working_dir, capacity, false_positive_rate=0.01):
        """Initialise the deduper and the ``FileBloomFilter`` it uses.

        :param working_dir: forwarded to the parent class initialiser.
        :param capacity: passed to ``FileBloomFilter`` as its second
            positional argument.
        :param false_positive_rate: passed to ``FileBloomFilter`` under the
            same keyword name (default ``0.01``).
        """
        super(FileBloomFilterDeduper, self).__init__(working_dir)

        # The filter file lives under ``self.working_dir`` using the
        # module-level status file name.
        status_path = os.path.join(
            self.working_dir, BLOOM_FILETER_STATUS_FILENAME)
        self.filter = FileBloomFilter(
            status_path, capacity, false_positive_rate=false_positive_rate)

        self.is_shutdown = False
Exemplo n.º 3
0
def create_bloom_filter_hook(bloom_filter_file, job):
    """Build the ``FileBloomFilter`` stored at ``bloom_filter_file``.

    The capacity handed to the filter is derived from
    ``job.context.job.size``:

    * size > 0 and ``bloom_filter_file`` does not exist yet: ``size * 10``
    * size > 0 and ``bloom_filter_file`` already exists: ``size * 2``
    * otherwise: ``UNLIMIT_BLOOM_FILTER_CAPACITY``

    :param bloom_filter_file: path of the file backing the filter.
    :param job: object exposing ``context.job.size``.
    :return: a ``FileBloomFilter`` for ``bloom_filter_file``.
    """
    size = job.context.job.size

    if size > 0:
        # Existing filter files get the smaller multiplier, new ones the
        # larger one.
        factor = 2 if os.path.exists(bloom_filter_file) else 10
        bloom_filter_size = size * factor
    else:
        # A non-positive size must never be multiplied into the capacity:
        # for a not-yet-existing file that used to yield a zero or
        # negative capacity. Fall back to the unlimited capacity in both
        # cases instead.
        bloom_filter_size = UNLIMIT_BLOOM_FILTER_CAPACITY

    return FileBloomFilter(bloom_filter_file, bloom_filter_size)
Exemplo n.º 4
0
    def _init_bloom_filter(self):
        """Create the ``FileBloomFilter`` stored at ``<self.root>/bloomfilter``.

        Capacity is derived from ``self.job.context.job.size``:

        * size > 0 and the file does not exist: ``size * 10 * base``
        * size > 0 and the file exists: ``size * 2 * base``
        * otherwise: ``UNLIMIT_BLOOM_FILTER_CAPACITY``

        where ``base`` is 1000 when ``self.job.is_bundle`` is truthy and 1
        otherwise.

        :return: the constructed ``FileBloomFilter``.
        """
        job_size = self.job.context.job.size
        scale = 1000 if self.job.is_bundle else 1
        filter_path = os.path.join(self.root, 'bloomfilter')

        # An already-present filter file gets the smaller multiplier.
        multiplier = 2 if os.path.exists(filter_path) else 10

        if job_size > 0:
            capacity = job_size * multiplier * scale
        else:
            capacity = UNLIMIT_BLOOM_FILTER_CAPACITY
        return FileBloomFilter(filter_path, capacity)
Exemplo n.º 5
0
    def testPutGet(self):
        """Check ``Node.put`` results before and after re-opening the node.

        Expectations asserted here:

        * putting an unseen string returns it; putting it again returns ``''``
        * putting a list returns only the items not put before
        * after ``shutdown()`` the ``bloomfilter`` file is non-empty
        * a new ``Node`` built on the same directory and filter file still
          reports the earlier items as already present
        """
        first = str(12345)
        second = str(67890)
        third = str(13579)
        filter_path = os.path.join(self.dir_, 'bloomfilter')

        # Single value: accepted once, then rejected as a duplicate.
        self.assertEqual(self.node.put(first), first)
        self.assertEqual(self.node.put(first), '')

        # List input: only the unseen element comes back.
        self.assertEqual(self.node.put([first, second]), [second])

        # Shutting down is expected to leave a non-empty filter file.
        self.node.shutdown()
        self.assertGreater(os.path.getsize(filter_path), 0)

        # Re-open on the same files, this time with 5 as the filter's
        # second argument.
        reopened_hook = FileBloomFilter(filter_path, 5)
        self.node = Node(self.node_dir, verify_exists_hook=reopened_hook)

        self.assertEqual(self.node.put([first, second, third]), [third])
Exemplo n.º 6
0
 def __init__(self, sync_file, capacity):
     """Create the ``FileBloomFilter`` held in ``self.filter``.

     :param sync_file: first argument passed to ``FileBloomFilter``.
     :param capacity: second argument passed to ``FileBloomFilter``.
     """
     backing_filter = FileBloomFilter(sync_file, capacity)
     self.filter = backing_filter