def __init__(self, length, decay_rate, random_seed):
    """Creates an ExponentialSameKeyAggregator.

    The aggregator is assembled from three components, all built here:
      1. An ExponentialBloomFilter for estimating the reach.
      2. An AnySketch that tracks the unique key in each register.
      3. An AnySketch that tracks the frequency of each effective key.

    Args:
      length: The length of bit vector for the Exponential bloom filter.
        The two tracker sketches are configured with the same length.
      decay_rate: The decay rate of the Exponential distribution, shared
        by all three components.
      random_seed: An integer specifying the random seed for generating
        the random seeds for hash functions. It has no default in this
        signature, and the same value is handed to all three components.
        NOTE(review): presumably the components must share a seed so
        that their registers line up -- confirm before adding a default.
    """

    def build_tracker(value_function_cls):
        # Each call builds its own distribution, index specification and
        # config, so the two trackers never share configuration objects.
        index_specs = [
            any_sketch.IndexSpecification(
                any_sketch.ExponentialDistribution(length, decay_rate),
                'exp')
        ]
        config = any_sketch.SketchConfig(
            index_specs,
            num_hashes=1,
            value_functions=[value_function_cls()])
        return any_sketch.AnySketch(config, random_seed)

    self.length = length
    self.decay_rate = decay_rate

    # Component 1: the reach estimator.
    self.exponential_bloom_filter = ExponentialBloomFilter(
        length=length, decay_rate=decay_rate, random_seed=random_seed)

    # Component 2: one unique key per register.
    self.unique_key_tracker = build_tracker(any_sketch.UniqueKeyFunction)

    # Component 3: summed frequency per register.
    self.frequency_count_tracker = build_tracker(any_sketch.SumFunction)
def __init__(self, length, num_hashes=1, random_seed=None, probability=0.08):
    """Creates a bloom filter indexed by a geometric distribution.

    Args:
      length: The length of bit vector for the bloom filter.
      num_hashes: Accepted in the signature but not forwarded; the sketch
        config built here always uses a single hash function.
      random_seed: An optional integer specifying the random seed for
        generating the random seeds for hash functions.
      probability: The probability parameter handed to the geometric
        distribution. Defaults to 0.08.
    """
    distribution = any_sketch.GeometricDistribution(length, probability)
    index_specs = [any_sketch.IndexSpecification(distribution, "geometric")]

    # The config is pinned to one hash function regardless of the
    # `num_hashes` argument.
    config = any_sketch.SketchConfig(
        index_specs,
        num_hashes=1,
        value_functions=[any_sketch.BitwiseOrFunction()])

    super().__init__(config, random_seed)
def __init__(self, length, num_hashes=1, random_seed=None):
    """Creates a BloomFilter over a uniform distribution.

    Args:
      length: The length of bit vector for the bloom filter.
      num_hashes: The number of hash functions to use.
      random_seed: An optional integer specifying the random seed for
        generating the random seeds for hash functions.
    """
    uniform_index = any_sketch.IndexSpecification(
        any_sketch.UniformDistribution(length), "dimension_1")
    value_functions = [any_sketch.BitwiseOrFunction()]

    # SketchConfig receives its arguments positionally: index
    # specifications, number of hashes, then value functions.
    config = any_sketch.SketchConfig(
        [uniform_index], num_hashes, value_functions)

    any_sketch.AnySketch.__init__(self, config, random_seed)
def __init__(self, length, random_seed=None):
    """Creates a LogarithmicBloomFilter.

    Args:
      length: The length of bit vector for the bloom filter.
      random_seed: An optional integer specifying the random seed for
        generating the random seeds for hash functions.
    """
    log_index = any_sketch.IndexSpecification(
        any_sketch.LogBucketDistribution(length), "log")

    # A single hash function with bitwise-OR registers.
    config = any_sketch.SketchConfig(
        [log_index],
        num_hashes=1,
        value_functions=[any_sketch.BitwiseOrFunction()])

    AnyDistributionBloomFilter.__init__(self, config, random_seed)
def __init__(self, length, random_seed=None):
    """Creates a Uniform Counting BloomFilter.

    Args:
      length: The length of bit vector for the bloom filter.
      random_seed: An optional integer specifying the random seed for
        generating the random seeds for hash functions.
    """
    uniform_index = any_sketch.IndexSpecification(
        any_sketch.UniformDistribution(length), "uniformcbf")

    # Registers are combined with SumFunction rather than a bitwise OR,
    # and a single hash function is used.
    config = any_sketch.SketchConfig(
        [uniform_index],
        num_hashes=1,
        value_functions=[any_sketch.SumFunction()])

    super().__init__(config, random_seed)
def __init__(self, length, decay_rate, random_seed=None):
    """Creates an ExponentialBloomFilter.

    Args:
      length: The length of bit vector for the bloom filter.
      decay_rate: The decay rate of Exponential distribution. It is also
        stored on the instance as `decay_rate`.
      random_seed: An optional integer specifying the random seed for
        generating the random seeds for hash functions.
    """
    exp_index = any_sketch.IndexSpecification(
        any_sketch.ExponentialDistribution(length, decay_rate), "exp")

    # A single hash function with bitwise-OR registers.
    config = any_sketch.SketchConfig(
        [exp_index],
        num_hashes=1,
        value_functions=[any_sketch.BitwiseOrFunction()])

    AnyDistributionBloomFilter.__init__(self, config, random_seed)

    # Recorded only after the parent initializer has run, as before.
    self.decay_rate = decay_rate
def __init__(self, length, probability, random_seed=None):
    """Creates a bloom filter indexed by a geometric distribution.

    Args:
      length: The length of bit vector for the bloom filter.
      probability: p of geometric distribution. p should be small enough
        that geom.cdf(length, probability) won't be 1 in the middle of
        the array, so all bits can be used.
      random_seed: An optional integer specifying the random seed for
        generating the random seeds for hash functions.
    """
    geometric_index = any_sketch.IndexSpecification(
        any_sketch.GeometricDistribution(length, probability), "geometric")

    # A single hash function with bitwise-OR registers.
    config = any_sketch.SketchConfig(
        [geometric_index],
        num_hashes=1,
        value_functions=[any_sketch.BitwiseOrFunction()])

    super().__init__(config, random_seed)