def setUp(self):
    """Prepare the fixture attributes used by the frequency analyzer tests.

    Sets ``self.sketch_estimator_config_list`` (a single estimator config),
    ``self.evaluation_config`` (a single 'homogeneous' scenario, one run) and
    ``self.run_name``.
    """
    super(FrequencyEstimatorEvaluationAnalyzerTest, self).setUp()

    # The estimator config name is derived from its three components.
    estimator_config_name = (
        evaluation_configs.construct_sketch_estimator_config_name(
            sketch_name='exact_multi_set',
            sketch_config='10000',
            estimator_name='lossless'))
    lossless_config = configs.SketchEstimatorConfig(
        name=estimator_config_name,
        sketch_factory=exact_set.ExactMultiSet.get_sketch_factory(),
        estimator=exact_set.LosslessEstimator(),
        max_frequency=2,
    )
    self.sketch_estimator_config_list = [lossless_config]

    multiset_generator = frequency_set_generator.HomogeneousMultiSetGenerator
    homogeneous_scenario = configs.ScenarioConfig(
        name='homogeneous',
        set_generator_factory=(
            multiset_generator.get_generator_factory_with_num_and_size(
                universe_size=100,
                num_sets=3,
                set_size=50,
                freq_rates=[5] * 3,
                freq_cap=8,
            )))
    self.evaluation_config = configs.EvaluationConfig(
        name='frequency_end_to_end_test',
        num_runs=1,
        scenario_config_list=[homogeneous_scenario])

    self.run_name = 'test_run'
def setUp(self):
    """Prepare configs and a runner callable for the report generator tests.

    Sets ``self.sketch_estimator_config_list``, ``self.evaluation_config``,
    ``self.evaluation_run_name`` and ``self.run_evaluation_and_simulation``.
    The latter takes an output directory, runs the evaluator and then the
    analyzer against it, and stores both on ``self``.
    """
    super(ReportGeneratorTest, self).setUp()

    lossless_config = simulator.SketchEstimatorConfig(
        name='exact_set-infty-infty-lossless',
        sketch_factory=exact_set.ExactSet.get_sketch_factory(),
        estimator=exact_set.LosslessEstimator(),
        sketch_noiser=None,
        estimate_noiser=None)
    less_one_config = simulator.SketchEstimatorConfig(
        name='exact_set-infty-infty-less_one',
        sketch_factory=exact_set.ExactSet.get_sketch_factory(),
        estimator=exact_set.LessOneEstimator(),
        # A noiser configured to add zero random elements.
        sketch_noiser=exact_set.AddRandomElementsNoiser(
            num_random_elements=0,
            random_state=np.random.RandomState()),
        estimate_noiser=None)
    self.sketch_estimator_config_list = (lossless_config, less_one_config)

    # Two scenarios that differ only by name.
    independent_generator = set_generator.IndependentSetGenerator
    scenarios = [
        configs.ScenarioConfig(
            name=scenario_name,
            set_generator_factory=(
                independent_generator.get_generator_factory_with_num_and_size(
                    universe_size=10, num_sets=5, set_size=1)))
        for scenario_name in ('ind1', 'ind2')
    ]
    self.evaluation_config = configs.EvaluationConfig(
        name='test_evaluation',
        num_runs=2,
        scenario_config_list=scenarios)

    self.evaluation_run_name = 'test_run'

    def _run_evaluation_and_simulation(out_dir):
        # Run the evaluation first; the analyzer is pointed at the same
        # directory for both its input and its output.
        self.evaluator = evaluator.Evaluator(
            evaluation_config=self.evaluation_config,
            sketch_estimator_config_list=self.sketch_estimator_config_list,
            run_name=self.evaluation_run_name,
            out_dir=out_dir)
        self.evaluator()

        criteria = [(0.05, 0.95), (1.01, 0.9)]
        self.analyzer = analyzer.CardinalityEstimatorEvaluationAnalyzer(
            out_dir=out_dir,
            evaluation_directory=out_dir,
            evaluation_run_name=self.evaluation_run_name,
            evaluation_name=self.evaluation_config.name,
            estimable_criteria_list=criteria)
        self.analyzer()

    self.run_evaluation_and_simulation = _run_evaluation_and_simulation
def setUp(self):
    """Prepare configs and factory callables for the analyzer tests.

    Sets ``self.sketch_estimator_config_list``, ``self.evaluation_config``,
    ``self.run_name``, ``self.get_test_evaluator`` (builds an Evaluator for a
    given output directory) and ``self.get_test_analyzer`` (builds an
    analyzer for given output and evaluation directories).
    """
    super(AnalyzerTest, self).setUp()

    lossless_config = configs.SketchEstimatorConfig(
        name='exact_set_lossless',
        sketch_factory=exact_set.ExactSet.get_sketch_factory(),
        estimator=exact_set.LosslessEstimator(),
        noiser=None)
    less_one_config = configs.SketchEstimatorConfig(
        name='exact_set_less_one',
        sketch_factory=exact_set.ExactSet.get_sketch_factory(),
        estimator=exact_set.LessOneEstimator(),
        # A noiser configured to add zero random elements.
        noiser=exact_set.AddRandomElementsNoiser(
            num_random_elements=0,
            random_state=np.random.RandomState()))
    self.sketch_estimator_config_list = (lossless_config, less_one_config)

    # Two scenarios that differ only by name.
    independent_generator = set_generator.IndependentSetGenerator
    scenarios = [
        configs.ScenarioConfig(
            name=scenario_name,
            set_generator_factory=(
                independent_generator.get_generator_factory(
                    universe_size=10, num_sets=5, set_size=1)))
        for scenario_name in ('ind1', 'ind2')
    ]
    self.evaluation_config = configs.EvaluationConfig(
        name='test_evaluation',
        num_runs=2,
        scenario_config_list=scenarios)

    self.run_name = 'test_run'

    def _get_test_evaluator(out_dir):
        return evaluator.Evaluator(
            evaluation_config=self.evaluation_config,
            sketch_estimator_config_list=self.sketch_estimator_config_list,
            run_name=self.run_name,
            out_dir=out_dir)

    def _get_test_analyzer(out_dir, evaluation_dir):
        return analyzer.CardinalityEstimatorEvaluationAnalyzer(
            out_dir=out_dir,
            evaluation_directory=evaluation_dir,
            evaluation_run_name=self.run_name,
            evaluation_name=self.evaluation_config.name)

    self.get_test_evaluator = _get_test_evaluator
    self.get_test_analyzer = _get_test_analyzer
def main(argv):
    """Run the '3_vary_decay_rate' evaluation.

    Builds one exponential Bloom filter estimator config per value in
    ``FLAGS.exponential_bloom_filter_decay_rate`` and one scenario per value
    in ``FLAGS.set_size_ratio``, then constructs and invokes an Evaluator
    with ``out_dir="."`` and ``workers=10``.

    Args:
      argv: command-line arguments; at most one element is accepted.

    Raises:
      app.UsageError: if ``argv`` has more than one element.
    """
    if len(argv) > 1:
        raise app.UsageError('Too many command-line arguments.')

    noiser_flip_probability = 1 / (1 + np.exp(FLAGS.noiser_epsilon))
    universe_size = int(100 * FLAGS.sketch_size)

    def _exponential_bloom_filter_config(decay_rate):
        # One estimator config for the given decay rate.
        return SketchEstimatorConfig(
            name='exp_BF_' + str(int(decay_rate)),
            sketch_factory=ExponentialBloomFilter.get_sketch_factory(
                FLAGS.sketch_size, decay_rate),
            estimator=FirstMomentEstimator(
                method='exp',
                denoiser=SurrealDenoiser(
                    probability=noiser_flip_probability)),
            sketch_noiser=BlipNoiser(FLAGS.noiser_epsilon))

    def _scenario_for_ratio(set_size_ratio):
        # The set size is the ratio scaled by the sketch size.
        set_size = int(set_size_ratio * FLAGS.sketch_size)
        return configs.ScenarioConfig(
            name=str(int(set_size_ratio)),
            set_generator_factory=(
                set_generator.IndependentSetGenerator
                .get_generator_factory_with_num_and_size(
                    universe_size=universe_size,
                    num_sets=FLAGS.number_of_sets,
                    set_size=set_size)))

    estimator_config_list = [
        _exponential_bloom_filter_config(decay_rate)
        for decay_rate in FLAGS.exponential_bloom_filter_decay_rate
    ]
    scenario_config_list = [
        _scenario_for_ratio(ratio) for ratio in FLAGS.set_size_ratio
    ]

    evaluation_name = (
        '3_vary_decay_rate_' + str(int(FLAGS.sketch_size / 1000)) + "k")
    evaluation_config = configs.EvaluationConfig(
        name=evaluation_name,
        num_runs=FLAGS.number_of_trials,
        scenario_config_list=scenario_config_list)

    generate_results = evaluator.Evaluator(
        evaluation_config=evaluation_config,
        sketch_estimator_config_list=estimator_config_list,
        run_name="eval_adbf_result",
        out_dir=".",
        workers=10)
    generate_results()
def main(argv):
    """Run the '5_prediction' evaluation.

    Builds one exponential Bloom filter estimator config for every
    (sketch size, epsilon) pair from ``FLAGS.sketch_size`` and
    ``FLAGS.noiser_epsilon``, and one scenario per value in
    ``FLAGS.universe_size``, then constructs and invokes an Evaluator with
    ``out_dir="."`` and ``workers=10``.

    Args:
      argv: command-line arguments; at most one element is accepted.

    Raises:
      app.UsageError: if ``argv`` has more than one element.
    """
    if len(argv) > 1:
        raise app.UsageError('Too many command-line arguments.')

    def _exponential_bloom_filter_config(sketch_size, epsilon):
        # The name encodes the sketch size in thousands and the value
        # 1 / (1 + exp(epsilon)), which the original comments call the
        # flipping probability.
        return SketchEstimatorConfig(
            name="{}k_{:.2f}".format(
                int(sketch_size / 1000), 1 / (1 + np.exp(epsilon))),
            sketch_factory=ExponentialBloomFilter.get_sketch_factory(
                sketch_size, FLAGS.exponential_bloom_filter_decay_rate),
            estimator=FirstMomentEstimator(
                method='exp',
                denoiser=SurrealDenoiser(epsilon)),
            sketch_noiser=BlipNoiser(epsilon))

    def _scenario_for_universe(universe_size):
        # Scenario names are the universe size expressed in millions.
        return configs.ScenarioConfig(
            name="{:.1f}".format(universe_size / 1000000),
            set_generator_factory=(
                set_generator.IndependentSetGenerator
                .get_generator_factory_with_num_and_size(
                    universe_size=universe_size,
                    num_sets=FLAGS.number_of_sets,
                    set_size=FLAGS.set_size)))

    estimator_config_list = [
        _exponential_bloom_filter_config(sketch_size, epsilon)
        for sketch_size in FLAGS.sketch_size
        for epsilon in FLAGS.noiser_epsilon
    ]
    scenario_config_list = [
        _scenario_for_universe(universe_size)
        for universe_size in FLAGS.universe_size
    ]

    evaluation_config = configs.EvaluationConfig(
        name='5_prediction',
        num_runs=FLAGS.number_of_trials,
        scenario_config_list=scenario_config_list)

    generate_results = evaluator.Evaluator(
        evaluation_config=evaluation_config,
        sketch_estimator_config_list=estimator_config_list,
        run_name="eval_adbf_result",
        out_dir=".",
        workers=10)
    generate_results()
def setUp(self):
    """Prepare configs and an evaluator factory for the evaluator tests.

    Sets ``self.sketch_estimator_config_list``, ``self.evaluation_config``,
    ``self.run_name`` and ``self.get_test_evaluator``, a callable that builds
    an Evaluator for a given output directory and ``overwrite`` setting.
    """
    super(EvaluatorTest, self).setUp()

    lossless_config = simulator.SketchEstimatorConfig(
        name='exact_set_lossless',
        sketch_factory=exact_set.ExactSet.get_sketch_factory(),
        estimator=exact_set.LosslessEstimator())
    less_one_config = simulator.SketchEstimatorConfig(
        name='exact_set_less_one',
        sketch_factory=exact_set.ExactSet.get_sketch_factory(),
        estimator=exact_set.LessOneEstimator(),
        # A noiser configured to add zero random elements.
        sketch_noiser=exact_set.AddRandomElementsNoiser(
            num_random_elements=0,
            random_state=np.random.RandomState()))
    self.sketch_estimator_config_list = (lossless_config, less_one_config)

    # Two scenarios that differ only by name.
    independent_generator = set_generator.IndependentSetGenerator
    scenarios = [
        configs.ScenarioConfig(
            name=scenario_name,
            set_generator_factory=(
                independent_generator.get_generator_factory_with_num_and_size(
                    universe_size=10, num_sets=2, set_size=5)))
        for scenario_name in ('ind1', 'ind2')
    ]
    self.evaluation_config = configs.EvaluationConfig(
        name='test_evaluation',
        num_runs=2,
        scenario_config_list=scenarios)

    self.run_name = 'test_run'

    def _get_test_evaluator(out_dir, overwrite=False):
        return evaluator.Evaluator(
            evaluation_config=self.evaluation_config,
            sketch_estimator_config_list=self.sketch_estimator_config_list,
            run_name=self.run_name,
            out_dir=out_dir,
            overwrite=overwrite)

    self.get_test_evaluator = _get_test_evaluator
def main(argv):
    """Run the '2_vary_set_size' evaluation.

    Builds three estimator configs ('geo_BF', 'log_BF', 'exp_BF') that share
    the same denoiser and noiser settings, and one scenario per value in
    ``FLAGS.set_size_ratio``, then constructs and invokes an Evaluator with
    ``out_dir="."`` and ``workers=10``.

    Args:
      argv: command-line arguments; at most one element is accepted.

    Raises:
      app.UsageError: if ``argv`` has more than one element.
    """
    if len(argv) > 1:
        raise app.UsageError('Too many command-line arguments.')

    noiser_flip_probability = 1 / (1 + np.exp(FLAGS.noiser_epsilon))

    def _first_moment_config(name, sketch_factory, method):
        # The three configs differ only in name, sketch factory and method.
        return SketchEstimatorConfig(
            name=name,
            sketch_factory=sketch_factory,
            estimator=FirstMomentEstimator(
                method=method,
                denoiser=SurrealDenoiser(
                    probability=noiser_flip_probability)),
            sketch_noiser=BlipNoiser(FLAGS.noiser_epsilon))

    geometric_config = _first_moment_config(
        'geo_BF',
        GeometricBloomFilter.get_sketch_factory(
            FLAGS.sketch_size, FLAGS.geometric_bloom_filter_probability),
        'geo')
    logarithmic_config = _first_moment_config(
        'log_BF',
        LogarithmicBloomFilter.get_sketch_factory(FLAGS.sketch_size),
        'log')
    exponential_config = _first_moment_config(
        'exp_BF',
        ExponentialBloomFilter.get_sketch_factory(
            FLAGS.sketch_size, FLAGS.exponential_bloom_filter_decay_rate),
        'exp')
    estimator_config_list = [
        geometric_config,
        logarithmic_config,
        exponential_config,
    ]

    def _scenario_for_ratio(set_size_ratio):
        # The set size is the ratio scaled by the sketch size.
        set_size = int(set_size_ratio * FLAGS.sketch_size)
        return configs.ScenarioConfig(
            name="{:.1f}".format(set_size_ratio),
            set_generator_factory=(
                set_generator.IndependentSetGenerator
                .get_generator_factory_with_num_and_size(
                    universe_size=FLAGS.universe_size,
                    num_sets=FLAGS.number_of_sets,
                    set_size=set_size)))

    scenario_config_list = [
        _scenario_for_ratio(ratio) for ratio in FLAGS.set_size_ratio
    ]

    evaluation_config = configs.EvaluationConfig(
        name='2_vary_set_size',
        num_runs=FLAGS.number_of_trials,
        scenario_config_list=scenario_config_list)

    generate_results = evaluator.Evaluator(
        evaluation_config=evaluation_config,
        sketch_estimator_config_list=estimator_config_list,
        run_name="eval_adbf_result",
        out_dir=".",
        workers=10)
    generate_results()
def main(argv):
    """Run the '1_vary_flip_prob' evaluation.

    For every epsilon in ``FLAGS.noiser_epsilon`` this builds three estimator
    configs (geometric, logarithmic and exponential Bloom filters) whose
    names end in the two-decimal value of ``1 / (1 + exp(epsilon))``. A
    single 'independent' scenario is evaluated by an Evaluator constructed
    with ``out_dir="."`` and ``workers=10``.

    Args:
      argv: command-line arguments; at most one element is accepted.

    Raises:
      app.UsageError: if ``argv`` has more than one element.
    """
    if len(argv) > 1:
        raise app.UsageError('Too many command-line arguments.')

    def _first_moment_config(name, sketch_factory, method, epsilon):
        # The same epsilon drives both the denoiser and the sketch noiser.
        return SketchEstimatorConfig(
            name=name,
            sketch_factory=sketch_factory,
            estimator=FirstMomentEstimator(
                method=method,
                denoiser=SurrealDenoiser(epsilon)),
            sketch_noiser=BlipNoiser(epsilon))

    estimator_config_list = []
    for epsilon in FLAGS.noiser_epsilon:
        # Flipping probability, used only to label the configs.
        noiser_flip_probability = 1 / (1 + np.exp(epsilon))
        probability_tag = "{:.2f}".format(noiser_flip_probability)

        # NOTE(review): the uniform Bloom filter config below was already
        # commented out in the original and is kept here for reference.
        # estimator_config_bloom_filter = SketchEstimatorConfig(
        #     name='unif_BF_' + "{:.2f}".format(noiser_flip_probability),
        #     sketch_factory=BloomFilter.get_sketch_factory(
        #         FLAGS.sketch_size, FLAGS.num_bloom_filter_hashes),
        #     estimator=UnionEstimator(),
        #     sketch_noiser=BlipNoiser(epsilon))

        estimator_config_list.extend([
            # estimator_config_bloom_filter,
            _first_moment_config(
                'geo_BF_' + probability_tag,
                GeometricBloomFilter.get_sketch_factory(
                    FLAGS.sketch_size,
                    FLAGS.geometric_bloom_filter_probability),
                'geo',
                epsilon),
            _first_moment_config(
                'log_BF_' + probability_tag,
                LogarithmicBloomFilter.get_sketch_factory(FLAGS.sketch_size),
                'log',
                epsilon),
            _first_moment_config(
                'exp_BF_' + probability_tag,
                ExponentialBloomFilter.get_sketch_factory(
                    FLAGS.sketch_size,
                    FLAGS.exponential_bloom_filter_decay_rate),
                'exp',
                epsilon),
        ])

    independent_scenario = configs.ScenarioConfig(
        name='independent',
        set_generator_factory=(
            set_generator.IndependentSetGenerator
            .get_generator_factory_with_num_and_size(
                universe_size=FLAGS.universe_size,
                num_sets=FLAGS.number_of_sets,
                set_size=FLAGS.set_size)))
    evaluation_config = configs.EvaluationConfig(
        name='1_vary_flip_prob',
        num_runs=FLAGS.number_of_trials,
        scenario_config_list=[independent_scenario])

    generate_results = evaluator.Evaluator(
        evaluation_config=evaluation_config,
        sketch_estimator_config_list=estimator_config_list,
        run_name="eval_adbf_result",
        out_dir=".",
        workers=10)
    generate_results()