def _get_test_evaluator(out_dir, overwrite=False):
    """Build an ``evaluator.Evaluator`` from the fixture held on ``self``.

    NOTE(review): ``self`` is not a parameter; it is resolved from the
    enclosing scope, so this presumably lives inside a test method.

    Args:
      out_dir: output directory handed to the evaluator.
      overwrite: forwarded unchanged as the evaluator's ``overwrite`` argument.

    Returns:
      The newly constructed ``evaluator.Evaluator``.
    """
    evaluator_kwargs = dict(
        evaluation_config=self.evaluation_config,
        sketch_estimator_config_list=self.sketch_estimator_config_list,
        run_name=self.run_name,
        out_dir=out_dir,
        overwrite=overwrite,
    )
    return evaluator.Evaluator(**evaluator_kwargs)
def _run_evaluation_and_simulation(out_dir):
    """Run the evaluator and then the cardinality analyzer in ``out_dir``.

    The evaluator and the analyzer are stored on ``self.evaluator`` and
    ``self.analyzer`` respectively, and each is invoked right after it is
    created. The same directory is used both as the evaluation output and as
    the analyzer's input and output.

    NOTE(review): ``self`` is not a parameter; it is resolved from the
    enclosing scope, so this presumably lives inside a test method.

    Args:
      out_dir: directory used for evaluation results and analysis output.
    """
    # Step 1: simulate.
    simulation = evaluator.Evaluator(
        evaluation_config=self.evaluation_config,
        sketch_estimator_config_list=self.sketch_estimator_config_list,
        run_name=self.evaluation_run_name,
        out_dir=out_dir,
    )
    self.evaluator = simulation
    self.evaluator()

    # Step 2: analyze what step 1 wrote.
    estimable_criteria = [(0.05, 0.95), (1.01, 0.9)]
    analysis = analyzer.CardinalityEstimatorEvaluationAnalyzer(
        out_dir=out_dir,
        evaluation_directory=out_dir,
        evaluation_run_name=self.evaluation_run_name,
        evaluation_name=self.evaluation_config.name,
        estimable_criteria_list=estimable_criteria,
    )
    self.analyzer = analysis
    self.analyzer()
def main(argv):
    """Evaluate exponential Bloom filters across a list of decay rates.

    Builds one ``SketchEstimatorConfig`` per value in
    ``FLAGS.exponential_bloom_filter_decay_rate`` and one scenario per value
    in ``FLAGS.set_size_ratio``, then runs ``evaluator.Evaluator`` with run
    name ``eval_adbf_result``, output directory ``.`` and 10 workers.

    Args:
      argv: positional command-line arguments; at most one element is
        accepted.

    Raises:
      app.UsageError: if more than one positional argument is present.
    """
    if len(argv) > 1:
        raise app.UsageError('Too many command-line arguments.')

    noiser_flip_probability = 1 / (1 + np.exp(FLAGS.noiser_epsilon))
    universe_size = int(100 * FLAGS.sketch_size)

    # One estimator configuration per decay rate.
    estimator_config_list = [
        SketchEstimatorConfig(
            name='exp_BF_' + str(int(decay_rate)),
            sketch_factory=ExponentialBloomFilter.get_sketch_factory(
                FLAGS.sketch_size, decay_rate),
            estimator=FirstMomentEstimator(
                method='exp',
                denoiser=SurrealDenoiser(
                    probability=noiser_flip_probability)),
            sketch_noiser=BlipNoiser(FLAGS.noiser_epsilon))
        for decay_rate in FLAGS.exponential_bloom_filter_decay_rate
    ]

    # One scenario per set-size ratio; the set size scales with sketch size.
    generator_factory_builder = (
        set_generator.IndependentSetGenerator
        .get_generator_factory_with_num_and_size)
    scenario_config_list = [
        configs.ScenarioConfig(
            name=str(int(ratio)),
            set_generator_factory=generator_factory_builder(
                universe_size=universe_size,
                num_sets=FLAGS.number_of_sets,
                set_size=int(ratio * FLAGS.sketch_size)))
        for ratio in FLAGS.set_size_ratio
    ]

    evaluation_name = (
        '3_vary_decay_rate_' + str(int(FLAGS.sketch_size / 1000)) + "k")
    evaluation_config = configs.EvaluationConfig(
        name=evaluation_name,
        num_runs=FLAGS.number_of_trials,
        scenario_config_list=scenario_config_list)

    generate_results = evaluator.Evaluator(
        evaluation_config=evaluation_config,
        sketch_estimator_config_list=estimator_config_list,
        run_name="eval_adbf_result",
        out_dir=".",
        workers=10)
    generate_results()
def main(argv):
    """Evaluate exponential Bloom filters over sketch sizes and epsilons.

    Builds one ``SketchEstimatorConfig`` for every (sketch size, epsilon)
    pair taken from ``FLAGS.sketch_size`` and ``FLAGS.noiser_epsilon``, and
    one scenario per value in ``FLAGS.universe_size``, then runs
    ``evaluator.Evaluator`` with run name ``eval_adbf_result``, output
    directory ``.`` and 10 workers.

    Args:
      argv: positional command-line arguments; at most one element is
        accepted.

    Raises:
      app.UsageError: if more than one positional argument is present.
    """
    if len(argv) > 1:
        raise app.UsageError('Too many command-line arguments.')

    def _config_name(sketch_size, epsilon):
        # "<size in thousands>k_<flipping probability to two decimals>".
        flip_probability = 1 / (1 + np.exp(epsilon))
        size_label = str(int(sketch_size / 1000))
        return size_label + "k_" + "{:.2f}".format(flip_probability)

    # Sketch size is the outer loop and epsilon the inner loop.
    estimator_config_list = [
        SketchEstimatorConfig(
            name=_config_name(sketch_size, epsilon),
            sketch_factory=ExponentialBloomFilter.get_sketch_factory(
                sketch_size, FLAGS.exponential_bloom_filter_decay_rate),
            estimator=FirstMomentEstimator(
                method='exp',
                denoiser=SurrealDenoiser(epsilon)),
            sketch_noiser=BlipNoiser(epsilon))
        for sketch_size in FLAGS.sketch_size
        for epsilon in FLAGS.noiser_epsilon
    ]

    # One scenario per universe size, named by the size in millions.
    generator_factory_builder = (
        set_generator.IndependentSetGenerator
        .get_generator_factory_with_num_and_size)
    scenario_config_list = [
        configs.ScenarioConfig(
            name="{:.1f}".format(universe_size / 1000000),
            set_generator_factory=generator_factory_builder(
                universe_size=universe_size,
                num_sets=FLAGS.number_of_sets,
                set_size=FLAGS.set_size))
        for universe_size in FLAGS.universe_size
    ]

    evaluation_config = configs.EvaluationConfig(
        name='5_prediction',
        num_runs=FLAGS.number_of_trials,
        scenario_config_list=scenario_config_list)

    generate_results = evaluator.Evaluator(
        evaluation_config=evaluation_config,
        sketch_estimator_config_list=estimator_config_list,
        run_name="eval_adbf_result",
        out_dir=".",
        workers=10)
    generate_results()
def main(argv):
    """Run the flag-selected evaluation and then analyze its results.

    The evaluation config and the estimator configs are looked up by name in
    ``evaluation_configs``; results are written to
    ``FLAGS.evaluation_out_dir`` and the analysis to
    ``FLAGS.analysis_out_dir``.

    Args:
      argv: positional command-line arguments; at most one element is
        accepted.

    Raises:
      app.UsageError: if more than one positional argument is present, or if
        any required flag is unset.
    """
    if len(argv) > 1:
        raise app.UsageError('Too many command-line arguments.')

    required_flags = (
        'evaluation_out_dir', 'analysis_out_dir', 'evaluation_config',
        'sketch_estimator_configs', 'evaluation_run_name', 'num_runs')
    for flag_name in required_flags:
        flags.mark_flag_as_required(flag_name)

    # The flag values are read below, so parsing has already happened by the
    # time this function runs. Validators registered after parsing are not
    # executed on their own, so run them explicitly; otherwise a missing
    # required flag would only surface later as an unrelated error.
    try:
        FLAGS.validate_all_flags()
    except flags.IllegalFlagValueError as e:
        raise app.UsageError(str(e))

    logging.set_verbosity(logging.INFO)
    logging.info(
        '====Running %s using evaluation %s for:\n%s',
        FLAGS.evaluation_config, FLAGS.evaluation_run_name,
        ', '.join(FLAGS.sketch_estimator_configs))

    # The registry maps a name to a callable that takes the number of runs.
    evaluation_config = evaluation_configs.NAME_TO_EVALUATION_CONFIGS[
        FLAGS.evaluation_config](FLAGS.num_runs)
    sketch_estimator_config_list = [
        evaluation_configs.NAME_TO_ESTIMATOR_CONFIGS[conf]
        for conf in FLAGS.sketch_estimator_configs
    ]

    # Run simulations.
    generate_results = evaluator.Evaluator(
        evaluation_config=evaluation_config,
        sketch_estimator_config_list=sketch_estimator_config_list,
        run_name=FLAGS.evaluation_run_name,
        out_dir=FLAGS.evaluation_out_dir)
    generate_results()

    # Analyze results.
    logging.info('====Analyzing the results.')
    generate_summary = analyzer.CardinalityEstimatorEvaluationAnalyzer(
        out_dir=FLAGS.analysis_out_dir,
        evaluation_directory=FLAGS.evaluation_out_dir,
        evaluation_run_name=FLAGS.evaluation_run_name,
        evaluation_name=evaluation_config.name,
        error_margin=FLAGS.error_margin,
        proportion_of_runs=FLAGS.proportion_of_runs,
        plot_params={
            analyzer.BOXPLOT_SIZE_WIDTH_INCH: FLAGS.boxplot_size_width_inch,
            analyzer.BOXPLOT_SIZE_HEIGHT_INCH: FLAGS.boxplot_size_height_inch,
        })
    generate_summary()

    logging.info('====Evaluation and analysis done!')
def main(argv):
    """Evaluate three Bloom filter variants across a list of set sizes.

    Builds geometric, logarithmic and exponential Bloom filter estimator
    configs (named ``geo_BF``, ``log_BF`` and ``exp_BF``) and one scenario
    per value in ``FLAGS.set_size_ratio``, then runs ``evaluator.Evaluator``
    with run name ``eval_adbf_result``, output directory ``.`` and 10
    workers.

    Args:
      argv: positional command-line arguments; at most one element is
        accepted.

    Raises:
      app.UsageError: if more than one positional argument is present.
    """
    if len(argv) > 1:
        raise app.UsageError('Too many command-line arguments.')

    noiser_flip_probability = 1 / (1 + np.exp(FLAGS.noiser_epsilon))

    def _estimator_config(name, sketch_factory, method):
        # The three variants differ only in name, sketch factory and
        # estimator method; the denoiser and noiser setup is shared.
        return SketchEstimatorConfig(
            name=name,
            sketch_factory=sketch_factory,
            estimator=FirstMomentEstimator(
                method=method,
                denoiser=SurrealDenoiser(
                    probability=noiser_flip_probability)),
            sketch_noiser=BlipNoiser(FLAGS.noiser_epsilon))

    estimator_config_list = [
        _estimator_config(
            'geo_BF',
            GeometricBloomFilter.get_sketch_factory(
                FLAGS.sketch_size, FLAGS.geometric_bloom_filter_probability),
            'geo'),
        _estimator_config(
            'log_BF',
            LogarithmicBloomFilter.get_sketch_factory(FLAGS.sketch_size),
            'log'),
        _estimator_config(
            'exp_BF',
            ExponentialBloomFilter.get_sketch_factory(
                FLAGS.sketch_size, FLAGS.exponential_bloom_filter_decay_rate),
            'exp'),
    ]

    # One scenario per set-size ratio; the set size scales with sketch size.
    generator_factory_builder = (
        set_generator.IndependentSetGenerator
        .get_generator_factory_with_num_and_size)
    scenario_config_list = [
        configs.ScenarioConfig(
            name="{:.1f}".format(ratio),
            set_generator_factory=generator_factory_builder(
                universe_size=FLAGS.universe_size,
                num_sets=FLAGS.number_of_sets,
                set_size=int(ratio * FLAGS.sketch_size)))
        for ratio in FLAGS.set_size_ratio
    ]

    evaluation_config = configs.EvaluationConfig(
        name='2_vary_set_size',
        num_runs=FLAGS.number_of_trials,
        scenario_config_list=scenario_config_list)

    generate_results = evaluator.Evaluator(
        evaluation_config=evaluation_config,
        sketch_estimator_config_list=estimator_config_list,
        run_name="eval_adbf_result",
        out_dir=".",
        workers=10)
    generate_results()
def _run(run_evaluation, run_analysis, generate_html_report,
         evaluation_out_dir, analysis_out_dir, report_out_dir,
         evaluation_config, sketch_estimator_configs, evaluation_run_name,
         num_runs, num_workers, error_margin, proportion_of_runs,
         boxplot_xlabel_rotate, boxplot_size_width_inch,
         boxplot_size_height_inch, analysis_type, max_frequency,
         barplot_size_width_inch=None, barplot_size_height_inch=None,
         universe_size=None):
    """Run any of the three stages: evaluation, analysis, HTML report.

    Each stage is gated by its own boolean and the stages run in that order.

    Args:
      run_evaluation: if true, run the evaluator.
      run_analysis: if true, run the analyzer.
      generate_html_report: if true, generate the report and log its URL.
      evaluation_out_dir: evaluator output dir, also the analyzer's input.
      analysis_out_dir: analyzer output dir, also the report's input.
      report_out_dir: report generator output dir.
      evaluation_config: name passed to
        ``evaluation_configs.get_evaluation_config``.
      sketch_estimator_configs: names passed to
        ``evaluation_configs.get_estimator_configs``.
      evaluation_run_name: run name shared by all three stages.
      num_runs: forwarded to the evaluation config as ``num_runs``.
      num_workers: forwarded to the evaluator as ``workers``.
      error_margin: iterable of values convertible to float.
      proportion_of_runs: iterable of values convertible to float, paired
        positionally with ``error_margin``.
      boxplot_xlabel_rotate: plot parameter for the analyzer.
      boxplot_size_width_inch: plot parameter for the analyzer.
      boxplot_size_height_inch: plot parameter for the analyzer.
      analysis_type: ``'frequency'`` selects the frequency analyzer and
        report generator; any other value selects the cardinality ones.
      max_frequency: forwarded to
        ``evaluation_configs.get_estimator_configs``.
      barplot_size_width_inch: plot parameter for the analyzer.
      barplot_size_height_inch: plot parameter for the analyzer.
      universe_size: if not None, converted to int and forwarded to the
        evaluation config as ``universe_size``.
    """
    config_kwargs = {'num_runs': num_runs}
    if universe_size is not None:
        config_kwargs['universe_size'] = int(universe_size)
    config_factory = evaluation_configs.get_evaluation_config(
        evaluation_config)
    # From here on ``evaluation_config`` is the config object, not its name.
    evaluation_config = config_factory(**config_kwargs)

    sketch_estimator_config_list = evaluation_configs.get_estimator_configs(
        sketch_estimator_configs, max_frequency)

    if run_evaluation:
        logging.info(
            '====Running %s using evaluation %s for:\n%s',
            evaluation_config, evaluation_run_name,
            ', '.join(sketch_estimator_configs))
        generate_results = evaluator.Evaluator(
            evaluation_config=evaluation_config,
            sketch_estimator_config_list=sketch_estimator_config_list,
            run_name=evaluation_run_name,
            out_dir=evaluation_out_dir,
            workers=num_workers)
        generate_results()

    # Pair each error margin with its proportion of runs; zip stops at the
    # shorter of the two sequences.
    margins = [float(value) for value in error_margin]
    proportions = [float(value) for value in proportion_of_runs]
    estimable_criteria_list = list(zip(margins, proportions))

    use_frequency = analysis_type == 'frequency'
    estimator_analyzer_func = (
        analyzer.FrequencyEstimatorEvaluationAnalyzer if use_frequency
        else analyzer.CardinalityEstimatorEvaluationAnalyzer)
    report_generator_func = (
        report_generator.FrequencyReportGenerator if use_frequency
        else report_generator.CardinalityReportGenerator)

    if run_analysis:
        logging.info('====Analyzing the results.')
        plot_params = {
            analyzer.XLABEL_ROTATE: boxplot_xlabel_rotate,
            analyzer.BOXPLOT_SIZE_WIDTH_INCH: boxplot_size_width_inch,
            analyzer.BOXPLOT_SIZE_HEIGHT_INCH: boxplot_size_height_inch,
            analyzer.BARPLOT_SIZE_WIDTH_INCH: barplot_size_width_inch,
            analyzer.BARPLOT_SIZE_HEIGHT_INCH: barplot_size_height_inch,
        }
        generate_summary = estimator_analyzer_func(
            out_dir=analysis_out_dir,
            evaluation_directory=evaluation_out_dir,
            evaluation_run_name=evaluation_run_name,
            evaluation_name=evaluation_config.name,
            estimable_criteria_list=estimable_criteria_list,
            plot_params=plot_params)
        generate_summary()
        logging.info('====Evaluation and analysis done!')

    if generate_html_report:
        generate_report = report_generator_func(
            out_dir=report_out_dir,
            analysis_out_dir=analysis_out_dir,
            evaluation_run_name=evaluation_run_name,
            evaluation_name=evaluation_config.name)
        report_url = generate_report()
        logging.info('====Report generated: %s.', report_url)
def get_test_evaluator(self, out_dir):
    """Build an ``evaluator.Evaluator`` from this object's configuration.

    Args:
      out_dir: output directory handed to the evaluator.

    Returns:
      A new ``evaluator.Evaluator`` using ``self.evaluation_config``,
      ``self.sketch_estimator_config_list`` and ``self.run_name``.
    """
    evaluator_kwargs = dict(
        evaluation_config=self.evaluation_config,
        sketch_estimator_config_list=self.sketch_estimator_config_list,
        run_name=self.run_name,
        out_dir=out_dir,
    )
    return evaluator.Evaluator(**evaluator_kwargs)
def main(argv):
    """Evaluate three Bloom filter variants across a list of epsilons.

    For every epsilon in ``FLAGS.noiser_epsilon`` this builds geometric,
    logarithmic and exponential Bloom filter estimator configs, each named
    with the flipping probability formatted to two decimals. It then runs
    ``evaluator.Evaluator`` on a single scenario named ``independent`` with
    run name ``eval_adbf_result``, output directory ``.`` and 10 workers.

    Args:
      argv: positional command-line arguments; at most one element is
        accepted.

    Raises:
      app.UsageError: if more than one positional argument is present.
    """
    if len(argv) > 1:
        raise app.UsageError('Too many command-line arguments.')

    def _estimator_config(prefix, sketch_factory, method, epsilon):
        # The name suffix is the flipping probability derived from epsilon.
        noiser_flip_probability = 1 / (1 + np.exp(epsilon))
        return SketchEstimatorConfig(
            name=prefix + "{:.2f}".format(noiser_flip_probability),
            sketch_factory=sketch_factory,
            estimator=FirstMomentEstimator(
                method=method,
                denoiser=SurrealDenoiser(epsilon)),
            sketch_noiser=BlipNoiser(epsilon))

    # The uniform Bloom filter configuration is disabled in this script:
    # estimator_config_bloom_filter = SketchEstimatorConfig(
    #     name='unif_BF_' + "{:.2f}".format(noiser_flip_probability),
    #     sketch_factory=BloomFilter.get_sketch_factory(
    #         FLAGS.sketch_size, FLAGS.num_bloom_filter_hashes),
    #     estimator=UnionEstimator(),
    #     sketch_noiser=BlipNoiser(epsilon))
    estimator_config_list = []
    for epsilon in FLAGS.noiser_epsilon:
        estimator_config_list.extend([
            _estimator_config(
                'geo_BF_',
                GeometricBloomFilter.get_sketch_factory(
                    FLAGS.sketch_size,
                    FLAGS.geometric_bloom_filter_probability),
                'geo', epsilon),
            _estimator_config(
                'log_BF_',
                LogarithmicBloomFilter.get_sketch_factory(FLAGS.sketch_size),
                'log', epsilon),
            _estimator_config(
                'exp_BF_',
                ExponentialBloomFilter.get_sketch_factory(
                    FLAGS.sketch_size,
                    FLAGS.exponential_bloom_filter_decay_rate),
                'exp', epsilon),
        ])

    # A single scenario with flag-configured universe and set sizes.
    independent_scenario = configs.ScenarioConfig(
        name='independent',
        set_generator_factory=(
            set_generator.IndependentSetGenerator
            .get_generator_factory_with_num_and_size(
                universe_size=FLAGS.universe_size,
                num_sets=FLAGS.number_of_sets,
                set_size=FLAGS.set_size)))
    evaluation_config = configs.EvaluationConfig(
        name='1_vary_flip_prob',
        num_runs=FLAGS.number_of_trials,
        scenario_config_list=[independent_scenario])

    generate_results = evaluator.Evaluator(
        evaluation_config=evaluation_config,
        sketch_estimator_config_list=estimator_config_list,
        run_name="eval_adbf_result",
        out_dir=".",
        workers=10)
    generate_results()