def setUp(self):
        """Prepares the fixture attributes used by the frequency analyzer tests.

        Sets ``sketch_estimator_config_list`` (a single exact multi-set
        config with a lossless estimator), ``evaluation_config`` (one
        'homogeneous' scenario, one run) and ``run_name``.
        """
        super(FrequencyEstimatorEvaluationAnalyzerTest, self).setUp()

        lossless_config_name = (
            evaluation_configs.construct_sketch_estimator_config_name(
                sketch_name='exact_multi_set',
                sketch_config='10000',
                estimator_name='lossless'))
        lossless_multi_set = configs.SketchEstimatorConfig(
            name=lossless_config_name,
            sketch_factory=exact_set.ExactMultiSet.get_sketch_factory(),
            estimator=exact_set.LosslessEstimator(),
            max_frequency=2)
        self.sketch_estimator_config_list = [lossless_multi_set]

        # Three sets of 50 elements drawn from a universe of 100, one
        # frequency rate per set.
        multi_set_generator_factory = (
            frequency_set_generator.HomogeneousMultiSetGenerator
            .get_generator_factory_with_num_and_size(
                universe_size=100,
                num_sets=3,
                set_size=50,
                freq_rates=[5, 5, 5],
                freq_cap=8))
        homogeneous_scenario = configs.ScenarioConfig(
            name='homogeneous',
            set_generator_factory=multi_set_generator_factory)
        self.evaluation_config = configs.EvaluationConfig(
            name='frequency_end_to_end_test',
            num_runs=1,
            scenario_config_list=[homogeneous_scenario])

        self.run_name = 'test_run'
예제 #2
0
    def setUp(self):
        """Prepares the configs and the evaluate-then-analyze helper.

        Sets ``sketch_estimator_config_list`` (lossless and less-one ExactSet
        configs), ``evaluation_config`` (scenarios 'ind1' and 'ind2', two
        runs), ``evaluation_run_name`` and ``run_evaluation_and_simulation``.
        """
        super(ReportGeneratorTest, self).setUp()

        lossless_config = simulator.SketchEstimatorConfig(
            name='exact_set-infty-infty-lossless',
            sketch_factory=exact_set.ExactSet.get_sketch_factory(),
            estimator=exact_set.LosslessEstimator(),
            sketch_noiser=None,
            estimate_noiser=None)

        # Built in the same order as the keyword arguments that use them.
        less_one_factory = exact_set.ExactSet.get_sketch_factory()
        less_one_estimator = exact_set.LessOneEstimator()
        zero_element_noiser = exact_set.AddRandomElementsNoiser(
            num_random_elements=0, random_state=np.random.RandomState())
        less_one_config = simulator.SketchEstimatorConfig(
            name='exact_set-infty-infty-less_one',
            sketch_factory=less_one_factory,
            estimator=less_one_estimator,
            sketch_noiser=zero_element_noiser,
            estimate_noiser=None)

        self.sketch_estimator_config_list = (lossless_config, less_one_config)

        # Both scenarios use the same generator parameters; each gets its
        # own factory instance.
        independent_sets = set_generator.IndependentSetGenerator
        scenarios = [
            configs.ScenarioConfig(
                name=scenario_name,
                set_generator_factory=(
                    independent_sets.get_generator_factory_with_num_and_size(
                        universe_size=10, num_sets=5, set_size=1)))
            for scenario_name in ('ind1', 'ind2')
        ]
        self.evaluation_config = configs.EvaluationConfig(
            name='test_evaluation',
            num_runs=2,
            scenario_config_list=scenarios)

        self.evaluation_run_name = 'test_run'

        def _run_evaluation_and_simulation(out_dir):
            """Runs the evaluator, then the analyzer, against out_dir."""
            run_evaluator = evaluator.Evaluator(
                evaluation_config=self.evaluation_config,
                sketch_estimator_config_list=self.sketch_estimator_config_list,
                run_name=self.evaluation_run_name,
                out_dir=out_dir)
            self.evaluator = run_evaluator
            run_evaluator()

            # The analyzer gets out_dir as both its evaluation directory and
            # its output directory.
            run_analyzer = analyzer.CardinalityEstimatorEvaluationAnalyzer(
                out_dir=out_dir,
                evaluation_directory=out_dir,
                evaluation_run_name=self.evaluation_run_name,
                evaluation_name=self.evaluation_config.name,
                estimable_criteria_list=[(0.05, 0.95), (1.01, 0.9)])
            self.analyzer = run_analyzer
            run_analyzer()

        self.run_evaluation_and_simulation = _run_evaluation_and_simulation
예제 #3
0
    def setUp(self):
        """Prepares configs plus factories for a test evaluator and analyzer.

        Sets ``sketch_estimator_config_list`` (lossless and less-one ExactSet
        configs), ``evaluation_config`` (scenarios 'ind1' and 'ind2', two
        runs), ``run_name``, ``get_test_evaluator`` and ``get_test_analyzer``.
        """
        super(AnalyzerTest, self).setUp()

        lossless_config = configs.SketchEstimatorConfig(
            name='exact_set_lossless',
            sketch_factory=exact_set.ExactSet.get_sketch_factory(),
            estimator=exact_set.LosslessEstimator(),
            noiser=None)

        # Built in the same order as the keyword arguments that use them.
        less_one_factory = exact_set.ExactSet.get_sketch_factory()
        less_one_estimator = exact_set.LessOneEstimator()
        zero_element_noiser = exact_set.AddRandomElementsNoiser(
            num_random_elements=0, random_state=np.random.RandomState())
        less_one_config = configs.SketchEstimatorConfig(
            name='exact_set_less_one',
            sketch_factory=less_one_factory,
            estimator=less_one_estimator,
            noiser=zero_element_noiser)

        self.sketch_estimator_config_list = (lossless_config, less_one_config)

        # Both scenarios use the same generator parameters; each gets its
        # own factory instance.
        independent_sets = set_generator.IndependentSetGenerator
        scenarios = [
            configs.ScenarioConfig(
                name=scenario_name,
                set_generator_factory=independent_sets.get_generator_factory(
                    universe_size=10, num_sets=5, set_size=1))
            for scenario_name in ('ind1', 'ind2')
        ]
        self.evaluation_config = configs.EvaluationConfig(
            name='test_evaluation',
            num_runs=2,
            scenario_config_list=scenarios)

        self.run_name = 'test_run'

        def _get_test_evaluator(out_dir):
            """Returns an Evaluator for the fixture configs writing to out_dir."""
            evaluator_kwargs = dict(
                evaluation_config=self.evaluation_config,
                sketch_estimator_config_list=self.sketch_estimator_config_list,
                run_name=self.run_name,
                out_dir=out_dir)
            return evaluator.Evaluator(**evaluator_kwargs)

        def _get_test_analyzer(out_dir, evaluation_dir):
            """Returns an analyzer reading evaluation_dir and writing out_dir."""
            analyzer_kwargs = dict(
                out_dir=out_dir,
                evaluation_directory=evaluation_dir,
                evaluation_run_name=self.run_name,
                evaluation_name=self.evaluation_config.name)
            return analyzer.CardinalityEstimatorEvaluationAnalyzer(
                **analyzer_kwargs)

        self.get_test_evaluator = _get_test_evaluator
        self.get_test_analyzer = _get_test_analyzer
예제 #4
0
def main(argv):
    """Runs the '3_vary_decay_rate' evaluation of exponential Bloom filters.

    Builds one estimator config per value in
    FLAGS.exponential_bloom_filter_decay_rate and one scenario per value in
    FLAGS.set_size_ratio, then invokes the evaluator with out_dir "." and
    10 workers.

    Args:
      argv: Command-line arguments; more than one element is rejected.

    Raises:
      app.UsageError: If argv has more than one element.
    """
    if len(argv) > 1:
        raise app.UsageError('Too many command-line arguments.')

    # Flip probability derived from the noiser epsilon: 1 / (1 + e^epsilon).
    noiser_flip_probability = 1 / (1 + np.exp(FLAGS.noiser_epsilon))
    universe_size = int(100 * FLAGS.sketch_size)

    # One estimator config per decay rate.
    estimator_config_list = [
        SketchEstimatorConfig(
            name='exp_BF_{}'.format(int(decay_rate)),
            sketch_factory=ExponentialBloomFilter.get_sketch_factory(
                FLAGS.sketch_size, decay_rate),
            estimator=FirstMomentEstimator(
                method='exp',
                denoiser=SurrealDenoiser(probability=noiser_flip_probability)),
            sketch_noiser=BlipNoiser(FLAGS.noiser_epsilon))
        for decay_rate in FLAGS.exponential_bloom_filter_decay_rate
    ]

    # One scenario per set-size ratio; the set size scales with sketch size.
    independent_sets = set_generator.IndependentSetGenerator
    scenario_config_list = []
    for ratio in FLAGS.set_size_ratio:
        scaled_set_size = int(ratio * FLAGS.sketch_size)
        scenario = configs.ScenarioConfig(
            name=str(int(ratio)),
            set_generator_factory=(
                independent_sets.get_generator_factory_with_num_and_size(
                    universe_size=universe_size,
                    num_sets=FLAGS.number_of_sets,
                    set_size=scaled_set_size)))
        scenario_config_list.append(scenario)

    evaluation_config = configs.EvaluationConfig(
        name='3_vary_decay_rate_{}k'.format(int(FLAGS.sketch_size / 1000)),
        num_runs=FLAGS.number_of_trials,
        scenario_config_list=scenario_config_list)

    run_evaluation = evaluator.Evaluator(
        evaluation_config=evaluation_config,
        sketch_estimator_config_list=estimator_config_list,
        run_name="eval_adbf_result",
        out_dir=".",
        workers=10)
    run_evaluation()
예제 #5
0
def main(argv):
    """Runs the '5_prediction' evaluation of exponential Bloom filters.

    Builds one estimator config for every (sketch size, epsilon) pair taken
    from FLAGS.sketch_size and FLAGS.noiser_epsilon, and one scenario per
    value in FLAGS.universe_size, then invokes the evaluator with out_dir
    "." and 10 workers.

    Args:
      argv: Command-line arguments; more than one element is rejected.

    Raises:
      app.UsageError: If argv has more than one element.
    """
    if len(argv) > 1:
        raise app.UsageError('Too many command-line arguments.')

    # One estimator config per (sketch size, epsilon) combination.
    estimator_config_list = []
    for sketch_size in FLAGS.sketch_size:
        for epsilon in FLAGS.noiser_epsilon:
            # Name is "<size in thousands>k_<flipping probability>".
            config_name = "{}k_{:.2f}".format(
                int(sketch_size / 1000), 1 / (1 + np.exp(epsilon)))
            estimator_config_list.append(
                SketchEstimatorConfig(
                    name=config_name,
                    sketch_factory=ExponentialBloomFilter.get_sketch_factory(
                        sketch_size,
                        FLAGS.exponential_bloom_filter_decay_rate),
                    estimator=FirstMomentEstimator(
                        method='exp', denoiser=SurrealDenoiser(epsilon)),
                    sketch_noiser=BlipNoiser(epsilon)))

    # One scenario per universe size, named by the size in millions.
    independent_sets = set_generator.IndependentSetGenerator
    scenario_config_list = [
        configs.ScenarioConfig(
            name="{:.1f}".format(universe_size / 1000000),
            set_generator_factory=(
                independent_sets.get_generator_factory_with_num_and_size(
                    universe_size=universe_size,
                    num_sets=FLAGS.number_of_sets,
                    set_size=FLAGS.set_size)))
        for universe_size in FLAGS.universe_size
    ]

    evaluation_config = configs.EvaluationConfig(
        name='5_prediction',
        num_runs=FLAGS.number_of_trials,
        scenario_config_list=scenario_config_list)

    run_evaluation = evaluator.Evaluator(
        evaluation_config=evaluation_config,
        sketch_estimator_config_list=estimator_config_list,
        run_name="eval_adbf_result",
        out_dir=".",
        workers=10)
    run_evaluation()
    def setUp(self):
        """Prepares configs and a factory for the evaluator under test.

        Sets ``sketch_estimator_config_list`` (lossless and less-one ExactSet
        configs), ``evaluation_config`` (scenarios 'ind1' and 'ind2', two
        runs), ``run_name`` and ``get_test_evaluator``.
        """
        super(EvaluatorTest, self).setUp()

        lossless_config = simulator.SketchEstimatorConfig(
            name='exact_set_lossless',
            sketch_factory=exact_set.ExactSet.get_sketch_factory(),
            estimator=exact_set.LosslessEstimator())

        # Built in the same order as the keyword arguments that use them.
        less_one_factory = exact_set.ExactSet.get_sketch_factory()
        less_one_estimator = exact_set.LessOneEstimator()
        zero_element_noiser = exact_set.AddRandomElementsNoiser(
            num_random_elements=0, random_state=np.random.RandomState())
        less_one_config = simulator.SketchEstimatorConfig(
            name='exact_set_less_one',
            sketch_factory=less_one_factory,
            estimator=less_one_estimator,
            sketch_noiser=zero_element_noiser)

        self.sketch_estimator_config_list = (lossless_config, less_one_config)

        # Both scenarios use the same generator parameters; each gets its
        # own factory instance.
        independent_sets = set_generator.IndependentSetGenerator
        scenarios = [
            configs.ScenarioConfig(
                name=scenario_name,
                set_generator_factory=(
                    independent_sets.get_generator_factory_with_num_and_size(
                        universe_size=10, num_sets=2, set_size=5)))
            for scenario_name in ('ind1', 'ind2')
        ]
        self.evaluation_config = configs.EvaluationConfig(
            name='test_evaluation',
            num_runs=2,
            scenario_config_list=scenarios)

        self.run_name = 'test_run'

        def _get_test_evaluator(out_dir, overwrite=False):
            """Returns an Evaluator for the fixture configs writing to out_dir."""
            evaluator_kwargs = dict(
                evaluation_config=self.evaluation_config,
                sketch_estimator_config_list=self.sketch_estimator_config_list,
                run_name=self.run_name,
                out_dir=out_dir,
                overwrite=overwrite)
            return evaluator.Evaluator(**evaluator_kwargs)

        self.get_test_evaluator = _get_test_evaluator
예제 #7
0
def main(argv):
    """Runs the '2_vary_set_size' evaluation of three Bloom filter sketches.

    Builds geometric, logarithmic and exponential Bloom filter estimator
    configs that share one noiser epsilon, and one scenario per value in
    FLAGS.set_size_ratio, then invokes the evaluator with out_dir "." and
    10 workers.

    Args:
      argv: Command-line arguments; more than one element is rejected.

    Raises:
      app.UsageError: If argv has more than one element.
    """
    if len(argv) > 1:
        raise app.UsageError('Too many command-line arguments.')

    # Flip probability derived from the noiser epsilon: 1 / (1 + e^epsilon).
    noiser_flip_probability = 1 / (1 + np.exp(FLAGS.noiser_epsilon))

    def _first_moment_config(name, sketch_factory, method):
        """Wraps a sketch factory with the shared estimator and noiser."""
        return SketchEstimatorConfig(
            name=name,
            sketch_factory=sketch_factory,
            estimator=FirstMomentEstimator(
                method=method,
                denoiser=SurrealDenoiser(probability=noiser_flip_probability)),
            sketch_noiser=BlipNoiser(FLAGS.noiser_epsilon))

    # The three ADBF estimators, in geometric / logarithmic / exponential
    # order.
    estimator_config_list = [
        _first_moment_config(
            'geo_BF',
            GeometricBloomFilter.get_sketch_factory(
                FLAGS.sketch_size, FLAGS.geometric_bloom_filter_probability),
            'geo'),
        _first_moment_config(
            'log_BF',
            LogarithmicBloomFilter.get_sketch_factory(FLAGS.sketch_size),
            'log'),
        _first_moment_config(
            'exp_BF',
            ExponentialBloomFilter.get_sketch_factory(
                FLAGS.sketch_size, FLAGS.exponential_bloom_filter_decay_rate),
            'exp'),
    ]

    # One scenario per set-size ratio; the set size scales with sketch size.
    independent_sets = set_generator.IndependentSetGenerator
    scenario_config_list = []
    for ratio in FLAGS.set_size_ratio:
        scaled_set_size = int(ratio * FLAGS.sketch_size)
        scenario = configs.ScenarioConfig(
            name="{:.1f}".format(ratio),
            set_generator_factory=(
                independent_sets.get_generator_factory_with_num_and_size(
                    universe_size=FLAGS.universe_size,
                    num_sets=FLAGS.number_of_sets,
                    set_size=scaled_set_size)))
        scenario_config_list.append(scenario)

    evaluation_config = configs.EvaluationConfig(
        name='2_vary_set_size',
        num_runs=FLAGS.number_of_trials,
        scenario_config_list=scenario_config_list)

    run_evaluation = evaluator.Evaluator(
        evaluation_config=evaluation_config,
        sketch_estimator_config_list=estimator_config_list,
        run_name="eval_adbf_result",
        out_dir=".",
        workers=10)
    run_evaluation()
예제 #8
0
def main(argv):
    """Runs the '1_vary_flip_prob' evaluation across noiser epsilons.

    For every value in FLAGS.noiser_epsilon, builds geometric, logarithmic
    and exponential Bloom filter estimator configs whose names carry the
    corresponding flipping probability, then invokes the evaluator on a
    single 'independent' scenario with out_dir "." and 10 workers.

    Args:
      argv: Command-line arguments; more than one element is rejected.

    Raises:
      app.UsageError: If argv has more than one element.
    """
    if len(argv) > 1:
        raise app.UsageError('Too many command-line arguments.')

    def _first_moment_config(name, sketch_factory, method, epsilon):
        """Wraps a sketch factory with an epsilon-specific estimator/noiser."""
        return SketchEstimatorConfig(
            name=name,
            sketch_factory=sketch_factory,
            estimator=FirstMomentEstimator(method=method,
                                           denoiser=SurrealDenoiser(epsilon)),
            sketch_noiser=BlipNoiser(epsilon))

    # Three estimator configs per epsilon.
    estimator_config_list = []
    for epsilon in FLAGS.noiser_epsilon:
        # Flipping probability, formatted once and used as the name suffix.
        flip_suffix = "{:.2f}".format(1 / (1 + np.exp(epsilon)))

        # Disabled uniform Bloom filter config, kept from the original:
        # estimator_config_bloom_filter = SketchEstimatorConfig(
        #     name='unif_BF_' + "{:.2f}".format(noiser_flip_probability),
        #     sketch_factory=BloomFilter.get_sketch_factory(
        #         FLAGS.sketch_size, FLAGS.num_bloom_filter_hashes),
        #     estimator=UnionEstimator(),
        #     sketch_noiser=BlipNoiser(epsilon))

        estimator_config_list.extend([
            _first_moment_config(
                'geo_BF_' + flip_suffix,
                GeometricBloomFilter.get_sketch_factory(
                    FLAGS.sketch_size,
                    FLAGS.geometric_bloom_filter_probability),
                'geo',
                epsilon),
            _first_moment_config(
                'log_BF_' + flip_suffix,
                LogarithmicBloomFilter.get_sketch_factory(FLAGS.sketch_size),
                'log',
                epsilon),
            _first_moment_config(
                'exp_BF_' + flip_suffix,
                ExponentialBloomFilter.get_sketch_factory(
                    FLAGS.sketch_size,
                    FLAGS.exponential_bloom_filter_decay_rate),
                'exp',
                epsilon),
        ])

    # A single scenario of independently generated sets.
    independent_scenario = configs.ScenarioConfig(
        name='independent',
        set_generator_factory=(
            set_generator.IndependentSetGenerator
            .get_generator_factory_with_num_and_size(
                universe_size=FLAGS.universe_size,
                num_sets=FLAGS.number_of_sets,
                set_size=FLAGS.set_size)))
    evaluation_config = configs.EvaluationConfig(
        name='1_vary_flip_prob',
        num_runs=FLAGS.number_of_trials,
        scenario_config_list=[independent_scenario])

    run_evaluation = evaluator.Evaluator(
        evaluation_config=evaluation_config,
        sketch_estimator_config_list=estimator_config_list,
        run_name="eval_adbf_result",
        out_dir=".",
        workers=10)
    run_evaluation()