Python SketchEstimatorConfig примеры, wfa_cardinality_estimation_evaluation_framework.evaluations.configs.SketchEstimatorConfig Python примеры использования

Пример #1

0

Показать файл

Файл: simulator_test.py Проект: pasin30055/cardinality_estimation_evaluation_framework

    def test_simulator_run_all_and_aggregate_write_file(self):
        """Frames read back from the file handles equal the returned frames.

        The simulator is given in-memory handles for the raw and aggregated
        output; after the run, both buffers are parsed as CSV and compared
        with the data frames returned by calling the simulator.
        """
        config = SketchEstimatorConfig(
            name='exact_set-lossless',
            sketch_factory=ExactMultiSet,
            estimator=LosslessEstimator())
        generator_factory = (
            set_generator.IndependentSetGenerator
            .get_generator_factory_with_num_and_size(
                universe_size=1, num_sets=1, set_size=1))

        raw_buffer = io.StringIO()
        agg_buffer = io.StringIO()
        sim = simulator.Simulator(
            num_runs=5,
            set_generator_factory=generator_factory,
            sketch_estimator_config=config,
            file_handle_raw=raw_buffer,
            file_handle_agg=agg_buffer)
        returned_raw, returned_agg = sim()

        # Rewind before reading: the buffers were handed to the simulator
        # above.
        raw_buffer.seek(0)
        pd.testing.assert_frame_equal(returned_raw, pd.read_csv(raw_buffer))

        # The aggregated CSV is read with a two-row header and the first
        # column as the index.
        agg_buffer.seek(0)
        reloaded_agg = pd.read_csv(agg_buffer, header=[0, 1], index_col=0)
        pd.testing.assert_frame_equal(returned_agg, reloaded_agg)

Пример #2

0

Показать файл

Файл: simulator_test.py Проект: pasin30055/cardinality_estimation_evaluation_framework

    def test_multiple_frequencies(self):
        """With max_frequency=3 the result has one column set per frequency.

        Two fake sets are fed through an exact sketch with a lossless
        estimator; the returned frame must match the expected per-frequency
        estimated/true cardinalities with zero relative error.
        """
        config = SketchEstimatorConfig(
            name='exact-set-multiple-frequencies',
            sketch_factory=ExactMultiSet,
            estimator=LosslessEstimator(),
            max_frequency=3)
        generator_factory = FakeSetGenerator.get_generator_factory(
            [[1, 1, 1, 2, 2, 3], [1, 1, 1, 3, 3, 4]])
        sim = simulator.Simulator(
            num_runs=1,
            set_generator_factory=generator_factory,
            sketch_estimator_config=config)
        result, _ = sim()

        # Column names are a basename followed by the frequency suffix.
        suffixes = ('1', '2', '3')
        expected_columns = (
            ['num_sets']
            + [simulator.ESTIMATED_CARDINALITY_BASENAME + s for s in suffixes]
            + [simulator.TRUE_CARDINALITY_BASENAME + s for s in suffixes]
            + [simulator.SHUFFLE_DISTANCE, 'run_index']
            + [simulator.RELATIVE_ERROR_BASENAME + s for s in suffixes])
        expected_rows = [
            [1, 3, 2, 1, 3, 2, 1, 0., 0, 0., 0., 0.],
            [2, 4, 3, 2, 4, 3, 2, 0., 0, 0., 0., 0.],
        ]

        pd.testing.assert_frame_equal(
            result, pd.DataFrame(expected_rows, columns=expected_columns))

Пример #3

0

Показать файл

Файл: simulator_test.py Проект: pasin30055/cardinality_estimation_evaluation_framework

 def test_simulator_run_one_with_estimate_noiser(self):
     """A single run applies the estimate noiser exactly once.

     Expects one result row whose frequency-1 estimate is 10 (presumably the
     value FakeEstimateNoiser produces -- confirm against its definition)
     and a noiser call count of 1.
     """
     noiser = FakeEstimateNoiser()
     config = SketchEstimatorConfig(
         name='exact_set-lossless',
         sketch_factory=ExactMultiSet,
         estimator=LosslessEstimator(),
         estimate_noiser=noiser)
     result = get_simple_simulator(config).run_one()

     self.assertLen(result, 1)
     estimate_column = simulator.ESTIMATED_CARDINALITY_BASENAME + '1'
     self.assertEqual(result[estimate_column].iloc[0], 10)
     self.assertEqual(noiser._calls, 1)

Пример #4

0

Показать файл

Файл: simulator_test.py Проект: pasin30055/cardinality_estimation_evaluation_framework

def get_simple_simulator(sketch_estimator_config=None):
    """Build a single-run Simulator with fixed random states.

    Args:
      sketch_estimator_config: The SketchEstimatorConfig to simulate. When
        None, an 'exact_set-lossless' config (ExactMultiSet sketch with a
        LosslessEstimator) is used.

    Returns:
      A simulator.Simulator with num_runs=1 over an IndependentSetGenerator
      factory (universe_size=1, num_sets=1, set_size=1), using
      RandomState(1) for sketches and RandomState(2) for sets.
    """
    # Compare against None explicitly so that only an omitted argument
    # selects the default; a truthiness test would also silently replace any
    # config object that happens to evaluate as falsy.
    if sketch_estimator_config is None:
        sketch_estimator_config = SketchEstimatorConfig(
            name='exact_set-lossless',
            sketch_factory=ExactMultiSet,
            estimator=LosslessEstimator())
    set_generator_factory = (
        set_generator.IndependentSetGenerator
        .get_generator_factory_with_num_and_size(
            universe_size=1, num_sets=1, set_size=1))

    return simulator.Simulator(
        num_runs=1,
        set_generator_factory=set_generator_factory,
        sketch_estimator_config=sketch_estimator_config,
        sketch_random_state=np.random.RandomState(1),
        set_random_state=np.random.RandomState(2))

Пример #5

0

Показать файл

Файл: evaluate_adbf_3.py Проект: googleinterns/amt-xpub

def main(argv):
    """Evaluate exponential Bloom filters across the configured decay rates.

    One estimator config is built per value of
    FLAGS.exponential_bloom_filter_decay_rate and one scenario per value of
    FLAGS.set_size_ratio; the Evaluator is then run over all of them.

    Args:
      argv: Command-line arguments; at most one element is accepted.

    Raises:
      app.UsageError: If more than one command-line argument is given.
    """
    if len(argv) > 1:
        raise app.UsageError('Too many command-line arguments.')

    flip_probability = 1 / (1 + np.exp(FLAGS.noiser_epsilon))
    universe_size = int(100 * FLAGS.sketch_size)

    # One estimator config per decay rate.
    # NOTE(review): int() truncates the decay rate in the name, so rates
    # sharing an integer part get the same name -- confirm this is intended.
    estimator_configs = [
        SketchEstimatorConfig(
            name='exp_BF_' + str(int(decay_rate)),
            sketch_factory=ExponentialBloomFilter.get_sketch_factory(
                FLAGS.sketch_size, decay_rate),
            estimator=FirstMomentEstimator(
                method='exp',
                denoiser=SurrealDenoiser(probability=flip_probability)),
            sketch_noiser=BlipNoiser(FLAGS.noiser_epsilon))
        for decay_rate in FLAGS.exponential_bloom_filter_decay_rate
    ]

    # One scenario per set-size ratio; the set size is the ratio scaled by
    # the sketch size.
    # NOTE(review): the scenario name also truncates the ratio with int().
    scenario_configs = [
        configs.ScenarioConfig(
            name=str(int(ratio)),
            set_generator_factory=(
                set_generator.IndependentSetGenerator
                .get_generator_factory_with_num_and_size(
                    universe_size=universe_size,
                    num_sets=FLAGS.number_of_sets,
                    set_size=int(ratio * FLAGS.sketch_size))))
        for ratio in FLAGS.set_size_ratio
    ]

    evaluation_config = configs.EvaluationConfig(
        name='3_vary_decay_rate_' + str(int(FLAGS.sketch_size / 1000)) + "k",
        num_runs=FLAGS.number_of_trials,
        scenario_config_list=scenario_configs)

    run_evaluation = evaluator.Evaluator(
        evaluation_config=evaluation_config,
        sketch_estimator_config_list=estimator_configs,
        run_name="eval_adbf_result",
        out_dir=".",
        workers=10)
    run_evaluation()

Пример #6

0

Показать файл

Файл: evaluate_adbf_5.py Проект: googleinterns/amt-xpub

def main(argv):
    """Evaluate exponential Bloom filters over sketch sizes and epsilons.

    One estimator config is built for every (sketch size, epsilon) pair from
    FLAGS.sketch_size and FLAGS.noiser_epsilon, and one scenario for every
    value of FLAGS.universe_size; the Evaluator is then run over all of them.

    Args:
      argv: Command-line arguments; at most one element is accepted.

    Raises:
      app.UsageError: If more than one command-line argument is given.
    """
    if len(argv) > 1:
        raise app.UsageError('Too many command-line arguments.')

    estimator_configs = []
    for sketch_size in FLAGS.sketch_size:
        for epsilon in FLAGS.noiser_epsilon:
            # The name encodes the sketch size in thousands and the flipping
            # probability derived from epsilon.
            flip_probability = 1 / (1 + np.exp(epsilon))
            config_name = (str(int(sketch_size / 1000)) + "k_"
                           + "{:.2f}".format(flip_probability))
            estimator_configs.append(
                SketchEstimatorConfig(
                    name=config_name,
                    sketch_factory=ExponentialBloomFilter.get_sketch_factory(
                        sketch_size,
                        FLAGS.exponential_bloom_filter_decay_rate),
                    estimator=FirstMomentEstimator(
                        method='exp',
                        denoiser=SurrealDenoiser(epsilon)),
                    sketch_noiser=BlipNoiser(epsilon)))

    # One scenario per universe size, named by the size in millions.
    scenario_configs = [
        configs.ScenarioConfig(
            name="{:.1f}".format(universe_size / 1000000),
            set_generator_factory=(
                set_generator.IndependentSetGenerator
                .get_generator_factory_with_num_and_size(
                    universe_size=universe_size,
                    num_sets=FLAGS.number_of_sets,
                    set_size=FLAGS.set_size)))
        for universe_size in FLAGS.universe_size
    ]

    evaluation_config = configs.EvaluationConfig(
        name='5_prediction',
        num_runs=FLAGS.number_of_trials,
        scenario_config_list=scenario_configs)

    run_evaluation = evaluator.Evaluator(
        evaluation_config=evaluation_config,
        sketch_estimator_config_list=estimator_configs,
        run_name="eval_adbf_result",
        out_dir=".",
        workers=10)
    run_evaluation()

Пример #7

0

Показать файл

Файл: simulator_test.py Проект: pasin30055/cardinality_estimation_evaluation_framework

 def test_get_sketch_different_runs_different_random_state(self):
     """Two runs of the same simulator yield different estimates.

     Uses the random-seed test sketch and estimator, then compares the
     frequency-1 estimate of run 0 against that of run 1.
     """
     config = SketchEstimatorConfig(
         name='random_sketch-estimator_for_test_random_seed',
         sketch_factory=RandomSketchForTestRandomSeed,
         estimator=EstimatorForTestRandomSeed())
     generator_factory = (
         set_generator.IndependentSetGenerator
         .get_generator_factory_with_num_and_size(
             universe_size=1, num_sets=1, set_size=1))
     sim = simulator.Simulator(
         num_runs=2,
         set_generator_factory=generator_factory,
         sketch_estimator_config=config)
     results, _ = sim()

     estimate_column = simulator.ESTIMATED_CARDINALITY_BASENAME + '1'
     first_run = results.loc[results['run_index'] == 0,
                             estimate_column].values
     second_run = results.loc[results['run_index'] == 1,
                              estimate_column].values
     self.assertNotEqual(first_run, second_run)

Пример #8

0

Показать файл

Файл: simulator_test.py Проект: pasin30055/cardinality_estimation_evaluation_framework

    def test_simulator_run_all_and_aggregate_multiple_runs(self):
        """Five runs give two frames, the first with five single-set rows."""
        config = SketchEstimatorConfig(
            name='exact_set-lossless',
            sketch_factory=ExactMultiSet,
            estimator=LosslessEstimator())
        generator_factory = (
            set_generator.IndependentSetGenerator
            .get_generator_factory_with_num_and_size(
                universe_size=1, num_sets=1, set_size=1))

        sim = simulator.Simulator(
            num_runs=5,
            set_generator_factory=generator_factory,
            sketch_estimator_config=config)

        frames = sim.run_all_and_aggregate()
        self.assertLen(frames, 2)

        raw_frame = frames[0]
        self.assertLen(raw_frame, 5)
        for num_sets in raw_frame['num_sets']:
            self.assertEqual(num_sets, 1)

Пример #9

0

Показать файл

Файл: simulator_test.py Проект: pasin30055/cardinality_estimation_evaluation_framework

    def test_simulator_run_all_and_aggregate_with_noise(self):
        """A sketch noiser adding 3 random elements shifts the estimate.

        The first raw row is expected to report an estimated cardinality
        of 4 against a true cardinality of 1, i.e. a relative error of 3.
        """
        noise_random_state = np.random.RandomState(3)
        config = SketchEstimatorConfig(
            name='exact_set-lossless',
            sketch_factory=ExactMultiSet,
            estimator=LosslessEstimator(),
            sketch_noiser=AddRandomElementsNoiser(
                num_random_elements=3, random_state=noise_random_state))
        sim = get_simple_simulator(config)

        frames = sim.run_all_and_aggregate()
        self.assertLen(frames, 2)

        raw_frame = frames[0]
        for num_sets in raw_frame['num_sets']:
            self.assertEqual(num_sets, 1)

        # (column basename, expected value in the first row), checked in the
        # same order as listed.
        expected_first_row = (
            (simulator.ESTIMATED_CARDINALITY_BASENAME, 4),
            (simulator.TRUE_CARDINALITY_BASENAME, 1),
            (simulator.RELATIVE_ERROR_BASENAME, 3),
        )
        for basename, expected in expected_first_row:
            self.assertEqual(raw_frame[basename + '1'][0], expected)

Пример #10

0

Показать файл

Файл: evaluate_adbf_2.py Проект: googleinterns/amt-xpub

def main(argv):
    """Evaluate three any-distribution Bloom filters across set sizes.

    Geometric, logarithmic and exponential Bloom filter configs are built
    with the same noiser and denoiser settings, and one scenario is created
    per value of FLAGS.set_size_ratio; the Evaluator is then run over all of
    them.

    Args:
      argv: Command-line arguments; at most one element is accepted.

    Raises:
      app.UsageError: If more than one command-line argument is given.
    """
    if len(argv) > 1:
        raise app.UsageError('Too many command-line arguments.')

    flip_probability = 1 / (1 + np.exp(FLAGS.noiser_epsilon))

    def denoised_first_moment(method):
        # All three estimators share the denoiser settings; only the
        # first-moment method differs. A fresh denoiser is built per call.
        return FirstMomentEstimator(
            method=method,
            denoiser=SurrealDenoiser(probability=flip_probability))

    # The three ADBF estimators, in geometric / logarithmic / exponential
    # order.
    estimator_configs = [
        SketchEstimatorConfig(
            name='geo_BF',
            sketch_factory=GeometricBloomFilter.get_sketch_factory(
                FLAGS.sketch_size, FLAGS.geometric_bloom_filter_probability),
            estimator=denoised_first_moment('geo'),
            sketch_noiser=BlipNoiser(FLAGS.noiser_epsilon)),
        SketchEstimatorConfig(
            name='log_BF',
            sketch_factory=LogarithmicBloomFilter.get_sketch_factory(
                FLAGS.sketch_size),
            estimator=denoised_first_moment('log'),
            sketch_noiser=BlipNoiser(FLAGS.noiser_epsilon)),
        SketchEstimatorConfig(
            name='exp_BF',
            sketch_factory=ExponentialBloomFilter.get_sketch_factory(
                FLAGS.sketch_size, FLAGS.exponential_bloom_filter_decay_rate),
            estimator=denoised_first_moment('exp'),
            sketch_noiser=BlipNoiser(FLAGS.noiser_epsilon)),
    ]

    # One scenario per set-size ratio; the set size is the ratio scaled by
    # the sketch size.
    scenario_configs = [
        configs.ScenarioConfig(
            name="{:.1f}".format(ratio),
            set_generator_factory=(
                set_generator.IndependentSetGenerator
                .get_generator_factory_with_num_and_size(
                    universe_size=FLAGS.universe_size,
                    num_sets=FLAGS.number_of_sets,
                    set_size=int(ratio * FLAGS.sketch_size))))
        for ratio in FLAGS.set_size_ratio
    ]

    evaluation_config = configs.EvaluationConfig(
        name='2_vary_set_size',
        num_runs=FLAGS.number_of_trials,
        scenario_config_list=scenario_configs)

    run_evaluation = evaluator.Evaluator(
        evaluation_config=evaluation_config,
        sketch_estimator_config_list=estimator_configs,
        run_name="eval_adbf_result",
        out_dir=".",
        workers=10)
    run_evaluation()

Пример #11

0

Показать файл

Файл: interoperability_test.py Проект: pasin30055/cardinality_estimation_evaluation_framework

    def setUp(self):
        """Set shared test parameters and the estimator configs under test.

        Populates scalar parameters and random states on the instance, then
        builds two name-keyed dicts of SketchEstimatorConfig objects:
        self.name_to_non_noised_estimator_config and
        self.name_to_noised_estimator_config.
        """
        super(InteroperabilityTest, self).setUp()

        # Scalar parameters.
        self.number_of_trials = 2
        self.universe_size = 2000
        self.set_size_list = [5, 7, 9]
        self.large_set_size = 6
        self.small_set_size = 3
        self.sketch_size = 128
        self.number_of_sets = 3
        self.set_size = 50
        self.num_large_sets = 1
        self.num_small_sets = 3
        self.order = set_generator.ORDER_RANDOM
        self.user_activity_association = (
            set_generator.USER_ACTIVITY_ASSOCIATION_INDEPENDENT)
        self.shared_prop = 0.2
        self.num_bloom_filter_hashes = 2
        self.exponential_bloom_filter_decay_rate = 10
        self.geometic_bloom_filter_probability = 0.08
        self.noiser_epsilon = np.log(3)
        self.noiser_flip_probability = .25

        # Fixed seeds for the random states.
        self.set_random_state = np.random.RandomState(42)
        self.sketch_random_state = np.random.RandomState(137)
        self.noise_random_state = np.random.RandomState(3)

        # Configs without a sketch noiser. The last one does pass an estimate
        # noiser to its FirstMomentEstimator.
        cascading_legions_config = SketchEstimatorConfig(
            name='cascading_legions',
            sketch_factory=CascadingLegions.get_sketch_factory(
                self.sketch_size, self.sketch_size),
            estimator=Estimator())

        bloom_filter_config = SketchEstimatorConfig(
            name='bloom_filter-union_estimator',
            sketch_factory=BloomFilter.get_sketch_factory(
                self.sketch_size, self.num_bloom_filter_hashes),
            estimator=UnionEstimator())

        geometric_bloom_filter_config = SketchEstimatorConfig(
            name='geo_bloom_filter-first_moment_geo',
            sketch_factory=GeometricBloomFilter.get_sketch_factory(
                self.sketch_size, self.geometic_bloom_filter_probability),
            estimator=FirstMomentEstimator(method='geo'))

        logarithmic_bloom_filter_config = SketchEstimatorConfig(
            name='log_bloom_filter-first_moment_log',
            sketch_factory=LogarithmicBloomFilter.get_sketch_factory(
                self.sketch_size),
            estimator=FirstMomentEstimator(method='log'))

        exponential_bloom_filter_config = SketchEstimatorConfig(
            name='exp_bloom_filter-first_moment_exp',
            sketch_factory=ExponentialBloomFilter.get_sketch_factory(
                self.sketch_size, self.exponential_bloom_filter_decay_rate),
            estimator=FirstMomentEstimator(method='exp'))

        voc_config = SketchEstimatorConfig(
            name='vector_of_counts-sequential',
            sketch_factory=VectorOfCounts.get_sketch_factory(self.sketch_size),
            estimator=SequentialEstimator())

        exact_config = SketchEstimatorConfig(
            name='exact_set-lossless',
            sketch_factory=ExactMultiSet.get_sketch_factory(),
            estimator=LosslessEstimator())

        hll_config = SketchEstimatorConfig(
            name='hyper_log_log',
            sketch_factory=HyperLogLogPlusPlus.get_sketch_factory(
                self.sketch_size),
            estimator=HllCardinality())

        expadbf_first_moment_global_dp_config = SketchEstimatorConfig(
            name='estimator_config_expadbf_first_moment_global_d',
            sketch_factory=ExponentialBloomFilter.get_sketch_factory(
                length=10**5, decay_rate=10),
            estimator=FirstMomentEstimator(
                method=FirstMomentEstimator.METHOD_EXP,
                noiser=GeometricEstimateNoiser(epsilon=math.log(3))))

        non_noised_configs = (
            exact_config,
            cascading_legions_config,
            bloom_filter_config,
            logarithmic_bloom_filter_config,
            exponential_bloom_filter_config,
            geometric_bloom_filter_config,
            voc_config,
            hll_config,
            expadbf_first_moment_global_dp_config,
        )
        self.name_to_non_noised_estimator_config = {
            cfg.name: cfg for cfg in non_noised_configs
        }

        # Configs with a sketch noiser. The Blip and AddRandomElements
        # noisers all receive the same self.noise_random_state object.
        noised_cascading_legions_config = SketchEstimatorConfig(
            name='cascading_legions',
            sketch_factory=CascadingLegions.get_sketch_factory(
                self.sketch_size, self.sketch_size),
            estimator=Estimator(),
            sketch_noiser=Noiser(self.noiser_flip_probability))

        noised_bloom_filter_config = SketchEstimatorConfig(
            name='bloom_filter-union_estimator',
            sketch_factory=BloomFilter.get_sketch_factory(
                self.sketch_size, self.num_bloom_filter_hashes),
            estimator=UnionEstimator(),
            sketch_noiser=BlipNoiser(self.noiser_epsilon,
                                     self.noise_random_state))

        noised_geometric_bloom_filter_config = SketchEstimatorConfig(
            name='geo_bloom_filter-first_moment_geo',
            sketch_factory=GeometricBloomFilter.get_sketch_factory(
                self.sketch_size, self.geometic_bloom_filter_probability),
            estimator=FirstMomentEstimator(
                method='geo', denoiser=SurrealDenoiser(epsilon=math.log(3))),
            sketch_noiser=BlipNoiser(self.noiser_epsilon,
                                     self.noise_random_state))

        noised_logarithmic_bloom_filter_config = SketchEstimatorConfig(
            name='log_bloom_filter-first_moment_log',
            sketch_factory=LogarithmicBloomFilter.get_sketch_factory(
                self.sketch_size),
            estimator=FirstMomentEstimator(
                method='log', denoiser=SurrealDenoiser(epsilon=math.log(3))),
            sketch_noiser=BlipNoiser(self.noiser_epsilon,
                                     self.noise_random_state))

        noised_exponential_bloom_filter_config = SketchEstimatorConfig(
            name='exp_bloom_filter-first_moment_exp',
            sketch_factory=ExponentialBloomFilter.get_sketch_factory(
                self.sketch_size, self.exponential_bloom_filter_decay_rate),
            estimator=FirstMomentEstimator(
                method='exp', denoiser=SurrealDenoiser(epsilon=math.log(3))),
            sketch_noiser=BlipNoiser(self.noiser_epsilon,
                                     self.noise_random_state))

        noised_voc_config = SketchEstimatorConfig(
            name='vector_of_counts-sequential',
            sketch_factory=VectorOfCounts.get_sketch_factory(self.sketch_size),
            estimator=SequentialEstimator(),
            sketch_noiser=LaplaceNoiser())

        noised_exact_config = SketchEstimatorConfig(
            name='exact_set-lossless',
            sketch_factory=ExactMultiSet.get_sketch_factory(),
            estimator=LosslessEstimator(),
            sketch_noiser=AddRandomElementsNoiser(1, self.noise_random_state))

        noised_configs = (
            noised_exact_config,
            noised_cascading_legions_config,
            noised_bloom_filter_config,
            noised_logarithmic_bloom_filter_config,
            noised_exponential_bloom_filter_config,
            noised_geometric_bloom_filter_config,
            noised_voc_config,
        )
        self.name_to_noised_estimator_config = {
            cfg.name: cfg for cfg in noised_configs
        }

Пример #12

0

Показать файл

Файл: evaluate_adbf_1.py Проект: googleinterns/amt-xpub

def main(argv):
    """Evaluate three Bloom filter variants across noiser epsilons.

    For every epsilon in FLAGS.noiser_epsilon, geometric, logarithmic and
    exponential Bloom filter configs are built and named by the resulting
    flipping probability; the Evaluator is then run on a single
    'independent' scenario.

    Args:
      argv: Command-line arguments; at most one element is accepted.

    Raises:
      app.UsageError: If more than one command-line argument is given.
    """
    if len(argv) > 1:
        raise app.UsageError('Too many command-line arguments.')

    estimator_configs = []
    for epsilon in FLAGS.noiser_epsilon:
        # Flipping probability; used only to label the configs below.
        noiser_flip_probability = 1 / (1 + np.exp(epsilon))
        name_suffix = "{:.2f}".format(noiser_flip_probability)

        # The uniform Bloom filter config is disabled:
        # estimator_config_bloom_filter = SketchEstimatorConfig(
        #     name='unif_BF_' + "{:.2f}".format(noiser_flip_probability),
        #     sketch_factory=BloomFilter.get_sketch_factory(
        #         FLAGS.sketch_size, FLAGS.num_bloom_filter_hashes),
        #     estimator=UnionEstimator(),
        #     sketch_noiser=BlipNoiser(epsilon))

        geometric_config = SketchEstimatorConfig(
            name='geo_BF_' + name_suffix,
            sketch_factory=GeometricBloomFilter.get_sketch_factory(
                FLAGS.sketch_size, FLAGS.geometric_bloom_filter_probability),
            estimator=FirstMomentEstimator(
                method='geo', denoiser=SurrealDenoiser(epsilon)),
            sketch_noiser=BlipNoiser(epsilon))

        logarithmic_config = SketchEstimatorConfig(
            name='log_BF_' + name_suffix,
            sketch_factory=LogarithmicBloomFilter.get_sketch_factory(
                FLAGS.sketch_size),
            estimator=FirstMomentEstimator(
                method='log', denoiser=SurrealDenoiser(epsilon)),
            sketch_noiser=BlipNoiser(epsilon))

        exponential_config = SketchEstimatorConfig(
            name='exp_BF_' + name_suffix,
            sketch_factory=ExponentialBloomFilter.get_sketch_factory(
                FLAGS.sketch_size, FLAGS.exponential_bloom_filter_decay_rate),
            estimator=FirstMomentEstimator(
                method='exp', denoiser=SurrealDenoiser(epsilon)),
            sketch_noiser=BlipNoiser(epsilon))

        estimator_configs.extend([
            # estimator_config_bloom_filter,
            geometric_config,
            logarithmic_config,
            exponential_config,
        ])

    # A single scenario with independently generated sets.
    independent_scenario = configs.ScenarioConfig(
        name='independent',
        set_generator_factory=(
            set_generator.IndependentSetGenerator
            .get_generator_factory_with_num_and_size(
                universe_size=FLAGS.universe_size,
                num_sets=FLAGS.number_of_sets,
                set_size=FLAGS.set_size)))
    evaluation_config = configs.EvaluationConfig(
        name='1_vary_flip_prob',
        num_runs=FLAGS.number_of_trials,
        scenario_config_list=[independent_scenario])

    run_evaluation = evaluator.Evaluator(
        evaluation_config=evaluation_config,
        sketch_estimator_config_list=estimator_configs,
        run_name="eval_adbf_result",
        out_dir=".",
        workers=10)
    run_evaluation()

Пример #13

0

Показать файл

Файл: evaluation_configs.py Проект: hpnhxxwn/cardinality_estimation_evaluation_framework

    conf().name: conf for conf in EVALUATION_CONFIGS_TUPLE
}

# Tuple of the keys of NAME_TO_EVALUATION_CONFIGS.
EVALUATION_CONFIG_NAMES = tuple(NAME_TO_EVALUATION_CONFIGS.keys())


# Document the estimators.
# The name attribute of a SketchEstimatorConfig should conform to
# name_of_sketch-param_of_sketch-epsilon_value-estimator_specification.
# For example, if a user wants to evaluate a Bloom Filter of length 1000 with
# epsilon 0.1 and the UnionEstimator, then the name could be:
# bloom_filter-1e3-0.1-union.

# Logarithmic Bloom filter of length 10**5, noised by a BlipNoiser with
# epsilon = ln(3) and estimated by the first-moment 'log' method with a
# SurrealDenoiser.
# NOTE(review): 0.25 == 1 / (1 + exp(ln 3)); presumably the denoiser
# probability is the flip probability implied by the noiser's epsilon --
# confirm.
LOG_BLOOM_FILTER_1E5_LN3_FIRST_MOMENT_LOG = SketchEstimatorConfig(
    name='log_bloom_filter-1e5-ln3-first_moment_log',
    sketch_factory=bloom_filters.LogarithmicBloomFilter.get_sketch_factory(
        length=10**5),
    estimator=bloom_filters.FirstMomentEstimator(
        method=bloom_filters.FirstMomentEstimator.METHOD_LOG,
        denoiser=bloom_filters.SurrealDenoiser(probability=0.25)),
    noiser=bloom_filters.BlipNoiser(epsilon=np.log(3)))

# Same sketch and estimation method as above, but with no noiser and no
# denoiser (the '0' in the name stands in the epsilon position).
LOG_BLOOM_FILTER_1E5_0_FIRST_MOMENT_LOG = SketchEstimatorConfig(
    name='log_bloom_filter-1e5-0-first_moment_log',
    sketch_factory=bloom_filters.LogarithmicBloomFilter.get_sketch_factory(
        length=10**5),
    estimator=bloom_filters.FirstMomentEstimator(
        method=bloom_filters.FirstMomentEstimator.METHOD_LOG),
    noiser=None)

LIQUID_LEGIONS_1E5_10_LN3_SEQUENTIAL = SketchEstimatorConfig(
    name='liquid_legions-1e5_10-ln3-sequential',
    sketch_factory=liquid_legions.LiquidLegions.get_sketch_factory(

Python SketchEstimatorConfig примеры использования