Example #1
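These excerpts are Python unit tests from a planning-evaluation framework for reach-and-frequency modeling. Each omits its module preamble; a minimal sketch of the shared standard-library and third-party imports they assume is below. The framework's own classes (PublisherData, DataSet, DataDesign, TrialDescriptor, ExperimentalTrial, and the rest) would be imported from the framework's packages, whose paths are not shown in the excerpts.

import math
from os.path import join
from tempfile import TemporaryDirectory

import numpy as np
import pandas as pd

This first example checks that ExperimentalTrial._compute_trial_results_path assembles the results path from the experiment directory, the data-set name, and the comma-separated trial-descriptor parameters.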
    def test_compute_trial_results_path(self):
        with TemporaryDirectory() as d:
            pdf1 = PublisherData([(1, 0.01), (2, 0.02), (1, 0.04), (3, 0.05)],
                                 "pdf1")
            pdf2 = PublisherData([(2, 0.03), (4, 0.06)], "pdf2")
            data_set = DataSet([pdf1, pdf2], "dataset")
            data_design = DataDesign(join(d, "data_design"))
            data_design.add(data_set)

            msd = ModelingStrategyDescriptor("strategy", {},
                                             "single_pub_model", {},
                                             "multi_pub_model", {})
            sparams = SystemParameters(
                [0.03, 0.05],
                LiquidLegionsParameters(13, 1e6, 1),
                np.random.default_rng(),
            )
            eparams = ExperimentParameters(PrivacyBudget(1.0, 0.01), 3, 5,
                                           "tps")
            trial_descriptor = TrialDescriptor(msd, sparams, eparams)
            trial = ExperimentalTrial("edir", data_design, "dataset",
                                      trial_descriptor)

            actual = trial._compute_trial_results_path()
            expected = "{}/{}/{},{},{},{}".format(
                "edir",
                "dataset",
                "strategy,single_pub_model,multi_pub_model",
                "spends=(0.03,0.05),decay_rate=13,sketch_size=1000000.0",
                "epsilon=1.0,delta=0.01,replica_id=3,max_frequency=5",
                "test_point_strategy=tps.csv",
            )
            self.assertEqual(actual, expected)
Example #2
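Verifies DataSet lookup by name: a second DataDesign constructed over the same directory sees the data sets added through the first, and reach_by_impressions returns the expected reach. (self.data_set1 and self.data_set2 are fixtures defined elsewhere in the test class.)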
    def test_lookup(self):
        with TemporaryDirectory() as d:
            dd1 = DataDesign(d)
            dd1.add(self.data_set1)
            dd1.add(self.data_set2)
            dd2 = DataDesign(d)
            ds1 = dd2.by_name("ds1")
            self.assertEqual(ds1.reach_by_impressions([4, 2]).reach(), 4)
            ds2 = dd2.by_name("ds2")
            self.assertEqual(ds2.reach_by_impressions([4, 2]).reach(), 3)
Example #3
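Checks the count and names properties of a DataDesign as data sets are added.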
    def test_properties(self):
        with TemporaryDirectory() as d:
            dd = DataDesign(d)
            self.assertEqual(dd.count, 0)
            self.assertEqual(dd.names, [])
            dd.add(self.data_set1)
            self.assertEqual(dd.count, 1)
            self.assertEqual(dd.names, ["ds1"])
            dd.add(self.data_set2)
            self.assertEqual(dd.count, 2)
            self.assertEqual(dd.names, ["ds1", "ds2"])
Example #4
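Checks that Experiment.generate_trials yields one ExperimentalTrial per trial descriptor; the four descriptors here are the cross product of two SystemParameters settings and two ExperimentParameters settings.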
    def test_evaluate(self):
        with TemporaryDirectory() as d:
            pdf1 = PublisherData([(1, 0.01), (2, 0.02), (1, 0.04), (3, 0.05)],
                                 "pdf1")
            pdf2 = PublisherData([(2, 0.03), (4, 0.06)], "pdf2")
            data_set = DataSet([pdf1, pdf2], "dataset")
            data_design_dir = join(d, "data_design")
            experiment_dir = join(d, "experiments")
            data_design = DataDesign(data_design_dir)
            data_design.add(data_set)

            MODELING_STRATEGIES["fake"] = FakeModelingStrategy
            TEST_POINT_STRATEGIES["fake_tps"] = (
                lambda ds, rng: FakeTestPointGenerator().test_points())

            msd = ModelingStrategyDescriptor("fake", {"x": 1}, "goerg", {},
                                             "pairwise_union", {})
            sparams1 = SystemParameters(
                [0.03, 0.05],
                LiquidLegionsParameters(13, 1e6, 1),
                np.random.default_rng(),
            )
            sparams2 = SystemParameters(
                [0.05, 0.03],
                LiquidLegionsParameters(13, 1e6, 1),
                np.random.default_rng(),
            )
            eparams1 = ExperimentParameters(PrivacyBudget(1.0, 0.01), 1, 5,
                                            "fake_tps")
            eparams2 = ExperimentParameters(PrivacyBudget(0.5, 0.001), 1, 5,
                                            "fake_tps")

            trial_descriptors = [
                TrialDescriptor(msd, sparams1, eparams1),
                TrialDescriptor(msd, sparams1, eparams2),
                TrialDescriptor(msd, sparams2, eparams1),
                TrialDescriptor(msd, sparams2, eparams2),
            ]

            exp = Experiment(experiment_dir, data_design, "dataset",
                             trial_descriptors)
            trials = exp.generate_trials()
            self.assertLen(trials, 4)
Example #5
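Checks the independent-variables dataframe for a trial: the encoded trial name plus summary statistics of the data set, such as maximum_reach, ncampaigns, and largest_pub_reach.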
    def test_make_independent_vars_dataframe(self):
        with TemporaryDirectory() as d:
            pdf1 = PublisherData([(1, 0.01), (2, 0.02), (1, 0.04), (3, 0.05)],
                                 "pdf1")
            pdf2 = PublisherData([(2, 0.03), (4, 0.06)], "pdf2")
            data_set = DataSet([pdf1, pdf2], "dataset")
            data_design = DataDesign(join(d, "data_design"))
            data_design.add(data_set)

            msd = ModelingStrategyDescriptor("strategy", {},
                                             "single_pub_model", {},
                                             "multi_pub_model", {})
            sparams = SystemParameters(
                [0.03, 0.05],
                LiquidLegionsParameters(13, 1e6, 1),
                np.random.default_rng(),
            )
            eparams = ExperimentParameters(PrivacyBudget(1.0, 0.01), 3, 5,
                                           "test_point_strategy")
            trial_descriptor = TrialDescriptor(msd, sparams, eparams)
            trial = ExperimentalTrial("edir", data_design, "dataset",
                                      trial_descriptor)

            actual = trial._make_independent_vars_dataframe()

            expected_trial_name = (
                "strategy,single_pub_model,multi_pub_model,"
                "spends=(0.03,0.05),decay_rate=13,sketch_size=1000000.0,"
                "epsilon=1.0,delta=0.01,replica_id=3,max_frequency=5,"
                "test_point_strategy=test_point_strategy")

            expected = pd.DataFrame({
                "dataset": ["dataset"],
                "trial": [expected_trial_name],
                "replica_id": [3],
                "single_pub_model": ["single_pub_model"],
                "multi_pub_model": ["multi_pub_model"],
                "strategy": ["strategy"],
                "liquid_legions_sketch_size": [1e6],
                "liquid_legions_decay_rate": [13],
                "maximum_reach": [4],
                "ncampaigns": [2],
                "largest_pub_reach": [3],
                "max_frequency": [5],
                "average_spend_fraction": [0.04],
            })
            pd.testing.assert_frame_equal(actual, expected)
Example #6
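Runs a single-publisher trial end to end through ExperimentalTrial.evaluate and spot-checks a few columns of the resulting one-row dataframe.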
    def test_evaluate_single_publisher_model(self):
        with TemporaryDirectory() as d:
            data1 = HeterogeneousImpressionGenerator(1000,
                                                     gamma_shape=1.0,
                                                     gamma_scale=3.0)()
            pdf1 = PublisherData(FixedPriceGenerator(0.1)(data1))
            data_set = DataSet([pdf1], "dataset")
            data_design_dir = join(d, "data_design")
            experiment_dir = join(d, "experiments")
            data_design = DataDesign(data_design_dir)
            data_design.add(data_set)

            msd = ModelingStrategyDescriptor("single_publisher", {}, "goerg",
                                             {}, "pairwise_union", {})
            sparams = SystemParameters(
                [0.5],
                LiquidLegionsParameters(13, 1e6, 1),
                np.random.default_rng(),
            )
            eparams = ExperimentParameters(PrivacyBudget(1.0, 0.01), 3, 5,
                                           "grid", {"grid_size": 5})
            trial_descriptor = TrialDescriptor(msd, sparams, eparams)
            trial = ExperimentalTrial(
                experiment_dir,
                data_design,
                "dataset",
                trial_descriptor,
                analysis_type="single_pub",
            )
            result = trial.evaluate(seed=1)
            # We don't check each column in the resulting dataframe, because these have
            # been checked by the preceding unit tests.  However, we make a few strategic
            # probes.
            self.assertEqual(result.shape[0], 1)
            self.assertAlmostEqual(result["relative_error_at_100"][0],
                                   0.0,
                                   delta=0.01)
            self.assertGreater(result["max_nonzero_frequency_from_halo"][0], 0)
            self.assertEqual(result["max_nonzero_frequency_from_data"][0], 5)
Example #7
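Checks that a modeling failure inside evaluate is recorded in the results dataframe, with model_succeeded set to 0 and the exception message in model_exception, rather than being raised.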
    def test_evaluate_when_there_is_a_modeling_exception(self):
        with TemporaryDirectory() as d:
            pdf1 = PublisherData([(1, 0.01), (2, 0.02), (3, 0.04), (4, 0.05)],
                                 "pdf1")
            data_set = DataSet([pdf1], "dataset")
            data_design_dir = join(d, "data_design")
            experiment_dir = join(d, "experiments")
            data_design = DataDesign(data_design_dir)
            data_design.add(data_set)

            MODELING_STRATEGIES["fake"] = GoergModelingStrategy
            TEST_POINT_STRATEGIES["fake_tps"] = GoergTestPointGenerator

            msd = ModelingStrategyDescriptor("fake", {}, "goerg", {},
                                             "pairwise_union", {})
            sparams = SystemParameters(
                [0.5],
                LiquidLegionsParameters(13, 1e6, 1),
                np.random.default_rng(),
            )
            eparams = ExperimentParameters(PrivacyBudget(1.0, 0.01), 3, 5,
                                           "fake_tps")
            trial_descriptor = TrialDescriptor(msd, sparams, eparams)
            trial = ExperimentalTrial(experiment_dir, data_design, "dataset",
                                      trial_descriptor)
            result = trial.evaluate(seed=1)
            # We don't check each column in the resulting dataframe, because these have
            # been checked by the preceding unit tests.  However, we make a few strategic
            # probes.
            self.assertEqual(result.shape[0], 1)
            self.assertEqual(result["dataset"][0], "dataset")
            self.assertEqual(result["replica_id"][0], 3)
            self.assertEqual(result["privacy_budget_epsilon"][0], 1.0)
            self.assertEqual(result["model_succeeded"][0], 0)
            self.assertEqual(
                result["model_exception"][0],
                "Cannot fit Goerg model when impressions <= reach.",
            )
Example #8
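Repeats the failure scenario of Example #7 with analysis_type="single_pub", in which case the relative-error metric is NaN because the model could not be fit.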
    def test_evaluate_single_publisher_model_with_exception(self):
        with TemporaryDirectory() as d:
            pdf1 = PublisherData([(1, 0.01), (2, 0.02), (3, 0.04), (4, 0.05)],
                                 "pdf1")
            data_set = DataSet([pdf1], "dataset")
            data_design_dir = join(d, "data_design")
            experiment_dir = join(d, "experiments")
            data_design = DataDesign(data_design_dir)
            data_design.add(data_set)

            MODELING_STRATEGIES["fake"] = GoergModelingStrategy
            TEST_POINT_STRATEGIES["fake_tps"] = GoergTestPointGenerator

            msd = ModelingStrategyDescriptor("fake", {}, "goerg", {},
                                             "pairwise_union", {})
            sparams = SystemParameters(
                [0.5],
                LiquidLegionsParameters(13, 1e6, 1),
                np.random.default_rng(),
            )
            eparams = ExperimentParameters(PrivacyBudget(1.0, 0.01), 3, 5,
                                           "fake_tps")
            trial_descriptor = TrialDescriptor(msd, sparams, eparams)
            trial = ExperimentalTrial(
                experiment_dir,
                data_design,
                "dataset",
                trial_descriptor,
                analysis_type="single_pub",
            )
            result = trial.evaluate(seed=1)
            # We don't check each column in the resulting dataframe, because these have
            # been checked by the preceding unit tests.  However, we make a few strategic
            # probes.
            self.assertEqual(result.shape[0], 1)
            self.assertTrue(math.isnan(result["relative_error_at_100"][0]))
Example #9
A DataDesign generator's __call__ method: it builds the design by generating and adding one DataSet per fetched parameter set.
    def __call__(self) -> DataDesign:
        data_design = DataDesign(dirpath=self._output_dir)
        for data_set_parameters in self._fetch_data_set_parameters_list():
            data_design.add(self._generate_data_set(data_set_parameters))
        return data_design
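For context, a hypothetical skeleton of the class that owns this __call__ method; only the attribute and helper names it references are taken from the excerpt, and everything else (class name, constructor, helper bodies) is an assumption:

class SyntheticDataDesignGenerator:  # hypothetical name
    def __init__(self, output_dir: str):
        self._output_dir = output_dir

    def _fetch_data_set_parameters_list(self):
        # Assumed to return one parameter object per DataSet to generate.
        raise NotImplementedError

    def _generate_data_set(self, data_set_parameters) -> DataSet:
        # Assumed to build a DataSet from the given parameters.
        raise NotImplementedError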