def test_compute_trial_results_path(self):
    with TemporaryDirectory() as d:
        pdf1 = PublisherData([(1, 0.01), (2, 0.02), (1, 0.04), (3, 0.05)], "pdf1")
        pdf2 = PublisherData([(2, 0.03), (4, 0.06)], "pdf2")
        data_set = DataSet([pdf1, pdf2], "dataset")
        data_design = DataDesign(join(d, "data_design"))
        data_design.add(data_set)
        msd = ModelingStrategyDescriptor(
            "strategy", {}, "single_pub_model", {}, "multi_pub_model", {}
        )
        sparams = SystemParameters(
            [0.03, 0.05],
            LiquidLegionsParameters(13, 1e6, 1),
            np.random.default_rng(),
        )
        eparams = ExperimentParameters(PrivacyBudget(1.0, 0.01), 3, 5, "tps")
        trial_descriptor = TrialDescriptor(msd, sparams, eparams)
        trial = ExperimentalTrial("edir", data_design, "dataset", trial_descriptor)
        actual = trial._compute_trial_results_path()
        expected = "{}/{}/{},{},{},{}".format(
            "edir",
            "dataset",
            "strategy,single_pub_model,multi_pub_model",
            "spends=(0.03,0.05),decay_rate=13,sketch_size=1000000.0",
            "epsilon=1.0,delta=0.01,replica_id=3,max_frequency=5",
            "test_point_strategy=tps.csv",
        )
        self.assertEqual(actual, expected)
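# The expected value above documents the naming convention: every component of
# the trial descriptor is serialized into the results path, so a CSV on disk
# can be traced back to the exact configuration that produced it.
# Schematically (segment names here are illustrative, not taken from the code
# under test):
#
#   <experiment_dir>/<dataset>/<modeling strategy>,<system params>,<experiment params>.csv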
def test_lookup(self):
    with TemporaryDirectory() as d:
        dd1 = DataDesign(d)
        dd1.add(self.data_set1)
        dd1.add(self.data_set2)
        # A second DataDesign opened on the same directory should find the
        # data sets that the first one persisted there.
        dd2 = DataDesign(d)
        ds1 = dd2.by_name("ds1")
        self.assertEqual(ds1.reach_by_impressions([4, 2]).reach(), 4)
        ds2 = dd2.by_name("ds2")
        self.assertEqual(ds2.reach_by_impressions([4, 2]).reach(), 3)
def test_properties(self):
    with TemporaryDirectory() as d:
        dd = DataDesign(d)
        self.assertEqual(dd.count, 0)
        self.assertEqual(dd.names, [])
        dd.add(self.data_set1)
        self.assertEqual(dd.count, 1)
        self.assertEqual(dd.names, ["ds1"])
        dd.add(self.data_set2)
        self.assertEqual(dd.count, 2)
        self.assertEqual(dd.names, ["ds1", "ds2"])
def test_evaluate(self):
    with TemporaryDirectory() as d:
        pdf1 = PublisherData([(1, 0.01), (2, 0.02), (1, 0.04), (3, 0.05)], "pdf1")
        pdf2 = PublisherData([(2, 0.03), (4, 0.06)], "pdf2")
        data_set = DataSet([pdf1, pdf2], "dataset")
        data_design_dir = join(d, "data_design")
        experiment_dir = join(d, "experiments")
        data_design = DataDesign(data_design_dir)
        data_design.add(data_set)
        # Register fake strategies so that the trial descriptors below can
        # refer to them by name.
        MODELING_STRATEGIES["fake"] = FakeModelingStrategy
        TEST_POINT_STRATEGIES["fake_tps"] = (
            lambda ds, rng: FakeTestPointGenerator().test_points()
        )
        msd = ModelingStrategyDescriptor(
            "fake", {"x": 1}, "goerg", {}, "pairwise_union", {}
        )
        sparams1 = SystemParameters(
            [0.03, 0.05],
            LiquidLegionsParameters(13, 1e6, 1),
            np.random.default_rng(),
        )
        sparams2 = SystemParameters(
            [0.05, 0.03],
            LiquidLegionsParameters(13, 1e6, 1),
            np.random.default_rng(),
        )
        eparams1 = ExperimentParameters(PrivacyBudget(1.0, 0.01), 1, 5, "fake_tps")
        eparams2 = ExperimentParameters(PrivacyBudget(0.5, 0.001), 1, 5, "fake_tps")
        trial_descriptors = [
            TrialDescriptor(msd, sparams1, eparams1),
            TrialDescriptor(msd, sparams1, eparams2),
            TrialDescriptor(msd, sparams2, eparams1),
            TrialDescriptor(msd, sparams2, eparams2),
        ]
        exp = Experiment(experiment_dir, data_design, "dataset", trial_descriptors)
        trials = exp.generate_trials()
        self.assertLen(trials, 4)
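# MODELING_STRATEGIES and TEST_POINT_STRATEGIES act as simple plugin
# registries: a trial descriptor stores only a strategy name, and the
# framework resolves that name at evaluation time, which is what lets the
# tests above inject fakes by reassigning a dict entry. A minimal sketch of
# the pattern, with hypothetical names (not the framework's actual registry
# code):

_DEMO_REGISTRY = {}  # hypothetical stand-in for MODELING_STRATEGIES

def _register_demo_strategy(name, strategy_class):
    # Reassigning a key is all a test needs to do to swap in a fake.
    _DEMO_REGISTRY[name] = strategy_class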
def test_make_independent_vars_dataframe(self):
    with TemporaryDirectory() as d:
        pdf1 = PublisherData([(1, 0.01), (2, 0.02), (1, 0.04), (3, 0.05)], "pdf1")
        pdf2 = PublisherData([(2, 0.03), (4, 0.06)], "pdf2")
        data_set = DataSet([pdf1, pdf2], "dataset")
        data_design = DataDesign(join(d, "data_design"))
        data_design.add(data_set)
        msd = ModelingStrategyDescriptor(
            "strategy", {}, "single_pub_model", {}, "multi_pub_model", {}
        )
        sparams = SystemParameters(
            [0.03, 0.05],
            LiquidLegionsParameters(13, 1e6, 1),
            np.random.default_rng(),
        )
        eparams = ExperimentParameters(
            PrivacyBudget(1.0, 0.01), 3, 5, "test_point_strategy"
        )
        trial_descriptor = TrialDescriptor(msd, sparams, eparams)
        trial = ExperimentalTrial("edir", data_design, "dataset", trial_descriptor)
        actual = trial._make_independent_vars_dataframe()
        expected_trial_name = (
            "strategy,single_pub_model,multi_pub_model,"
            "spends=(0.03,0.05),decay_rate=13,sketch_size=1000000.0,"
            "epsilon=1.0,delta=0.01,replica_id=3,max_frequency=5,"
            "test_point_strategy=test_point_strategy"
        )
        expected = pd.DataFrame({
            "dataset": ["dataset"],
            "trial": [expected_trial_name],
            "replica_id": [3],
            "single_pub_model": ["single_pub_model"],
            "multi_pub_model": ["multi_pub_model"],
            "strategy": ["strategy"],
            "liquid_legions_sketch_size": [1e6],
            "liquid_legions_decay_rate": [13],
            "maximum_reach": [4],
            "ncampaigns": [2],
            "largest_pub_reach": [3],
            "max_frequency": [5],
            "average_spend_fraction": [0.04],
        })
        pd.testing.assert_frame_equal(actual, expected)
def test_evaluate_single_publisher_model(self):
    with TemporaryDirectory() as d:
        data1 = HeterogeneousImpressionGenerator(
            1000, gamma_shape=1.0, gamma_scale=3.0
        )()
        pdf1 = PublisherData(FixedPriceGenerator(0.1)(data1))
        data_set = DataSet([pdf1], "dataset")
        data_design_dir = join(d, "data_design")
        experiment_dir = join(d, "experiments")
        data_design = DataDesign(data_design_dir)
        data_design.add(data_set)
        msd = ModelingStrategyDescriptor(
            "single_publisher", {}, "goerg", {}, "pairwise_union", {}
        )
        sparams = SystemParameters(
            [0.5],
            LiquidLegionsParameters(13, 1e6, 1),
            np.random.default_rng(),
        )
        eparams = ExperimentParameters(
            PrivacyBudget(1.0, 0.01), 3, 5, "grid", {"grid_size": 5}
        )
        trial_descriptor = TrialDescriptor(msd, sparams, eparams)
        trial = ExperimentalTrial(
            experiment_dir,
            data_design,
            "dataset",
            trial_descriptor,
            analysis_type="single_pub",
        )
        result = trial.evaluate(seed=1)
        # We don't check each column in the resulting dataframe, because these
        # have been checked by the preceding unit tests. However, we make a few
        # strategic probes.
        self.assertEqual(result.shape[0], 1)
        self.assertAlmostEqual(result["relative_error_at_100"][0], 0.0, delta=0.01)
        self.assertGreater(result["max_nonzero_frequency_from_halo"][0], 0)
        self.assertEqual(result["max_nonzero_frequency_from_data"][0], 5)
def test_evaluate_when_there_is_a_modeling_exception(self):
    with TemporaryDirectory() as d:
        pdf1 = PublisherData([(1, 0.01), (2, 0.02), (3, 0.04), (4, 0.05)], "pdf1")
        data_set = DataSet([pdf1], "dataset")
        data_design_dir = join(d, "data_design")
        experiment_dir = join(d, "experiments")
        data_design = DataDesign(data_design_dir)
        data_design.add(data_set)
        MODELING_STRATEGIES["fake"] = GoergModelingStrategy
        TEST_POINT_STRATEGIES["fake_tps"] = GoergTestPointGenerator
        msd = ModelingStrategyDescriptor(
            "fake", {}, "goerg", {}, "pairwise_union", {}
        )
        sparams = SystemParameters(
            [0.5],
            LiquidLegionsParameters(13, 1e6, 1),
            np.random.default_rng(),
        )
        eparams = ExperimentParameters(PrivacyBudget(1.0, 0.01), 3, 5, "fake_tps")
        trial_descriptor = TrialDescriptor(msd, sparams, eparams)
        trial = ExperimentalTrial(
            experiment_dir, data_design, "dataset", trial_descriptor
        )
        result = trial.evaluate(seed=1)
        # We don't check each column in the resulting dataframe, because these
        # have been checked by the preceding unit tests. However, we make a few
        # strategic probes.
        self.assertEqual(result.shape[0], 1)
        self.assertEqual(result["dataset"][0], "dataset")
        self.assertEqual(result["replica_id"][0], 3)
        self.assertEqual(result["privacy_budget_epsilon"][0], 1.0)
        self.assertEqual(result["model_succeeded"][0], 0)
        self.assertEqual(
            result["model_exception"][0],
            "Cannot fit Goerg model when impressions <= reach.",
        )
def test_evaluate_single_publisher_model_with_exception(self):
    with TemporaryDirectory() as d:
        pdf1 = PublisherData([(1, 0.01), (2, 0.02), (3, 0.04), (4, 0.05)], "pdf1")
        data_set = DataSet([pdf1], "dataset")
        data_design_dir = join(d, "data_design")
        experiment_dir = join(d, "experiments")
        data_design = DataDesign(data_design_dir)
        data_design.add(data_set)
        MODELING_STRATEGIES["fake"] = GoergModelingStrategy
        TEST_POINT_STRATEGIES["fake_tps"] = GoergTestPointGenerator
        msd = ModelingStrategyDescriptor(
            "fake", {}, "goerg", {}, "pairwise_union", {}
        )
        sparams = SystemParameters(
            [0.5],
            LiquidLegionsParameters(13, 1e6, 1),
            np.random.default_rng(),
        )
        eparams = ExperimentParameters(PrivacyBudget(1.0, 0.01), 3, 5, "fake_tps")
        trial_descriptor = TrialDescriptor(msd, sparams, eparams)
        trial = ExperimentalTrial(
            experiment_dir,
            data_design,
            "dataset",
            trial_descriptor,
            analysis_type="single_pub",
        )
        result = trial.evaluate(seed=1)
        # We don't check each column in the resulting dataframe, because these
        # have been checked by the preceding unit tests. However, we make a few
        # strategic probes.
        self.assertEqual(result.shape[0], 1)
        self.assertTrue(math.isnan(result["relative_error_at_100"][0]))
def __call__(self) -> DataDesign:
    """Materializes the DataDesign, generating one DataSet per parameter set."""
    data_design = DataDesign(dirpath=self._output_dir)
    for data_set_parameters in self._fetch_data_set_parameters_list():
        data_design.add(self._generate_data_set(data_set_parameters))
    return data_design
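# Usage sketch: an instance of the generator is simply called to build and
# persist the design. The concrete subclass name and constructor argument
# below are hypothetical, assumed for illustration:
#
#     generator = SyntheticDataDesignGenerator(output_dir)  # hypothetical
#     data_design = generator()
#     print(data_design.names)  # names of all generated data sets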