def _setupBraninExperiment(self, n: int, incremental: bool = False) -> Experiment:
    """Create a Branin map-metric experiment with two batch trials already run.

    The first batch holds ``n`` arms (seed 0) and the second ``3 * n`` arms
    (seed 1); both batches are run before the experiment is returned.
    """
    experiment = get_branin_experiment_with_timestamp_map_metric(
        incremental=incremental
    )
    for num_arms, seed in ((n, 0), (3 * n, 1)):
        batch_trial = experiment.new_batch_trial()
        batch_trial.add_arms_and_weights(arms=get_branin_arms(n=num_arms, seed=seed))
        batch_trial.run()
    return experiment
def test_percentile_early_stopping_strategy_validation(self) -> None:
    """Validation paths of ``PercentileEarlyStoppingStrategy``.

    Each degraded input (non-map data, no data attached, too few learning
    curves, latest progression below the minimum) yields an empty decision
    dict; the ``true_objective_metric_name`` accessor is exercised at the end.
    """
    exp = get_branin_experiment()
    for i in range(5):
        trial = exp.new_trial().add_arm(
            arm=get_branin_arms(n=1, seed=i)[0])
        trial.run()
        trial.mark_as(status=TrialStatus.COMPLETED)
    early_stopping_strategy = PercentileEarlyStoppingStrategy()
    idcs = set(exp.trials.keys())
    exp.attach_data(data=exp.fetch_data())  # Non-MapData attached

    # With plain (non-map) data the strategy makes no stopping decisions.
    should_stop = early_stopping_strategy.should_stop_trials_early(
        trial_indices=idcs, experiment=exp)
    self.assertEqual(should_stop, {})

    exp = get_branin_experiment_with_timestamp_map_metric(rate=0.5)
    for i in range(5):
        trial = exp.new_trial().add_arm(
            arm=get_branin_arms(n=1, seed=i)[0])
        trial.run()

    # No data attached
    # NOTE(review): ``idcs`` still holds the first experiment's trial keys;
    # both experiments create five trials, so the indices coincide.
    should_stop = early_stopping_strategy.should_stop_trials_early(
        trial_indices=idcs, experiment=exp)
    self.assertEqual(should_stop, {})

    exp.attach_data(data=exp.fetch_data())

    # Not enough learning curves (only 5 trials, min_curves=6).
    early_stopping_strategy = PercentileEarlyStoppingStrategy(
        min_curves=6,
    )
    should_stop = early_stopping_strategy.should_stop_trials_early(
        trial_indices=idcs, experiment=exp)
    self.assertEqual(should_stop, {})

    # Most recent progression below minimum
    early_stopping_strategy = PercentileEarlyStoppingStrategy(
        min_progression=3,
    )
    should_stop = early_stopping_strategy.should_stop_trials_early(
        trial_indices=idcs, experiment=exp)
    self.assertEqual(should_stop, {})

    # True objective metric name: unset by default, settable afterwards.
    self.assertIsNone(
        early_stopping_strategy.true_objective_metric_name)  # default none
    early_stopping_strategy.true_objective_metric_name = "true_obj_metric"
    self.assertEqual(early_stopping_strategy.true_objective_metric_name,
        "true_obj_metric")
def _setupBraninExperiment(self, n: int) -> Experiment:
    """Build a synthetic Branin experiment named "test3" with two run batches.

    The first batch has ``n`` arms (seed 0), the second ``3 * n`` arms
    (seed 1); both are run before returning.
    """
    experiment = Experiment(
        name="test3",
        search_space=get_branin_search_space(),
        tracking_metrics=[BraninMetric(name="b", param_names=["x1", "x2"])],
        runner=SyntheticRunner(),
    )
    for num_arms, seed in ((n, 0), (3 * n, 1)):
        batch_trial = experiment.new_batch_trial()
        batch_trial.add_arms_and_weights(arms=get_branin_arms(n=num_arms, seed=seed))
        batch_trial.run()
    return experiment
def test_percentile_early_stopping_strategy(self) -> None:
    """Percentile-based stopping: worse-performing trials are stopped first."""
    experiment = get_branin_experiment_with_timestamp_map_metric(rate=0.5)
    for seed in range(5):
        trial = experiment.new_trial().add_arm(
            arm=get_branin_arms(n=1, seed=seed)[0]
        )
        trial.run()
    experiment.attach_data(data=experiment.fetch_data())
    """
    Data looks like this:

        arm_name metric_name        mean  sem  trial_index  timestamp
    0        0_0      branin  146.138620  0.0            0          0
    1        0_0      branin  117.388086  0.0            0          1
    2        0_0      branin   99.950007  0.0            0          2
    3        1_0      branin  113.057480  0.0            1          0
    4        1_0      branin   90.815154  0.0            1          1
    5        1_0      branin   77.324501  0.0            1          2
    6        2_0      branin   44.627226  0.0            2          0
    7        2_0      branin   35.847504  0.0            2          1
    8        2_0      branin   30.522333  0.0            2          2
    9        3_0      branin  143.375669  0.0            3          0
    10       3_0      branin  115.168704  0.0            3          1
    11       3_0      branin   98.060315  0.0            3          2
    12       4_0      branin   65.033535  0.0            4          0
    13       4_0      branin   52.239184  0.0            4          1
    14       4_0      branin   44.479018  0.0            4          2

    Looking at the most recent fidelity only (timestamp==2), we have
    the following metric values for each trial:
    0: 99.950007 <-- worst
    3: 98.060315
    1: 77.324501
    4: 44.479018
    2: 30.522333 <-- best
    """
    trial_indices = set(experiment.trials.keys())

    # Threshold 25: only the single worst trial (0) is stopped.
    strategy = PercentileEarlyStoppingStrategy(
        percentile_threshold=25,
    )
    should_stop = strategy.should_stop_trials_early(
        trial_indices=trial_indices, experiment=experiment
    )
    self.assertEqual(set(should_stop), {0})

    # Threshold 50: the worse half (trials 0 and 3) is stopped.
    strategy = PercentileEarlyStoppingStrategy(
        percentile_threshold=50,
    )
    should_stop = strategy.should_stop_trials_early(
        trial_indices=trial_indices, experiment=experiment
    )
    self.assertEqual(set(should_stop), {0, 3})

    # Only trials named in the ``trial_indices`` argument may be stopped.
    should_stop = strategy.should_stop_trials_early(
        trial_indices={0}, experiment=experiment
    )
    self.assertEqual(set(should_stop), {0})

    # Threshold 75: everything but the best quartile is stopped.
    strategy = PercentileEarlyStoppingStrategy(
        percentile_threshold=75,
    )
    should_stop = strategy.should_stop_trials_early(
        trial_indices=trial_indices, experiment=experiment
    )
    self.assertEqual(set(should_stop), {0, 3, 1})
def test_percentile_early_stopping_strategy_validation(self) -> None:
    """Invalid or insufficient inputs either raise or yield no decisions."""
    experiment = get_branin_experiment()
    for seed in range(5):
        trial = experiment.new_trial().add_arm(
            arm=get_branin_arms(n=1, seed=seed)[0]
        )
        trial.run()
    strategy = PercentileEarlyStoppingStrategy()
    trial_indices = set(experiment.trials.keys())
    experiment.attach_data(data=experiment.fetch_data())  # Non-MapData attached

    # Plain (non-map) data is rejected outright.
    with self.assertRaisesRegex(ValueError, "expects MapData"):
        strategy.should_stop_trials_early(
            trial_indices=trial_indices, experiment=experiment
        )

    experiment = get_branin_experiment_with_timestamp_map_metric(rate=0.5)
    for seed in range(5):
        trial = experiment.new_trial().add_arm(
            arm=get_branin_arms(n=1, seed=seed)[0]
        )
        trial.run()

    # No data attached yet: no stopping decisions can be made.
    should_stop = strategy.should_stop_trials_early(
        trial_indices=trial_indices, experiment=experiment
    )
    self.assertEqual(should_stop, {})

    experiment.attach_data(data=experiment.fetch_data())

    # Fewer completed curves than ``min_curves``: no decisions.
    strategy = PercentileEarlyStoppingStrategy(
        min_curves=6,
    )
    should_stop = strategy.should_stop_trials_early(
        trial_indices=trial_indices, experiment=experiment
    )
    self.assertEqual(should_stop, {})

    # Most recent progression below ``min_progression``: no decisions.
    strategy = PercentileEarlyStoppingStrategy(
        min_progression=3,
    )
    should_stop = strategy.should_stop_trials_early(
        trial_indices=trial_indices, experiment=experiment
    )
    self.assertEqual(should_stop, {})
def test_threshold_early_stopping_strategy(self) -> None:
    """Threshold-based stopping: trials above the metric threshold stop."""
    experiment = get_branin_experiment_with_timestamp_map_metric(rate=0.5)
    for seed in range(5):
        trial = experiment.new_trial().add_arm(
            arm=get_branin_arms(n=1, seed=seed)[0]
        )
        trial.run()
    experiment.attach_data(data=experiment.fetch_data())
    """
    Data looks like this:

        arm_name metric_name        mean  sem  trial_index  timestamp
    0        0_0      branin  146.138620  0.0            0          0
    1        0_0      branin  117.388086  0.0            0          1
    2        0_0      branin   99.950007  0.0            0          2
    3        1_0      branin  113.057480  0.0            1          0
    4        1_0      branin   90.815154  0.0            1          1
    5        1_0      branin   77.324501  0.0            1          2
    6        2_0      branin   44.627226  0.0            2          0
    7        2_0      branin   35.847504  0.0            2          1
    8        2_0      branin   30.522333  0.0            2          2
    9        3_0      branin  143.375669  0.0            3          0
    10       3_0      branin  115.168704  0.0            3          1
    11       3_0      branin   98.060315  0.0            3          2
    12       4_0      branin   65.033535  0.0            4          0
    13       4_0      branin   52.239184  0.0            4          1
    14       4_0      branin   44.479018  0.0            4          2
    """
    trial_indices = set(experiment.trials.keys())

    # Trials 0, 1, and 3 are above the threshold of 50 at timestamp >= 1.
    strategy = ThresholdEarlyStoppingStrategy(
        metric_threshold=50, min_progression=1
    )
    should_stop = strategy.should_stop_trials_early(
        trial_indices=trial_indices, experiment=experiment
    )
    self.assertEqual(set(should_stop), {0, 1, 3})

    # Only the requested subset of trials is considered.
    should_stop = strategy.should_stop_trials_early(
        trial_indices={0}, experiment=experiment
    )
    self.assertEqual(set(should_stop), {0})

    # Trials listed in ``trial_indices_to_ignore`` are never stopped.
    strategy = ThresholdEarlyStoppingStrategy(
        metric_threshold=50, min_progression=1, trial_indices_to_ignore={0}
    )
    should_stop = strategy.should_stop_trials_early(
        trial_indices=trial_indices, experiment=experiment
    )
    self.assertEqual(set(should_stop), {1, 3})

    # No trial reaches ``min_progression``: no stopping decisions at all.
    strategy = ThresholdEarlyStoppingStrategy(
        metric_threshold=50, min_progression=3
    )
    should_stop = strategy.should_stop_trials_early(
        trial_indices=trial_indices, experiment=experiment
    )
    self.assertEqual(should_stop, {})
def testExperimentWithoutName(self) -> None:
    """An unnamed experiment stringifies as ``Experiment(None)`` and still runs."""
    unnamed = Experiment(
        search_space=get_branin_search_space(),
        tracking_metrics=[BraninMetric(name="b", param_names=["x1", "x2"])],
        runner=SyntheticRunner(),
    )
    self.assertEqual("Experiment(None)", str(unnamed))

    # A batch trial can still be created and run on a nameless experiment.
    trial = unnamed.new_batch_trial()
    trial.add_arms_and_weights(arms=get_branin_arms(n=5, seed=0))
    trial.run()
    self.assertEqual(trial.run_metadata, {"name": "0"})
def testMTExperimentFlow(self) -> None:
    """End-to-end flow for a multi-type experiment.

    Runs one batch per trial type, verifies per-type runner metadata and
    per-type metric names in the fetched data, then points both types at an
    equivalent metric and checks the means agree.
    """
    self.assertTrue(self.experiment.supports_trial_type("type1"))
    self.assertTrue(self.experiment.supports_trial_type("type2"))
    self.assertFalse(self.experiment.supports_trial_type(None))

    n = 10
    arms = get_branin_arms(n=n, seed=0)

    # First batch picks up the experiment's default trial type ("type1").
    b1 = self.experiment.new_batch_trial()
    b1.add_arms_and_weights(arms=arms)
    self.assertEqual(b1.trial_type, "type1")
    b1.run()
    self.assertEqual(b1.run_metadata["dummy_metadata"], "dummy1")

    # Swap in a new runner for "type2" and run a second batch under it.
    self.experiment.update_runner("type2", SyntheticRunner(dummy_metadata="dummy3"))
    b2 = self.experiment.new_batch_trial()
    b2.trial_type = "type2"
    b2.add_arms_and_weights(arms=arms)
    self.assertEqual(b2.trial_type, "type2")
    b2.run()
    self.assertEqual(b2.run_metadata["dummy_metadata"], "dummy3")

    df = self.experiment.fetch_data().df
    for _, row in df.iterrows():
        # Make sure proper metric present for each batch only
        self.assertEqual(
            row["metric_name"], "m1" if row["trial_index"] == 0 else "m2"
        )

    # Fix: build the per-trial masks from the slice itself. The previous
    # ``arm_0_slice[df["trial_index"] == 0]`` indexed the slice with a
    # boolean Series aligned to the full frame, which pandas only accepts by
    # silently reindexing the key (with a warning); ``float(<Series>)`` is
    # also deprecated, so extract the scalar explicitly via ``.iloc[0]``.
    arm_0_slice = df.loc[df["arm_name"] == "0_0"]
    self.assertNotEqual(
        float(arm_0_slice.loc[arm_0_slice["trial_index"] == 0, "mean"].iloc[0]),
        float(arm_0_slice.loc[arm_0_slice["trial_index"] == 1, "mean"].iloc[0]),
    )
    self.assertEqual(len(df), 2 * n)
    self.assertEqual(self.experiment.default_trials, {0})

    # Set 2 metrics to be equal: "m2" now computes the same Branin value.
    self.experiment.update_tracking_metric(
        BraninMetric("m2", ["x1", "x2"]), trial_type="type2"
    )
    df = self.experiment.fetch_data().df
    arm_0_slice = df.loc[df["arm_name"] == "0_0"]
    self.assertAlmostEqual(
        float(arm_0_slice.loc[arm_0_slice["trial_index"] == 0, "mean"].iloc[0]),
        float(arm_0_slice.loc[arm_0_slice["trial_index"] == 1, "mean"].iloc[0]),
        places=10,
    )
def testStatusQuoSetter(self) -> None:
    """Exercise every path of the ``Experiment.status_quo`` setter.

    Covers: clearing to None, normal updates, all-None parameter values,
    rejection of extra/wrong-typed/missing parameters, arm-registry counts
    after status-quo changes, renaming once the status quo is attached to a
    trial, and setting the status quo from an already-named arm.
    """
    sq_parameters = self.experiment.status_quo.parameters

    # Clearing the status quo is allowed.
    self.experiment.status_quo = None
    self.assertIsNone(self.experiment.status_quo)

    # Verify normal update
    sq_parameters["w"] = 3.5
    self.experiment.status_quo = Arm(sq_parameters)
    self.assertEqual(self.experiment.status_quo.parameters["w"], 3.5)
    self.assertEqual(self.experiment.status_quo.name, "status_quo")
    self.assertTrue("status_quo" in self.experiment.arms_by_name)

    # Verify all None values
    self.experiment.status_quo = Arm(
        {n: None for n in sq_parameters.keys()})
    self.assertIsNone(self.experiment.status_quo.parameters["w"])

    # Try extra param
    sq_parameters["a"] = 4
    with self.assertRaises(ValueError):
        self.experiment.status_quo = Arm(sq_parameters)

    # Try wrong type
    sq_parameters.pop("a")
    sq_parameters["w"] = "hello"
    with self.assertRaises(ValueError):
        self.experiment.status_quo = Arm(sq_parameters)

    # Verify arms_by_signature, arms_by_name only contains status_quo
    self.assertEqual(len(self.experiment.arms_by_signature), 1)
    self.assertEqual(len(self.experiment.arms_by_name), 1)

    # Change status quo, verify still just 1 arm
    sq_parameters["w"] = 3.6
    self.experiment.status_quo = Arm(sq_parameters)
    self.assertEqual(len(self.experiment.arms_by_signature), 1)
    self.assertEqual(len(self.experiment.arms_by_name), 1)

    # Make a batch, add status quo to it, then change exp status quo, verify 2 arms
    batch = self.experiment.new_batch_trial()
    batch.set_status_quo_with_weight(self.experiment.status_quo, 1)
    sq_parameters["w"] = 3.7
    self.experiment.status_quo = Arm(sq_parameters)
    self.assertEqual(len(self.experiment.arms_by_signature), 2)
    self.assertEqual(len(self.experiment.arms_by_name), 2)
    # The replacement status quo gets a suffixed name once the old one is
    # pinned to a trial.
    self.assertEqual(self.experiment.status_quo.name, "status_quo_e0")
    self.assertTrue("status_quo_e0" in self.experiment.arms_by_name)

    # Try missing param
    sq_parameters.pop("w")
    with self.assertRaises(ValueError):
        self.experiment.status_quo = Arm(sq_parameters)

    # Actually name the status quo.
    exp = Experiment(
        name="test3",
        search_space=get_branin_search_space(),
        tracking_metrics=[
            BraninMetric(name="b", param_names=["x1", "x2"])
        ],
        runner=SyntheticRunner(),
    )
    batch = exp.new_batch_trial()
    arms = get_branin_arms(n=1, seed=0)
    batch.add_arms_and_weights(arms=arms)
    self.assertIsNone(exp.status_quo)
    # Setting an already-registered arm keeps that arm's existing name.
    exp.status_quo = arms[0]
    self.assertEqual(exp.status_quo.name, "0_0")

    # Try setting sq to existing arm with different name
    with self.assertRaises(ValueError):
        exp.status_quo = Arm(arms[0].parameters, name="new_name")
def test_early_stopping_with_unaligned_results(self) -> None:
    """Percentile early stopping when curves report at different progressions.

    Timestamps are manually "unaligned" so each trial's curve has values at
    different steps; the strategy must align/interpolate the curves before
    comparing trials at a common progression.
    """
    # test case 1
    exp = get_branin_experiment_with_timestamp_map_metric(rate=0.5)
    for i in range(5):
        trial = exp.new_trial().add_arm(
            arm=get_branin_arms(n=1, seed=i)[0])
        trial.run()
        trial.mark_as(status=TrialStatus.COMPLETED)

    # manually "unalign" timestamps to simulate real-world scenario
    # where each curve reports results at different steps
    data = exp.fetch_data()
    unaligned_timestamps = [0, 1, 4, 1, 2, 3, 1, 3, 4, 0, 1, 2, 0, 2, 4]
    data.df.loc[
        data.df["metric_name"] == "branin", "timestamp"
    ] = unaligned_timestamps
    exp.attach_data(data=data)
    """
    Dataframe after interpolation:
                        0           1          2           3          4
    timestamp
    0          146.138620         NaN        NaN  143.375669  65.033535
    1          117.388086  113.057480  44.627226  115.168704  58.636359
    2          111.575393   90.815154  40.237365   98.060315  52.239184
    3          105.762700   77.324501  35.847504         NaN  48.359101
    4           99.950007         NaN  30.522333         NaN  44.479018
    """
    # We consider trials 0, 2, and 4 for early stopping at progression 4,
    # and choose to stop trial 0.
    # We consider trial 1 for early stopping at progression 3, and
    # choose to stop it.
    # We consider trial 3 for early stopping at progression 2, and
    # choose to stop it.
    early_stopping_strategy = PercentileEarlyStoppingStrategy(
        percentile_threshold=50,
        min_curves=3,
    )
    should_stop = early_stopping_strategy.should_stop_trials_early(
        trial_indices=set(exp.trials.keys()), experiment=exp)
    self.assertEqual(set(should_stop), {0, 1, 3})

    # test case 2, where trial 3 has only 1 data point
    exp = get_branin_experiment_with_timestamp_map_metric(rate=0.5)
    for i in range(5):
        trial = exp.new_trial().add_arm(
            arm=get_branin_arms(n=1, seed=i)[0])
        trial.run()
        trial.mark_as(status=TrialStatus.COMPLETED)

    # manually "unalign" timestamps to simulate real-world scenario
    # where each curve reports results at different steps
    data = exp.fetch_data()
    unaligned_timestamps = [0, 1, 4, 1, 2, 3, 1, 3, 4, 0, 1, 2, 0, 2, 4]
    data.df.loc[
        data.df["metric_name"] == "branin", "timestamp"
    ] = unaligned_timestamps

    # manually remove timestamps 1 and 2 for arm 3
    # NOTE(review): 22 and 23 are row index labels in the fetched dataframe,
    # presumably trial 3's later branin rows — confirm if fetch order changes.
    data.df.drop([22, 23], inplace=True)
    exp.attach_data(data=data)
    """
    Dataframe after interpolation:
                        0           1          2           3          4
    timestamp
    0          146.138620         NaN        NaN  143.375669  65.033535
    1          117.388086  113.057480  44.627226         NaN  58.636359
    2          111.575393   90.815154  40.237365         NaN  52.239184
    3          105.762700   77.324501  35.847504         NaN  48.359101
    4           99.950007         NaN  30.522333         NaN  44.479018
    """
    # We consider trials 0, 2, and 4 for early stopping at progression 4,
    # and choose to stop trial 0.
    # We consider trial 1 for early stopping at progression 3, and
    # choose to stop it.
    # We consider trial 3 for early stopping at progression 0, and
    # choose not to stop it.
    early_stopping_strategy = PercentileEarlyStoppingStrategy(
        percentile_threshold=50,
        min_curves=3,
    )
    should_stop = early_stopping_strategy.should_stop_trials_early(
        trial_indices=set(exp.trials.keys()), experiment=exp)
    self.assertEqual(set(should_stop), {0, 1})