Exemplo n.º 1
0
    def setUp(self):
        self.branin_experiment = get_branin_experiment()
        self.branin_timestamp_map_metric_experiment = (
            get_branin_experiment_with_timestamp_map_metric())

        self.runner = SyntheticRunnerWithStatusPolling()
        self.branin_experiment.runner = self.runner
        self.branin_experiment._properties[
            Keys.IMMUTABLE_SEARCH_SPACE_AND_OPT_CONF] = True
        self.branin_experiment_no_impl_runner_or_metrics = Experiment(
            search_space=get_branin_search_space(),
            optimization_config=OptimizationConfig(objective=Objective(
                metric=Metric(name="branin"))),
        )
        self.sobol_GPEI_GS = choose_generation_strategy(
            search_space=get_branin_search_space())
        self.two_sobol_steps_GS = GenerationStrategy(  # Contrived GS to ensure
            steps=[  # that `DataRequiredError` is property handled in scheduler.
                GenerationStep(  # This error is raised when not enough trials
                    model=Models.
                    SOBOL,  # have been observed to proceed to next
                    num_trials=5,  # geneneration step.
                    min_trials_observed=3,
                    max_parallelism=2,
                ),
                GenerationStep(model=Models.SOBOL,
                               num_trials=-1,
                               max_parallelism=3),
            ])
        # GS to force the scheduler to poll completed trials after each ran trial.
        self.sobol_GS_no_parallelism = GenerationStrategy(steps=[
            GenerationStep(
                model=Models.SOBOL, num_trials=-1, max_parallelism=1)
        ])
Exemplo n.º 2
0
    def test_percentile_early_stopping_strategy(self):
        exp = get_branin_experiment_with_timestamp_map_metric(rate=0.5)
        for i in range(5):
            trial = exp.new_trial().add_arm(
                arm=get_branin_arms(n=1, seed=i)[0])
            trial.run()

        exp.attach_data(data=exp.fetch_data())
        """
        Data looks like this:
        arm_name metric_name        mean  sem  trial_index  timestamp
        0       0_0      branin  146.138620  0.0            0          0
        1       0_0      branin  117.388086  0.0            0          1
        2       0_0      branin   99.950007  0.0            0          2
        3       1_0      branin  113.057480  0.0            1          0
        4       1_0      branin   90.815154  0.0            1          1
        5       1_0      branin   77.324501  0.0            1          2
        6       2_0      branin   44.627226  0.0            2          0
        7       2_0      branin   35.847504  0.0            2          1
        8       2_0      branin   30.522333  0.0            2          2
        9       3_0      branin  143.375669  0.0            3          0
        10      3_0      branin  115.168704  0.0            3          1
        11      3_0      branin   98.060315  0.0            3          2
        12      4_0      branin   65.033535  0.0            4          0
        13      4_0      branin   52.239184  0.0            4          1
        14      4_0      branin   44.479018  0.0            4          2

        Looking at the most recent fidelity only (timestamp==2), we have
        the following metric values for each trial:
        0: 99.950007 <-- worst
        3: 98.060315
        1: 77.324501
        4: 44.479018
        2: 30.522333 <-- best
        """
        idcs = set(exp.trials.keys())

        early_stopping_strategy = PercentileEarlyStoppingStrategy(
            percentile_threshold=25, )
        should_stop = early_stopping_strategy.should_stop_trials_early(
            trial_indices=idcs, experiment=exp)
        self.assertEqual(set(should_stop), {0})

        early_stopping_strategy = PercentileEarlyStoppingStrategy(
            percentile_threshold=50, )
        should_stop = early_stopping_strategy.should_stop_trials_early(
            trial_indices=idcs, experiment=exp)
        self.assertEqual(set(should_stop), {0, 3})

        # respect trial_indices argument
        should_stop = early_stopping_strategy.should_stop_trials_early(
            trial_indices={0}, experiment=exp)
        self.assertEqual(set(should_stop), {0})

        early_stopping_strategy = PercentileEarlyStoppingStrategy(
            percentile_threshold=75, )
        should_stop = early_stopping_strategy.should_stop_trials_early(
            trial_indices=idcs, experiment=exp)
        self.assertEqual(set(should_stop), {0, 3, 1})
Exemplo n.º 3
0
    def _setupBraninExperiment(self, n: int, incremental: bool = False) -> Experiment:
        exp = get_branin_experiment_with_timestamp_map_metric(incremental=incremental)
        batch = exp.new_batch_trial()
        batch.add_arms_and_weights(arms=get_branin_arms(n=n, seed=0))
        batch.run()

        batch_2 = exp.new_batch_trial()
        batch_2.add_arms_and_weights(arms=get_branin_arms(n=3 * n, seed=1))
        batch_2.run()
        return exp
Exemplo n.º 4
0
    def test_threshold_early_stopping_strategy(self):
        exp = get_branin_experiment_with_timestamp_map_metric(rate=0.5)
        for i in range(5):
            trial = exp.new_trial().add_arm(
                arm=get_branin_arms(n=1, seed=i)[0])
            trial.run()

        exp.attach_data(data=exp.fetch_data())
        """
        Data looks like this:
        arm_name metric_name        mean  sem  trial_index  timestamp
        0       0_0      branin  146.138620  0.0            0          0
        1       0_0      branin  117.388086  0.0            0          1
        2       0_0      branin   99.950007  0.0            0          2
        3       1_0      branin  113.057480  0.0            1          0
        4       1_0      branin   90.815154  0.0            1          1
        5       1_0      branin   77.324501  0.0            1          2
        6       2_0      branin   44.627226  0.0            2          0
        7       2_0      branin   35.847504  0.0            2          1
        8       2_0      branin   30.522333  0.0            2          2
        9       3_0      branin  143.375669  0.0            3          0
        10      3_0      branin  115.168704  0.0            3          1
        11      3_0      branin   98.060315  0.0            3          2
        12      4_0      branin   65.033535  0.0            4          0
        13      4_0      branin   52.239184  0.0            4          1
        14      4_0      branin   44.479018  0.0            4          2
        """
        idcs = set(exp.trials.keys())

        early_stopping_strategy = ThresholdEarlyStoppingStrategy(
            metric_threshold=50, min_progression=1)
        should_stop = early_stopping_strategy.should_stop_trials_early(
            trial_indices=idcs, experiment=exp)
        self.assertEqual(set(should_stop), {0, 1, 3})

        # respect trial_indices argument
        should_stop = early_stopping_strategy.should_stop_trials_early(
            trial_indices={0}, experiment=exp)
        self.assertEqual(set(should_stop), {0})

        # test ignore trial indices
        early_stopping_strategy = ThresholdEarlyStoppingStrategy(
            metric_threshold=50,
            min_progression=1,
            trial_indices_to_ignore={0})
        should_stop = early_stopping_strategy.should_stop_trials_early(
            trial_indices=idcs, experiment=exp)
        self.assertEqual(set(should_stop), {1, 3})

        # test did not reach min progression
        early_stopping_strategy = ThresholdEarlyStoppingStrategy(
            metric_threshold=50, min_progression=3)
        should_stop = early_stopping_strategy.should_stop_trials_early(
            trial_indices=idcs, experiment=exp)
        self.assertEqual(should_stop, {})
Exemplo n.º 5
0
    def test_percentile_early_stopping_strategy_validation(self):
        exp = get_branin_experiment()

        for i in range(5):
            trial = exp.new_trial().add_arm(
                arm=get_branin_arms(n=1, seed=i)[0])
            trial.run()
            trial.mark_as(status=TrialStatus.COMPLETED)

        early_stopping_strategy = PercentileEarlyStoppingStrategy()
        idcs = set(exp.trials.keys())
        exp.attach_data(data=exp.fetch_data())

        # Non-MapData attached
        should_stop = early_stopping_strategy.should_stop_trials_early(
            trial_indices=idcs, experiment=exp)
        self.assertEqual(should_stop, {})

        exp = get_branin_experiment_with_timestamp_map_metric(rate=0.5)
        for i in range(5):
            trial = exp.new_trial().add_arm(
                arm=get_branin_arms(n=1, seed=i)[0])
            trial.run()

        # No data attached
        should_stop = early_stopping_strategy.should_stop_trials_early(
            trial_indices=idcs, experiment=exp)
        self.assertEqual(should_stop, {})

        exp.attach_data(data=exp.fetch_data())

        # Not enough learning curves
        early_stopping_strategy = PercentileEarlyStoppingStrategy(
            min_curves=6, )
        should_stop = early_stopping_strategy.should_stop_trials_early(
            trial_indices=idcs, experiment=exp)
        self.assertEqual(should_stop, {})

        # Most recent progression below minimum
        early_stopping_strategy = PercentileEarlyStoppingStrategy(
            min_progression=3, )
        should_stop = early_stopping_strategy.should_stop_trials_early(
            trial_indices=idcs, experiment=exp)
        self.assertEqual(should_stop, {})

        # True objective metric name
        self.assertIsNone(
            early_stopping_strategy.true_objective_metric_name)  # default none
        early_stopping_strategy.true_objective_metric_name = "true_obj_metric"
        self.assertEqual(early_stopping_strategy.true_objective_metric_name,
                         "true_obj_metric")
Exemplo n.º 6
0
    def test_percentile_early_stopping_strategy_validation(self):
        exp = get_branin_experiment()

        for i in range(5):
            trial = exp.new_trial().add_arm(
                arm=get_branin_arms(n=1, seed=i)[0])
            trial.run()

        early_stopping_strategy = PercentileEarlyStoppingStrategy()
        idcs = set(exp.trials.keys())
        exp.attach_data(data=exp.fetch_data())

        # Non-MapData attached
        with self.assertRaisesRegex(ValueError, "expects MapData"):
            early_stopping_strategy.should_stop_trials_early(
                trial_indices=idcs, experiment=exp)

        exp = get_branin_experiment_with_timestamp_map_metric(rate=0.5)
        for i in range(5):
            trial = exp.new_trial().add_arm(
                arm=get_branin_arms(n=1, seed=i)[0])
            trial.run()

        # No data attached
        should_stop = early_stopping_strategy.should_stop_trials_early(
            trial_indices=idcs, experiment=exp)
        self.assertEqual(should_stop, {})

        exp.attach_data(data=exp.fetch_data())

        # Not enough learning curves
        early_stopping_strategy = PercentileEarlyStoppingStrategy(
            min_curves=6, )
        should_stop = early_stopping_strategy.should_stop_trials_early(
            trial_indices=idcs, experiment=exp)
        self.assertEqual(should_stop, {})

        # Most recent progression below minimum
        early_stopping_strategy = PercentileEarlyStoppingStrategy(
            min_progression=3, )
        should_stop = early_stopping_strategy.should_stop_trials_early(
            trial_indices=idcs, experiment=exp)
        self.assertEqual(should_stop, {})
Exemplo n.º 7
0
    def test_early_stopping_with_unaligned_results(self):
        # test case 1
        exp = get_branin_experiment_with_timestamp_map_metric(rate=0.5)
        for i in range(5):
            trial = exp.new_trial().add_arm(
                arm=get_branin_arms(n=1, seed=i)[0])
            trial.run()
            trial.mark_as(status=TrialStatus.COMPLETED)

        # manually "unalign" timestamps to simulate real-world scenario
        # where each curve reports results at different steps
        data = exp.fetch_data()
        unaligned_timestamps = [0, 1, 4, 1, 2, 3, 1, 3, 4, 0, 1, 2, 0, 2, 4]
        data.df.loc[data.df["metric_name"] == "branin",
                    "timestamp"] = unaligned_timestamps
        exp.attach_data(data=data)
        """
        Dataframe after interpolation:
                    0           1          2           3          4
        timestamp
        0          146.138620         NaN        NaN  143.375669  65.033535
        1          117.388086  113.057480  44.627226  115.168704  58.636359
        2          111.575393   90.815154  40.237365   98.060315  52.239184
        3          105.762700   77.324501  35.847504         NaN  48.359101
        4           99.950007         NaN  30.522333         NaN  44.479018
        """

        # We consider trials 0, 2, and 4 for early stopping at progression 4,
        #    and choose to stop trial 0.
        # We consider trial 1 for early stopping at progression 3, and
        #    choose to stop it.
        # We consider trial 3 for early stopping at progression 2, and
        #    choose to stop it.
        early_stopping_strategy = PercentileEarlyStoppingStrategy(
            percentile_threshold=50,
            min_curves=3,
        )
        should_stop = early_stopping_strategy.should_stop_trials_early(
            trial_indices=set(exp.trials.keys()), experiment=exp)
        self.assertEqual(set(should_stop), {0, 1, 3})

        # test case 2, where trial 3 has only 1 data point
        exp = get_branin_experiment_with_timestamp_map_metric(rate=0.5)
        for i in range(5):
            trial = exp.new_trial().add_arm(
                arm=get_branin_arms(n=1, seed=i)[0])
            trial.run()
            trial.mark_as(status=TrialStatus.COMPLETED)

        # manually "unalign" timestamps to simulate real-world scenario
        # where each curve reports results at different steps
        data = exp.fetch_data()
        unaligned_timestamps = [0, 1, 4, 1, 2, 3, 1, 3, 4, 0, 1, 2, 0, 2, 4]
        data.df.loc[data.df["metric_name"] == "branin",
                    "timestamp"] = unaligned_timestamps
        # manually remove timestamps 1 and 2 for arm 3
        data.df.drop([22, 23], inplace=True)
        exp.attach_data(data=data)
        """
        Dataframe after interpolation:
                    0           1          2           3          4
        timestamp
        0          146.138620         NaN        NaN  143.375669  65.033535
        1          117.388086  113.057480  44.627226         NaN  58.636359
        2          111.575393   90.815154  40.237365         NaN  52.239184
        3          105.762700   77.324501  35.847504         NaN  48.359101
        4           99.950007         NaN  30.522333         NaN  44.479018
        """

        # We consider trials 0, 2, and 4 for early stopping at progression 4,
        #    and choose to stop trial 0.
        # We consider trial 1 for early stopping at progression 3, and
        #    choose to stop it.
        # We consider trial 3 for early stopping at progression 0, and
        #    choose not to stop it.
        early_stopping_strategy = PercentileEarlyStoppingStrategy(
            percentile_threshold=50,
            min_curves=3,
        )
        should_stop = early_stopping_strategy.should_stop_trials_early(
            trial_indices=set(exp.trials.keys()), experiment=exp)
        self.assertEqual(set(should_stop), {0, 1})
Exemplo n.º 8
0
    def test_get_standard_plots(self):
        exp = get_branin_experiment()
        self.assertEqual(
            len(
                get_standard_plots(experiment=exp,
                                   model=get_generation_strategy().model)),
            0,
        )
        exp = get_branin_experiment(with_batch=True, minimize=True)
        exp.trials[0].run()
        plots = get_standard_plots(
            experiment=exp,
            model=Models.BOTORCH(experiment=exp, data=exp.fetch_data()),
        )
        self.assertEqual(len(plots), 6)
        self.assertTrue(all(isinstance(plot, go.Figure) for plot in plots))
        exp = get_branin_experiment_with_multi_objective(with_batch=True)
        exp.optimization_config.objective.objectives[0].minimize = False
        exp.optimization_config.objective.objectives[1].minimize = True
        exp.optimization_config._objective_thresholds = [
            ObjectiveThreshold(metric=exp.metrics["branin_a"],
                               op=ComparisonOp.GEQ,
                               bound=-100.0),
            ObjectiveThreshold(metric=exp.metrics["branin_b"],
                               op=ComparisonOp.LEQ,
                               bound=100.0),
        ]
        exp.trials[0].run()
        plots = get_standard_plots(experiment=exp,
                                   model=Models.MOO(experiment=exp,
                                                    data=exp.fetch_data()))
        self.assertEqual(len(plots), 7)

        # All plots are successfully created when objective thresholds are absent
        exp.optimization_config._objective_thresholds = []
        plots = get_standard_plots(experiment=exp,
                                   model=Models.MOO(experiment=exp,
                                                    data=exp.fetch_data()))
        self.assertEqual(len(plots), 7)

        exp = get_branin_experiment_with_timestamp_map_metric(
            with_status_quo=True)
        exp.new_trial().add_arm(exp.status_quo)
        exp.trials[0].run()
        exp.new_trial(generator_run=Models.SOBOL(
            search_space=exp.search_space).gen(n=1))
        exp.trials[1].run()
        plots = get_standard_plots(
            experiment=exp,
            model=Models.BOTORCH(experiment=exp, data=exp.fetch_data()),
            true_objective_metric_name="branin",
        )

        self.assertEqual(len(plots), 9)
        self.assertTrue(all(isinstance(plot, go.Figure) for plot in plots))
        self.assertIn(
            "Objective branin_map vs. True Objective Metric branin",
            [p.layout.title.text for p in plots],
        )

        with self.assertRaisesRegex(
                ValueError, "Please add a valid true_objective_metric_name"):
            plots = get_standard_plots(
                experiment=exp,
                model=Models.BOTORCH(experiment=exp, data=exp.fetch_data()),
                true_objective_metric_name="not_present",
            )