Exemplo n.º 1
0
    def sample_relative(
        self, study: Study, trial: FrozenTrial, search_space: Dict[str, BaseDistribution]
    ) -> Dict[str, Any]:
        """Jointly sample a value for every parameter in ``search_space``.

        Past observations are split into a "below" and an "above" group, one
        multivariate Parzen estimator is fitted to each, candidates are drawn
        from the "below" estimator and ``TPESampler._compare_multivariate``
        picks the result from the two log-likelihoods.

        Args:
            study: Passed to ``_get_multivariate_observation_pairs`` to collect
                the observed values and scores.
            trial: Not used by this method.
            search_space: Maps each parameter name to its distribution.

        Returns:
            A dict mapping each parameter name in ``search_space`` to the
            chosen value, converted with the distribution's
            ``to_external_repr``.  An empty dict is returned when
            ``search_space`` is empty or when fewer than
            ``self._n_startup_trials`` scores are available.
        """
        # Nothing to sample jointly.
        if search_space == {}:
            return {}

        values, scores = _get_multivariate_observation_pairs(
            study, list(search_space.keys())
        )

        # Too few observations so far: return nothing so that a random trial
        # is run instead.
        if len(scores) < self._n_startup_trials:
            return {}

        # Partition the observations into the "below" and "above" groups.
        below, above = self._split_multivariate_observation_pairs(values, scores)

        # Fit one estimator per group.
        mpe_below = _MultivariateParzenEstimator(
            below, search_space, self._parzen_estimator_parameters
        )
        mpe_above = _MultivariateParzenEstimator(
            above, search_space, self._parzen_estimator_parameters
        )

        # Candidates come from the "below" estimator and are scored under both
        # estimators; the comparison maximizes the log likelihood ratio.
        candidates = mpe_below.sample(self._rng, self._n_ei_candidates)
        log_pdf_below = mpe_below.log_pdf(candidates)
        log_pdf_above = mpe_above.log_pdf(candidates)
        chosen = TPESampler._compare_multivariate(candidates, log_pdf_below, log_pdf_above)

        # Convert every chosen value to its external representation, in place.
        for name, distribution in search_space.items():
            chosen[name] = distribution.to_external_repr(chosen[name])

        return chosen
def test_suggest_with_step_multivariate_parzen_estimator() -> None:
    """Check that sampling from stepped distributions yields every grid value.

    The search space has a discrete-uniform parameter ``"c"`` (1.0 to 7.0,
    ``q=3.0``) and an int-uniform parameter ``"d"`` (1 to 5, ``step=2``).
    Ten samples are drawn with a fixed seed and, for each parameter, the set
    of sampled values must equal the set of values on its grid.
    """
    estimator_parameters = _ParzenEstimatorParameters(
        consider_prior=False,
        prior_weight=0.0,
        consider_magic_clip=False,
        consider_endpoints=False,
        weights=lambda x: np.arange(x) + 1.0,
    )

    # Distributions that take a step argument.
    search_space = {
        "c": distributions.DiscreteUniformDistribution(low=1.0, high=7.0, q=3.0),
        "d": distributions.IntUniformDistribution(low=1, high=5, step=2),
    }
    # A single observation per parameter.
    observations = {"c": np.array([4]), "d": np.array([1])}
    # The values each parameter is allowed to take.
    expected_values = {
        "c": set(np.arange(1.0, 10.0, 3.0)),
        "d": set(np.arange(1, 7, 2)),
    }

    with patch(_PRECOMPUTE_SIGMAS0, return_value=np.ones(2)):
        mpe = _MultivariateParzenEstimator(observations, search_space, estimator_parameters)

    # Ten draws with this seed are expected to cover every valid value.
    drawn = mpe.sample(np.random.RandomState(0), 10)
    for param_name, samples in drawn.items():
        assert set(samples) == expected_values[param_name]
def test_sample_multivariate_parzen_estimator() -> None:
    """Check shape, values and reproducibility of the estimator's samples.

    The estimator is built from ``MULTIVARIATE_SAMPLES`` over ``SEARCH_SPACE``
    with the pre-computed sigmas patched to ``1e-8``.  The test then verifies
    that drawing three samples gives arrays of shape ``(3,)``, that a single
    draw is close to the input observations, and that drawing again with the
    same seed gives an equal result.
    """
    estimator_parameters = _ParzenEstimatorParameters(
        consider_prior=False,
        prior_weight=0.0,
        consider_magic_clip=False,
        consider_endpoints=False,
        weights=lambda x: np.arange(x) + 1.0,
    )

    with patch(_PRECOMPUTE_SIGMAS0, return_value=1e-8 * np.ones(2)):
        mpe = _MultivariateParzenEstimator(
            MULTIVARIATE_SAMPLES, SEARCH_SPACE, estimator_parameters
        )

    # Shape: one entry per requested sample for every parameter.
    three_draws = mpe.sample(np.random.RandomState(0), 3)
    for param_name, samples in three_draws.items():
        assert samples.shape == (3,)

    # Values: with ``consider_prior`` = False and sigma pre-computed as 1e-8,
    # a sample should almost equal the input ``MULTIVARIATE_SAMPLES``.
    output_multivariate_samples = mpe.sample(np.random.RandomState(0), 1)
    for param_name, samples in output_multivariate_samples.items():
        message = "parameter {}".format(param_name)
        if samples.dtype == str:
            assert samples[0] == "y", message
            continue
        np.testing.assert_almost_equal(
            samples,
            MULTIVARIATE_SAMPLES[param_name],
            decimal=2,
            err_msg=message,
        )

    # Reproducibility: the same seed must give the same single draw.
    assert output_multivariate_samples == mpe.sample(np.random.RandomState(0), 1)
def test_log_pdf_multivariate_parzen_estimator() -> None:
    """Check that observed points score at least as high as sampled points.

    The estimator is built from ``MULTIVARIATE_SAMPLES`` with the pre-computed
    sigmas patched to ``1e-8``.  The log-density of the observations must be
    greater than or equal to the log-density of 100 points drawn from the
    estimator with a fixed seed.
    """
    estimator_parameters = _ParzenEstimatorParameters(
        consider_prior=False,
        prior_weight=1.0,
        consider_magic_clip=True,
        consider_endpoints=True,
        weights=lambda x: np.arange(x) + 1.0,
    )

    # With such a tiny sigma the Parzen estimator is almost a mixture of
    # Dirac measures.
    with patch(_PRECOMPUTE_SIGMAS0, return_value=1e-8 * np.ones(1)):
        mpe = _MultivariateParzenEstimator(
            MULTIVARIATE_SAMPLES, SEARCH_SPACE, estimator_parameters
        )

    observed_log_pdf = mpe.log_pdf(MULTIVARIATE_SAMPLES)
    drawn_samples = mpe.sample(np.random.RandomState(0), 100)
    drawn_log_pdf = mpe.log_pdf(drawn_samples)

    # The likelihood of the previous observations is a positive value, while
    # that of the points sampled by the Parzen estimator is almost zero.
    assert np.all(observed_log_pdf >= drawn_log_pdf)
def test_init_multivariate_parzen_estimator(consider_prior: bool) -> None:
    """Check the internal state built by ``_MultivariateParzenEstimator``.

    With ``_PRECOMPUTE_SIGMAS0`` patched to return ``np.ones(1)``, the
    estimator's ``_weights``, ``_q``, ``_low``, ``_high``, ``_sigmas``,
    ``_mus`` and ``_categorical_weights`` are compared with hand-written
    expectations.  When ``consider_prior`` is true, each expected weight,
    sigma, mu and categorical-weight entry gains one extra component.

    Args:
        consider_prior: Forwarded to ``_ParzenEstimatorParameters``.
            NOTE(review): presumably supplied by a pytest parametrization
            that is not visible here -- confirm.
    """
    parameters = _ParzenEstimatorParameters(
        consider_prior=consider_prior,
        prior_weight=1.0,
        consider_magic_clip=False,
        consider_endpoints=False,
        weights=lambda x: np.arange(x) + 1.0,
    )

    with patch(_PRECOMPUTE_SIGMAS0, return_value=np.ones(1)):
        mpe = _MultivariateParzenEstimator(MULTIVARIATE_SAMPLES, SEARCH_SPACE, parameters)

    def components(observed, prior):
        # One entry for the observation, plus one more when the prior is on.
        return [observed] + consider_prior * [prior]

    # Frequently used logarithms of the range endpoints.
    log_100 = np.log(100.0)
    log_e_low = np.log(0.5)
    log_e_high = np.log(100.5)

    # Scalar attributes: normalized weights, steps and (transformed) bounds.
    unnormalized = np.array(components(1, 1), dtype=float)
    expected_weights = unnormalized / unnormalized.sum()
    expected_q = {"a": None, "b": None, "c": 3.0, "d": 1.0, "e": None, "f": None}
    expected_low = {
        "a": 1.0,
        "b": np.log(1.0),
        "c": -0.5,
        "d": 0.5,
        "e": log_e_low,
        "f": None,
    }
    expected_high = {
        "a": 100.0,
        "b": log_100,
        "c": 101.5,
        "d": 100.5,
        "e": log_e_high,
        "f": None,
    }

    assert np.all(mpe._weights == expected_weights)
    assert mpe._q == expected_q
    assert mpe._low == expected_low
    assert mpe._high == expected_high

    # Per-parameter kernel attributes.
    expected_sigmas = {
        "a": components(99.0, 99.0),
        "b": components(log_100, log_100),
        "c": components(102.0, 102.0),
        "d": components(100.0, 100.0),
        "e": components(log_e_high - log_e_low, log_e_high - log_e_low),
        "f": None,
    }
    expected_mus = {
        "a": components(1.0, 50.5),
        "b": components(np.log(1.0), log_100 / 2.0),
        "c": components(1.0, 50.5),
        "d": components(1.0, 50.5),
        "e": components(np.log(1.0), (log_e_high + log_e_low) / 2.0),
        "f": None,
    }

    # Only "f" has categorical weights; all other parameters expect None.
    if consider_prior:
        f_weights = np.array([[0.2, 0.6, 0.2], [1.0 / 3.0, 1.0 / 3.0, 1.0 / 3.0]])
    else:
        f_weights = np.array([[0.25, 0.5, 0.25]])
    expected_categorical_weights = {name: None for name in ("a", "b", "c", "d", "e")}
    expected_categorical_weights["f"] = f_weights

    # (actual, expected, error-message template), checked in this order for
    # every parameter.
    checks = (
        (mpe._sigmas, expected_sigmas, 'parameter "{}"'),
        (mpe._mus, expected_mus, "parameter: {}"),
        (mpe._categorical_weights, expected_categorical_weights, "parameter: {}"),
    )
    for param_name in mpe._sigmas:
        for actual, expected, template in checks:
            np.testing.assert_equal(
                actual[param_name],
                expected[param_name],
                err_msg=template.format(param_name),
            )