def tuning_job_state():
    return {
        'algo-1': TuningJobState(
            hp_ranges=HyperparameterRanges_Impl(
                HyperparameterRangeContinuous('a1_hp_1', -5.0, 5.0, LinearScaling()),
                HyperparameterRangeCategorical('a1_hp_2', ('a', 'b', 'c'))),
            candidate_evaluations=[
                CandidateEvaluation(candidate=(-3.0, 'a'), value=1.0),
                CandidateEvaluation(candidate=(-1.9, 'c'), value=2.0),
                CandidateEvaluation(candidate=(-3.5, 'a'), value=0.3)],
            failed_candidates=[],
            pending_evaluations=[]),
        'algo-2': TuningJobState(
            hp_ranges=HyperparameterRanges_Impl(
                HyperparameterRangeContinuous('a2_hp_1', -5.0, 5.0, LinearScaling()),
                HyperparameterRangeInteger('a2_hp_2', -5, 5, LinearScaling(), -5, 5)),
            candidate_evaluations=[
                CandidateEvaluation(candidate=(-1.9, -1), value=0.0),
                CandidateEvaluation(candidate=(-3.5, 3), value=2.0)],
            failed_candidates=[],
            pending_evaluations=[])
    }
def default_models() -> List[GPMXNetModel]:
    X = [
        (0.0, 0.0),
        (1.0, 0.0),
        (0.0, 1.0),
        (1.0, 1.0),
        # The same evaluations are added multiple times to force the GP to
        # unlearn its prior
        (0.0, 0.0),
        (1.0, 0.0),
        (0.0, 1.0),
        (1.0, 1.0),
        (0.0, 0.0),
        (1.0, 0.0),
        (0.0, 1.0),
        (1.0, 1.0),
    ]
    Y = [dictionarize_objective(np.sum(x) * 10.0) for x in X]
    state = TuningJobState(
        HyperparameterRanges_Impl(
            HyperparameterRangeContinuous('x', 0.0, 1.0, LinearScaling()),
            HyperparameterRangeContinuous('y', 0.0, 1.0, LinearScaling()),
        ),
        [CandidateEvaluation(x, y) for x, y in zip(X, Y)],
        [], [],
    )
    random_seed = 0
    gpmodel = default_gpmodel(
        state, random_seed=random_seed,
        optimization_config=DEFAULT_OPTIMIZATION_CONFIG)
    gpmodel_mcmc = default_gpmodel_mcmc(
        state, random_seed=random_seed, mcmc_config=DEFAULT_MCMC_CONFIG)
    return [
        GPMXNetModel(state, DEFAULT_METRIC, random_seed, gpmodel,
                     fit_parameters=True, num_fantasy_samples=20),
        GPMXNetModel(state, DEFAULT_METRIC, random_seed, gpmodel_mcmc,
                     fit_parameters=True, num_fantasy_samples=20)
    ]
def tuning_job_state() -> TuningJobState:
    X = [
        (0.0, 0.0),
        (1.0, 0.0),
        (0.0, 1.0),
        (1.0, 1.0),
    ]
    Y = [dictionarize_objective(np.sum(x) * 10.0) for x in X]
    return TuningJobState(
        HyperparameterRanges_Impl(
            HyperparameterRangeContinuous('x', 0.0, 1.0, LinearScaling()),
            HyperparameterRangeContinuous('y', 0.0, 1.0, LinearScaling()),
        ),
        [CandidateEvaluation(x, y) for x, y in zip(X, Y)],
        [], [])
def tuning_job_state_mcmc(X, Y) -> TuningJobState:
    Y = [dictionarize_objective(y) for y in Y]
    return TuningJobState(
        HyperparameterRanges_Impl(
            HyperparameterRangeContinuous('x', -4., 4., LinearScaling())),
        [CandidateEvaluation(x, y) for x, y in zip(X, Y)],
        [], [])
def test_to_ndarray_name_last_pos():
    np.random.seed(123456)
    random_state = np.random.RandomState(123456)
    config_space = CS.ConfigurationSpace()
    config_space.add_hyperparameters([
        CSH.UniformFloatHyperparameter('a', lower=0., upper=1.),
        CSH.UniformIntegerHyperparameter('b', lower=2, upper=3),
        CSH.CategoricalHyperparameter('c', choices=('1', '2', '3')),
        CSH.UniformIntegerHyperparameter('d', lower=2, upper=3),
        CSH.CategoricalHyperparameter('e', choices=('1', '2'))
    ])
    hp_a = HyperparameterRangeContinuous(
        'a', lower_bound=0., upper_bound=1., scaling=LinearScaling())
    hp_b = HyperparameterRangeInteger(
        'b', lower_bound=2, upper_bound=3, scaling=LinearScaling())
    hp_c = HyperparameterRangeCategorical('c', choices=('1', '2', '3'))
    hp_d = HyperparameterRangeInteger(
        'd', lower_bound=2, upper_bound=3, scaling=LinearScaling())
    hp_e = HyperparameterRangeCategorical('e', choices=('1', '2'))
    for name_last_pos in ['a', 'c', 'd', 'e']:
        hp_ranges_cs = HyperparameterRanges_CS(
            config_space, name_last_pos=name_last_pos)
        if name_last_pos == 'a':
            lst = [hp_b, hp_c, hp_d, hp_e, hp_a]
        elif name_last_pos == 'c':
            lst = [hp_a, hp_b, hp_d, hp_e, hp_c]
        elif name_last_pos == 'd':
            lst = [hp_a, hp_b, hp_c, hp_e, hp_d]
        else:
            lst = [hp_a, hp_b, hp_c, hp_d, hp_e]
        hp_ranges = HyperparameterRanges_Impl(*lst)
        names = [hp.name for hp in hp_ranges.hp_ranges]
        config_cs = hp_ranges_cs.random_candidate(random_state)
        _config = config_cs.get_dictionary()
        # Use a tuple (not a generator), matching how candidates are
        # represented elsewhere
        config = tuple(_config[name] for name in names)
        ndarr_cs = hp_ranges_cs.to_ndarray(config_cs)
        ndarr = hp_ranges.to_ndarray(config)
        assert_allclose(ndarr_cs, ndarr, rtol=1e-4)
def search_space(self):
    return HyperparameterRanges_Impl(
        HyperparameterRangeContinuous('x', 1.0, 100.0, scaling=LogScaling()),
        HyperparameterRangeInteger('y', 0, 2, scaling=LinearScaling()),
        HyperparameterRangeCategorical('z', ('0.0', '1.0', '2.0')))
def test_get_internal_candidate_evaluations():
    """We do not test the case with no evaluations, since it is assumed
    that some evaluations are always generated at the beginning of the
    BO loop."""
    candidates = [
        CandidateEvaluation((2, 3.3, 'X'), dictionarize_objective(5.3)),
        CandidateEvaluation((1, 9.9, 'Y'), dictionarize_objective(10.9)),
        CandidateEvaluation((7, 6.1, 'X'), dictionarize_objective(13.1)),
    ]
    state = TuningJobState(
        hp_ranges=HyperparameterRanges_Impl(
            HyperparameterRangeInteger('integer', 0, 10, LinearScaling()),
            HyperparameterRangeContinuous('real', 0, 10, LinearScaling()),
            HyperparameterRangeCategorical('categorical', ('X', 'Y')),
        ),
        candidate_evaluations=candidates,
        # these should be ignored by the model
        failed_candidates=[candidates[0].candidate],
        pending_evaluations=[])
    result = get_internal_candidate_evaluations(
        state, DEFAULT_METRIC, normalize_targets=True,
        num_fantasize_samples=20)
    assert len(result.X.shape) == 2, "Input should be a matrix"
    assert len(result.y.shape) == 2, "Output should be a matrix"
    assert result.X.shape[0] == len(candidates)
    assert result.y.shape[-1] == 1, \
        "Only a single output value per row is supported"
    assert np.abs(np.mean(result.y)) < 1e-8, \
        "Mean of the normalized outputs is not 0.0"
    assert np.abs(np.std(result.y) - 1.0) < 1e-8, \
        "Std. of the normalized outputs is not 1.0"
    np.testing.assert_almost_equal(result.mean, 9.766666666666666)
    np.testing.assert_almost_equal(result.std, 3.283629428273267)
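# Note on the expected constants in test_get_internal_candidate_evaluations:
# the three observed targets are 5.3, 10.9 and 13.1, so
# result.mean = 29.3 / 3 ≈ 9.7667 and result.std ≈ 3.2836 is the population
# standard deviation (the np.std default) of those targets, which the
# normalization then removes from result.y.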
def test_pick_from_locally_optimized():
    duplicate_detector1 = DuplicateDetectorIdentical()
    duplicate_detector2 = DuplicateDetectorEpsilon(
        hp_ranges=HyperparameterRanges_Impl(
            HyperparameterRangeContinuous(
                'hp1', -10.0, 10.0, scaling=LinearScaling()),
            HyperparameterRangeContinuous(
                'hp2', -10.0, 10.0, scaling=LinearScaling()),
        ))
    for duplicate_detector in (duplicate_detector1, duplicate_detector2):
        got = _pick_from_locally_optimized(
            candidates_with_optimization=[
                # (original, optimized)
                ((0.1, 1.0), (0.1, 1.0)),
                ((0.1, 1.0), (0.6, 1.0)),  # not a duplicate
                ((0.2, 1.0), (0.1, 1.0)),  # duplicate optimized; resolved by the original
                ((0.1, 1.0), (0.1, 1.0)),  # complete duplicate
                ((0.3, 1.0), (0.1, 1.0)),  # blacklisted original
                ((0.4, 3.0), (0.3, 1.0)),  # blacklisted all
                ((1.0, 2.0), (1.0, 1.0)),  # final candidate to be selected into a batch
                ((0.0, 2.0), (1.0, 0.0)),  # skipped
                ((0.0, 2.0), (1.0, 0.0)),  # skipped
            ],
            blacklisted_candidates={
                (0.3, 1.0),
                (0.4, 3.0),
                (0.0, 0.0),  # blacklisted candidate, not present in candidates
            },
            num_candidates=4,
            duplicate_detector=duplicate_detector,
        )
        expected = [(0.1, 1.0), (0.6, 1.0), (0.2, 1.0), (1.0, 1.0)]
        # order of the candidates should be preserved
        assert len(expected) == len(got)
        assert all(a == b for a, b in zip(got, expected))
def default_models(do_mcmc=True) -> List[GPMXNetModel]:
    X = [
        (0.0, 0.0),
        (1.0, 0.0),
        (0.0, 1.0),
        (1.0, 1.0),
    ]
    Y = [dictionarize_objective(np.sum(x) * 10.0) for x in X]
    state = TuningJobState(
        HyperparameterRanges_Impl(
            HyperparameterRangeContinuous('x', 0.0, 1.0, LinearScaling()),
            HyperparameterRangeContinuous('y', 0.0, 1.0, LinearScaling()),
        ),
        [CandidateEvaluation(x, y) for x, y in zip(X, Y)],
        [], [])
    random_seed = 0
    gpmodel = default_gpmodel(
        state, random_seed=random_seed,
        optimization_config=DEFAULT_OPTIMIZATION_CONFIG)
    result = [
        GPMXNetModel(state, DEFAULT_METRIC, random_seed, gpmodel,
                     fit_parameters=True, num_fantasy_samples=20)
    ]
    if do_mcmc:
        gpmodel_mcmc = default_gpmodel_mcmc(
            state, random_seed=random_seed, mcmc_config=DEFAULT_MCMC_CONFIG)
        result.append(
            GPMXNetModel(state, DEFAULT_METRIC, random_seed, gpmodel_mcmc,
                         fit_parameters=True, num_fantasy_samples=20))
    return result
def test_dimensionality_and_warping_ranges():
    hp_ranges = HyperparameterRanges_Impl(
        HyperparameterRangeCategorical('categorical1', ('X', 'Y')),
        HyperparameterRangeContinuous('integer', 0.1, 10.0, LogScaling()),
        HyperparameterRangeCategorical('categorical2', ('a', 'b', 'c')),
        HyperparameterRangeContinuous('real', 0.0, 10.0, LinearScaling(), 2.5, 5.0),
        HyperparameterRangeCategorical('categorical3', ('X', 'Y')),
    )
    dim, warping_ranges = dimensionality_and_warping_ranges(hp_ranges)
    assert dim == 9
    assert warping_ranges == {2: (0.0, 1.0), 6: (0.0, 1.0)}
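# The expected values in test_dimensionality_and_warping_ranges are consistent
# with an encoding where categoricals are one-hot encoded (one column per
# category) and numerical hyperparameters take one column each:
# dim = 2 ('categorical1') + 1 ('integer') + 3 ('categorical2') + 1 ('real')
#     + 2 ('categorical3') = 9,
# and the two continuous columns, warped over [0, 1], sit at positions 2 and 6.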
def multi_algo_state():
    def _candidate_evaluations(num):
        return [
            CandidateEvaluation(
                candidate=(i,),
                metrics=dictionarize_objective(float(i)))
            for i in range(num)
        ]

    return {
        '0': TuningJobState(
            hp_ranges=HyperparameterRanges_Impl(
                HyperparameterRangeContinuous(
                    'a1_hp_1', -5.0, 5.0, LinearScaling(), -5.0, 5.0)),
            candidate_evaluations=_candidate_evaluations(2),
            failed_candidates=[(i,) for i in range(3)],
            pending_evaluations=[PendingEvaluation((i,)) for i in range(100)]),
        '1': TuningJobState(
            hp_ranges=HyperparameterRanges_Impl(),
            candidate_evaluations=_candidate_evaluations(5),
            failed_candidates=[],
            pending_evaluations=[]),
        '2': TuningJobState(
            hp_ranges=HyperparameterRanges_Impl(),
            candidate_evaluations=_candidate_evaluations(3),
            failed_candidates=[(i,) for i in range(10)],
            pending_evaluations=[PendingEvaluation((i,)) for i in range(1)]),
        '3': TuningJobState(
            hp_ranges=HyperparameterRanges_Impl(),
            candidate_evaluations=_candidate_evaluations(6),
            failed_candidates=[],
            pending_evaluations=[]),
        '4': TuningJobState(
            hp_ranges=HyperparameterRanges_Impl(),
            candidate_evaluations=_candidate_evaluations(120),
            failed_candidates=[],
            pending_evaluations=[]),
    }
def test_distribution_of_random_candidates():
    random_state = np.random.RandomState(0)
    hp_ranges = HyperparameterRanges_Impl(
        HyperparameterRangeContinuous('0', 1.0, 1000.0, scaling=LinearScaling()),
        HyperparameterRangeContinuous('1', 1.0, 1000.0, scaling=LogScaling()),
        HyperparameterRangeContinuous('2', 0.9, 0.9999, scaling=ReverseLogScaling()),
        HyperparameterRangeInteger('3', 1, 1000, scaling=LinearScaling()),
        HyperparameterRangeInteger('4', 1, 1000, scaling=LogScaling()),
        HyperparameterRangeCategorical('5', ('a', 'b', 'c')),
    )
    num_random_candidates = 600
    random_candidates = [
        hp_ranges.random_candidate(random_state)
        for _ in range(num_random_candidates)
    ]

    # check converting back gets to the same candidate
    for cand in random_candidates[2:]:
        ndarray_candidate = hp_ranges.to_ndarray(cand)
        converted_back = hp_ranges.from_ndarray(ndarray_candidate)
        for hp, hp_converted_back in zip(cand, converted_back):
            if isinstance(hp, str):
                assert hp == hp_converted_back
            else:
                assert_almost_equal(hp, hp_converted_back)

    hps0, hps1, hps2, hps3, hps4, hps5 = zip(*random_candidates)
    assert 200 < np.percentile(hps0, 25) < 300
    assert 450 < np.percentile(hps0, 50) < 550
    assert 700 < np.percentile(hps0, 75) < 800
    # same bounds as the previous but log scaling
    assert 3 < np.percentile(hps1, 25) < 10
    assert 20 < np.percentile(hps1, 50) < 40
    assert 100 < np.percentile(hps1, 75) < 200
    # reverse log
    assert 0.9 < np.percentile(hps2, 25) < 0.99
    assert 0.99 < np.percentile(hps2, 50) < 0.999
    assert 0.999 < np.percentile(hps2, 75) < 0.9999
    # integer
    assert 200 < np.percentile(hps3, 25) < 300
    assert 450 < np.percentile(hps3, 50) < 550
    assert 700 < np.percentile(hps3, 75) < 800
    # same bounds as the previous but log scaling
    assert 3 < np.percentile(hps4, 25) < 10
    assert 20 < np.percentile(hps4, 50) < 40
    assert 100 < np.percentile(hps4, 75) < 200
    counter = Counter(hps5)
    assert len(counter) == 3
    assert 150 < counter['a'] < 250  # should be about 200
    assert 150 < counter['b'] < 250  # should be about 200
    assert 150 < counter['c'] < 250  # should be about 200
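# The percentile windows in test_distribution_of_random_candidates reflect how
# the scalings spread samples over [1, 1000]: under linear scaling the median
# lands near 500, while under log scaling it lands near the geometric midpoint
# sqrt(1 * 1000) ≈ 31.6, with quartiles near 1000**0.25 ≈ 5.6 and
# 1000**0.75 ≈ 178, hence the asymmetric bounds checked for hps1 and hps4.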
from collections import Counter

import numpy as np
import pytest
from numpy.testing import assert_allclose, assert_almost_equal
from pytest import approx

from autogluon.searcher.bayesopt.datatypes.hp_ranges import \
    HyperparameterRangeContinuous, HyperparameterRangeInteger, \
    HyperparameterRangeCategorical, HyperparameterRanges_Impl
from autogluon.searcher.bayesopt.datatypes.scaling import LinearScaling, \
    LogScaling, ReverseLogScaling


@pytest.mark.parametrize('lower,upper,external_hp,internal_ndarray,scaling', [
    (0.0, 8.0, 0.0, 0.0, LinearScaling()),
    (0.0, 8.0, 8.0, 1.0, LinearScaling()),
    (0.0, 8.0, 2.0, 0.25, LinearScaling()),
    (100.2, 100.6, 100.4, 0.5, LinearScaling()),
    (-2.0, 8.0, 0.0, 0.2, LinearScaling()),
    (-11.0, -1.0, -10.0, 0.1, LinearScaling()),
    (1.0, 8.0, 1.0, 0.0, LogScaling()),
    (1.0, 8.0, 8.0, 1.0, LogScaling()),
    (1.0, 10000.0, 10.0, 0.25, LogScaling()),
    (1.0, 10000.0, 100.0, 0.5, LogScaling()),
    (1.0, 10000.0, 1000.0, 0.75, LogScaling()),
    (0.001, 0.1, 0.01, 0.5, LogScaling()),
    (0.1, 100, 1.0, 1.0 / 3, LogScaling()),
])
def test_continuous_to_and_from_ndarray(lower, upper, external_hp,
                                        internal_ndarray, scaling):
def hp_ranges():
    return HyperparameterRanges_Impl(
        HyperparameterRangeInteger('hp1', 0, 200, LinearScaling()),
        HyperparameterRangeCategorical('hp2', ('a', 'b', 'c')))
import pytest

from autogluon.searcher.bayesopt.datatypes.hp_ranges import \
    HyperparameterRanges_Impl, HyperparameterRangeInteger, \
    HyperparameterRangeContinuous, HyperparameterRangeCategorical
from autogluon.searcher.bayesopt.datatypes.scaling import LinearScaling
from autogluon.searcher.bayesopt.utils.duplicate_detector import \
    DuplicateDetectorEpsilon, DuplicateDetectorIdentical, \
    DuplicateDetectorNoDetection

hp_ranges = HyperparameterRanges_Impl(
    HyperparameterRangeInteger('hp1', 0, 1000000000, scaling=LinearScaling()),
    HyperparameterRangeContinuous('hp2', -10.0, 10.0, scaling=LinearScaling()),
    HyperparameterRangeCategorical('hp3', ('a', 'b', 'c')),
)

duplicate_detector_epsilon = DuplicateDetectorEpsilon(hp_ranges)


@pytest.mark.parametrize('existing, new, contained', [
    ({(10, 1.0, 'a'), (20, 2.0, 'b')}, (10000, 3.0, 'c'), False),
    ({(10, 1.0, 'a'), (20, 2.0, 'b')}, (10, 1.000001, 'a'), False),
    ({(10, 1.0, 'a'), (20, 2.0, 'b')}, (20, 2.000001, 'b'), False),
    ({(10, 1.0, 'a'), (20, 2.0, 'b')}, (25, 1.0, 'a'), False),
    ({(10, 1.0, 'a'), (20, 2.0, 'b')}, (10, 1.0, 'a'), True),
    ({(10, 1.0, 'a'), (20, 2.0, 'b')}, (20, 2.0, 'b'), True),
    ({(10, 1.0, 'a'), (20, 2.0, 'b')}, (19, 1.0, 'a'), True),
    ({(10, 1.0, 'a'), (20, 2.0, 'b')}, (10, 1.0000001, 'a'), True),
    ({(10, 1.0, 'a'), (20, 2.0, 'b')}, (10, 1.0, 'c'), False),
    ({(10, 1.0, 'a'), (20, 2.0, 'b')}, (10, 1.0, 'b'), False),
    ({(10, 1.0, 'a'), (20, 2.0, 'b')}, (20, 1.0, 'b'), False),
def test_to_ndarray():
    np.random.seed(123456)
    random_state = np.random.RandomState(123456)
    prob_categ = 0.3

    for iter in range(20):
        # Create ConfigurationSpace
        num_hps = np.random.randint(low=1, high=20)
        if iter == 0:
            _prob_categ = 0.
        elif iter == 1:
            _prob_categ = 1.
        else:
            _prob_categ = prob_categ
        config_space = CS.ConfigurationSpace()
        ndarray_size = 0
        _hp_ranges = dict()
        for hp_it in range(num_hps):
            name = str(hp_it)
            if np.random.random() < _prob_categ:
                num_choices = np.random.randint(low=2, high=11)
                choices = tuple([str(i) for i in range(num_choices)])
                hp = CSH.CategoricalHyperparameter(name, choices=choices)
                hp2 = HyperparameterRangeCategorical(name, choices)
                ndarray_size += num_choices
            else:
                ndarray_size += 1
                rand_coin = np.random.random()
                if rand_coin < 0.5:
                    log_scaling = (rand_coin < 0.25)
                    hp = CSH.UniformFloatHyperparameter(
                        name=name, lower=0.5, upper=5., log=log_scaling)
                    hp2 = HyperparameterRangeContinuous(
                        name, lower_bound=0.5, upper_bound=5.,
                        scaling=LogScaling() if log_scaling else LinearScaling())
                else:
                    log_scaling = (rand_coin < 0.75)
                    hp = CSH.UniformIntegerHyperparameter(
                        name=name, lower=2, upper=10, log=log_scaling)
                    hp2 = HyperparameterRangeInteger(
                        name=name, lower_bound=2, upper_bound=10,
                        scaling=LogScaling() if log_scaling else LinearScaling())
            config_space.add_hyperparameter(hp)
            _hp_ranges[name] = hp2
        hp_ranges_cs = HyperparameterRanges_CS(config_space)
        hp_ranges = HyperparameterRanges_Impl(
            *[_hp_ranges[x] for x in config_space.get_hyperparameter_names()])
        # Compare ndarrays created by both codes
        for cmp_it in range(5):
            config_cs = hp_ranges_cs.random_candidate(random_state)
            _config = config_cs.get_dictionary()
            # Use a tuple (not a generator), matching how candidates are
            # represented elsewhere
            config = tuple(
                _config[name]
                for name in config_space.get_hyperparameter_names())
            ndarr_cs = hp_ranges_cs.to_ndarray(config_cs)
            ndarr = hp_ranges.to_ndarray(config)
            assert_allclose(ndarr_cs, ndarr, rtol=1e-4)
# TODO: This code tests XYZScaling, which is only needed for
# HyperparameterRanges. If the latter code is removed, this test can go
# as well.
import pytest
from numpy.testing import assert_almost_equal

from autogluon.searcher.bayesopt.datatypes.scaling import LinearScaling, \
    LogScaling, ReverseLogScaling


@pytest.mark.parametrize('value, expected, scaling', [
    (0.0, 0.0, LinearScaling()),
    (0.5, 0.5, LinearScaling()),
    (5.0, 5.0, LinearScaling()),
    (-5.0, -5.0, LinearScaling()),
    (0.5, -0.69314718055994529, LogScaling()),
    (5.0, 1.6094379124341003, LogScaling()),
    (0.0, 0.0, ReverseLogScaling()),
    (0.5, 0.69314718055994529, ReverseLogScaling())
])
def test_to_internal(value, expected, scaling):
    assert_almost_equal(expected, scaling.to_internal(value))


@pytest.mark.parametrize('value, expected, scaling', [
    (0.0001, -9.210340371976182, LogScaling()),
    (0.000001, -13.815510557964274, LogScaling()),
    (0.0001, 0.00010000500033334732, ReverseLogScaling()),
    (0.000001, 1.000000500029089e-06, ReverseLogScaling()),
    (0.9999, 9.210340371976294, ReverseLogScaling()),
    (0.999999, 13.815510557935518, ReverseLogScaling())
def test_gp_fantasizing():
    """
    Compare whether acquisition function evaluations (values, gradients)
    with fantasizing are the same as averaging them by hand.
    """
    random_seed = 4567
    _set_seeds(random_seed)
    num_fantasy_samples = 10
    num_pending = 5
    hp_ranges = HyperparameterRanges_Impl(
        HyperparameterRangeContinuous('x', 0.0, 1.0, LinearScaling()),
        HyperparameterRangeContinuous('y', 0.0, 1.0, LinearScaling()))
    X = [
        (0.0, 0.0),
        (1.0, 0.0),
        (0.0, 1.0),
        (1.0, 1.0),
    ]
    num_data = len(X)
    Y = [dictionarize_objective(np.random.randn(1, 1))
         for _ in range(num_data)]
    # Draw fantasies. This is done for a number of fixed pending candidates.
    # The model parameters are fit in the first iteration, when there are
    # no pending candidates.
    # Note: It is important not to normalize targets, because normalization
    # would be done on the observed targets only, not the fantasized ones,
    # which would make the comparison below hard.
    pending_evaluations = []
    for _ in range(num_pending):
        pending_cand = tuple(np.random.rand(2,))
        pending_evaluations.append(PendingEvaluation(pending_cand))
    state = TuningJobState(
        hp_ranges,
        [CandidateEvaluation(x, y) for x, y in zip(X, Y)],
        failed_candidates=[],
        pending_evaluations=pending_evaluations)
    gpmodel = default_gpmodel(
        state, random_seed,
        optimization_config=DEFAULT_OPTIMIZATION_CONFIG)
    model = GPMXNetModel(
        state, DEFAULT_METRIC, random_seed, gpmodel,
        fit_parameters=True, num_fantasy_samples=num_fantasy_samples,
        normalize_targets=False)
    fantasy_samples = model.fantasy_samples
    # Evaluate acquisition function and gradients with fantasizing
    num_test = 50
    X_test = np.vstack([
        hp_ranges.to_ndarray(tuple(np.random.rand(2,)))
        for _ in range(num_test)
    ])
    acq_func = EIAcquisitionFunction(model)
    fvals, grads = acq_func.compute_acq_with_gradients(X_test)
    # Do the same computation by averaging by hand
    fvals_cmp = np.empty((num_fantasy_samples,) + fvals.shape)
    grads_cmp = np.empty((num_fantasy_samples,) + grads.shape)
    X_full = X + state.pending_candidates
    for it in range(num_fantasy_samples):
        Y_full = Y + [
            dictionarize_objective(eval.fantasies[DEFAULT_METRIC][:, it])
            for eval in fantasy_samples
        ]
        state2 = TuningJobState(
            hp_ranges,
            [CandidateEvaluation(x, y) for x, y in zip(X_full, Y_full)],
            failed_candidates=[],
            pending_evaluations=[])
        # We have to skip parameter optimization here
        model2 = GPMXNetModel(
            state2, DEFAULT_METRIC, random_seed, gpmodel,
            fit_parameters=False, num_fantasy_samples=num_fantasy_samples,
            normalize_targets=False)
        acq_func2 = EIAcquisitionFunction(model2)
        fvals_, grads_ = acq_func2.compute_acq_with_gradients(X_test)
        fvals_cmp[it, :] = fvals_
        grads_cmp[it, :] = grads_
    # Comparison
    fvals2 = np.mean(fvals_cmp, axis=0)
    grads2 = np.mean(grads_cmp, axis=0)
    assert np.allclose(fvals, fvals2)
    assert np.allclose(grads, grads2)