def test_minimizer_step(self):
    """Run a single minimizer step and check the smallest resulting reward is ~0.

    Model states holding a 2-element float64 ``actions`` entry are created for
    ``N_WALKERS`` walkers and stepped together with freshly reset env states.
    """
    optimizer = local_minimizer()
    state_spec = {"actions": {"dtype": judo.float64, "size": (2, )}}
    model_states = StatesModel(state_dict=state_spec, batch_size=N_WALKERS)
    # NOTE(review): this compares `shape` with itself, so no expected value is
    # actually checked; presumably a comparison against another shape was
    # intended -- confirm before changing.
    assert optimizer.shape == optimizer.shape
    env_states = optimizer.reset(N_WALKERS)
    stepped = optimizer.step(model_states=model_states, env_states=env_states)
    lowest_reward = stepped.rewards.min()
    assert judo.allclose(lowest_reward, 0)
def test_sample(self):
    """Check that ``NormalContinuous`` samples stay inside the bounds and match the moments.

    Two configurations are sampled with a batch of 10000: one built with only
    ``bounds`` (asserted mean ~0, std ~1) and one built with ``loc=5, scale=2``
    (asserted mean ~5, std ~2). Moments are compared with ``atol=0.05``.
    """
    # Model built with bounds only.
    narrow_bounds = Bounds(low=-5, high=5, shape=(3, ))
    narrow_model = NormalContinuous(bounds=narrow_bounds)
    narrow_actions = narrow_model.predict(batch_size=10000).actions
    assert narrow_actions.min() >= -5
    assert narrow_actions.max() <= 5
    assert judo.allclose(narrow_actions.mean(), tensor(0.0), atol=0.05)
    assert judo.allclose(narrow_actions.std(), tensor(1.0), atol=0.05)

    # Model built with explicit loc/scale and wider, asymmetric bounds.
    wide_bounds = Bounds(low=-10, high=30, shape=(3, 10))
    wide_model = NormalContinuous(bounds=wide_bounds, loc=5, scale=2)
    wide_actions = wide_model.predict(batch_size=10000).actions
    assert wide_actions.min() >= -10
    assert wide_actions.max() <= 30
    assert judo.allclose(wide_actions.mean(), tensor(5.0), atol=0.05), wide_actions.mean()
    assert judo.allclose(wide_actions.std(), tensor(2.0), atol=0.05), wide_actions.std()
def test_from_array_with_scale_positive(self):
    """Check ``Bounds.from_array`` with ``scale=1.1`` on three sample arrays.

    The arrays have columns spanning 0..10, -10..0 and 10..100 respectively;
    each case asserts the resulting ``low``, ``high`` and ``shape``.
    """
    # Columns span 0..10: low is asserted to stay 0, high to become 11.
    points = tensor([[0, 0, 0], [10, 0, 0], [0, 10, 0], [10, 10, 10]], dtype=judo.float)
    scaled = Bounds.from_array(points, scale=1.1)
    expected_low = tensor([0, 0, 0], dtype=judo.float)
    assert (scaled.low == expected_low).all(), (scaled.low, points.min(axis=0))
    expected_high = tensor([11, 11, 11], dtype=judo.float)
    assert (scaled.high == expected_high).all(), (scaled.high, points.max(axis=0))
    assert scaled.shape == (3, )

    # Columns span -10..0: high is asserted to stay 0, low to become -11.
    points = tensor(
        [[-10, 0, 0], [-10, 0, 0], [0, -10, 0], [-10, -10, -10]],
        dtype=judo.float,
    )
    scaled = Bounds.from_array(points, scale=1.1)
    expected_high = tensor([0, 0, 0], dtype=judo.float)
    assert (scaled.high == expected_high).all(), (scaled.high, points.max(axis=0))
    expected_low = tensor([-11, -11, -11], dtype=judo.float)
    assert (scaled.low == expected_low).all(), (scaled.low, points.min(axis=0))
    assert scaled.shape == (3, )

    # Columns span 10..100: low is asserted to be ~9, high ~110.
    points = tensor(
        [[10, 10, 10], [100, 10, 10], [10, 100, 10], [100, 100, 100]],
        dtype=judo.float,
    )
    scaled = Bounds.from_array(points, scale=1.1)
    expected_low = tensor([9.0, 9.0, 9], dtype=judo.float)
    assert judo.allclose(scaled.low, expected_low), (scaled.low, points.min(axis=0))
    expected_high = tensor([110, 110, 110], dtype=judo.float)
    assert judo.allclose(scaled.high, expected_high), (scaled.high, points.max(axis=0))
    assert scaled.shape == (3, )
def test_clip(self):
    """Check that ``Bounds.clip`` limits every column to its ``(low, high)`` pair.

    Bounds are built from three tuples, one per column, and the clipped
    array is compared against a hand-written expected tensor.
    """
    intervals = ((-1, 10), (-3, 4), (2, 5))
    raw = tensor([[-10, 0, 0], [11, 0, 0], [0, 11, 0], [11, 11, 11]], dtype=judo.float)
    limits = Bounds.from_tuples(intervals)
    result = limits.clip(raw)
    expected = tensor(
        [[-1.0, 0.0, 2.0], [10.0, 0.0, 2.0], [0.0, 4.0, 2], [10, 4, 5]],
        dtype=judo.float,
    )
    # On failure, report the dtypes of both tensors.
    assert judo.allclose(result, expected), (result.dtype, expected.dtype)
def test_safe_margin(self, bounds_fixture: Bounds):
    """Check ``Bounds.safe_margin`` with no arguments, a custom ``low``, and a ``scale``.

    Args:
        bounds_fixture: Bounds instance supplied by the test fixture.
    """
    # No arguments: both limits are asserted to match the original bounds.
    untouched = bounds_fixture.safe_margin()
    assert judo.allclose(untouched.low, bounds_fixture.low)
    assert judo.allclose(untouched.high, bounds_fixture.high)

    # Custom low only: high is asserted unchanged, low replaced by the given value.
    custom_low = judo.full_like(bounds_fixture.low, -10)
    replaced = bounds_fixture.safe_margin(low=custom_low)
    assert judo.allclose(replaced.high, bounds_fixture.high)
    assert judo.allclose(replaced.low, custom_low)

    # Custom low plus scale=2: both limits are asserted to be doubled.
    doubled = bounds_fixture.safe_margin(low=custom_low, scale=2)
    assert judo.allclose(doubled.high, bounds_fixture.high * 2)
    assert judo.allclose(doubled.low, custom_low * 2)
def test_sample_with_critic(self, n_actions):
    """Check ``DiscreteUniform`` with a ``DummyCritic`` through ``predict`` and ``sample``.

    Both entry points must return one-dimensional actions within the asserted
    range and a ``critic_score`` entry whose values all equal 5.

    Args:
        n_actions: Number of discrete actions, supplied by the test parametrization/fixture.
    """

    def check_actions(sampled):
        # Checks shared by the predict() and sample() results.
        assert len(sampled.shape) == 1
        assert len(judo.unique(sampled)) <= n_actions
        assert all(sampled >= 0)
        # NOTE(review): the upper bound is inclusive here; presumably valid
        # actions are strictly below n_actions -- confirm against the sampler.
        assert all(sampled <= n_actions)

    def check_critic(result):
        # The critic output must be present and equal to 5 everywhere.
        assert "critic_score" in result.keys()
        assert (result.critic_score == 5).all()

    model = DiscreteUniform(n_actions=n_actions, critic=DummyCritic())

    predicted = model.predict(batch_size=1000)
    check_actions(predicted.actions)
    check_critic(predicted)

    states = create_model_states(batch_size=100, model=model)
    resampled = model.sample(batch_size=states.n, model_states=states)
    actions = resampled.actions
    check_actions(actions)
    # Casting to int must not change the values.
    assert judo.allclose(actions, judo.astype(actions, dtype.int))
    check_critic(resampled)
def test_optimum(self, wiki_benchmark):
    """Check that evaluating the benchmark at ``best_state`` yields its ``benchmark`` value.

    Args:
        wiki_benchmark: Benchmark object supplied by the test fixture.
    """
    optimum = wiki_benchmark.best_state
    # Prepend a leading axis of size 1 before calling the function.
    batched = optimum.reshape((1, *optimum.shape))
    values = wiki_benchmark.function(batched)
    expected = wiki_benchmark.benchmark
    # The class name is used as the failure message to identify the benchmark.
    assert judo.allclose(values[0], expected), wiki_benchmark.__class__.__name__
def test_from_array_with_scale_negative(self):
    """Check ``Bounds.from_array`` across sign combinations, mostly with ``scale=0.9``.

    Five cases are asserted in turn; one of them uses ``scale=1.1`` on an
    all-negative array. Each case checks ``low``, ``high`` and ``shape``.
    """
    # high 0, low -, scale < 1: high is asserted to stay 0, low to become -9.
    points = tensor(
        [[-10, 0, 0], [-10, 0, 0], [0, -10, 0], [-10, -10, -10]],
        dtype=judo.float,
    )
    scaled = Bounds.from_array(points, scale=0.9)
    expected_high = tensor([0, 0, 0], dtype=judo.float)
    assert (scaled.high == expected_high).all(), (scaled.high, points.max(axis=0))
    expected_low = tensor([-9, -9, -9], dtype=judo.float)
    assert (scaled.low == expected_low).all(), (scaled.low, points.min(axis=0))
    assert scaled.shape == (3, )

    # high +, low 0, scale < 1: low is asserted to stay 0, high to become 9.
    points = tensor([[0, 0, 0], [10, 0, 0], [0, 10, 0], [10, 10, 10]], dtype=judo.float)
    scaled = Bounds.from_array(points, scale=0.9)
    expected_low = tensor([0, 0, 0], dtype=judo.float)
    assert (scaled.low == expected_low).all(), (scaled, points)
    expected_high = tensor([9, 9, 9], dtype=judo.float)
    assert (scaled.high == expected_high).all()
    assert scaled.shape == (3, )

    # high +, low +, scale < 1: low is asserted to be ~9, high ~90.
    points = tensor(
        [[10, 10, 10], [100, 10, 10], [10, 100, 10], [100, 100, 100]],
        dtype=judo.float,
    )
    scaled = Bounds.from_array(points, scale=0.9)
    expected_low = tensor([9.0, 9.0, 9.0], dtype=judo.float)
    assert judo.allclose(scaled.low, expected_low), (scaled.low, points.min(axis=0))
    expected_high = tensor([90, 90, 90], dtype=judo.float)
    assert judo.allclose(scaled.high, expected_high), (scaled.high, points.max(axis=0))
    assert scaled.shape == (3, )

    # high -, low -, scale > 1: high is asserted to be ~-9, low ~-110.
    points = tensor(
        [[-100, -10, -10], [-100, -10, -10], [-10, -100, -10], [-100, -100, -100]],
        dtype=judo.float,
    )
    scaled = Bounds.from_array(points, scale=1.1)
    expected_high = tensor([-9, -9, -9], dtype=judo.float)
    assert judo.allclose(scaled.high, expected_high), (scaled.high, points.max(axis=0))
    expected_low = tensor([-110, -110, -110], dtype=judo.float)
    assert judo.allclose(scaled.low, expected_low), (scaled.low, points.min(axis=0))
    assert scaled.shape == (3, )

    # high -, low -, scale < 1: high is asserted to be ~-11, low ~-90.
    points = tensor(
        [[-100, -10, -10], [-100, -10, -10], [-10, -100, -10], [-100, -100, -100]],
        dtype=judo.float,
    )
    scaled = Bounds.from_array(points, scale=0.9)
    expected_high = tensor([-11, -11, -11], dtype=judo.float)
    assert judo.allclose(scaled.high, expected_high), (scaled.high, points.max(axis=0))
    expected_low = tensor([-90, -90, -90], dtype=judo.float)
    assert judo.allclose(scaled.low, expected_low), (scaled.low, points.min(axis=0))
    assert scaled.shape == (3, )