Exemplo n.º 1
0
 def test_minimizer_step(self):
     """Run one minimizer step and check the smallest reward is close to 0."""
     minimizer = local_minimizer()
     state_spec = {"actions": {"dtype": judo.float64, "size": (2, )}}
     model_states = StatesModel(state_dict=state_spec, batch_size=N_WALKERS)
     # NOTE(review): this compares the attribute with itself, so it mostly
     # just exercises reading `shape` -- confirm what it was meant to be
     # compared against.
     assert minimizer.shape == minimizer.shape
     env_states = minimizer.reset(N_WALKERS)
     stepped = minimizer.step(model_states=model_states,
                              env_states=env_states)
     assert judo.allclose(stepped.rewards.min(), 0)
Exemplo n.º 2
0
    def test_sample(self):
        """Sample from NormalContinuous and check the range, mean and std.

        Two configurations are exercised: one built without ``loc``/``scale``
        (compared against mean 0 and std 1) and one built with ``loc=5`` and
        ``scale=2``.
        """
        # Case 1: no loc/scale passed, bounds [-5, 5].
        narrow = Bounds(low=-5, high=5, shape=(3, ))
        sampler = NormalContinuous(bounds=narrow)
        sampled = sampler.predict(batch_size=10000).actions
        assert sampled.min() >= -5
        assert sampled.max() <= 5
        assert judo.allclose(sampled.mean(), tensor(0.0), atol=0.05)
        assert judo.allclose(sampled.std(), tensor(1.0), atol=0.05)

        # Case 2: loc=5, scale=2, bounds [-10, 30] with a 2-d shape.
        wide = Bounds(low=-10, high=30, shape=(3, 10))
        sampler = NormalContinuous(bounds=wide, loc=5, scale=2)
        sampled = sampler.predict(batch_size=10000).actions
        assert sampled.min() >= -10
        assert sampled.max() <= 30
        # The observed statistic doubles as the failure message.
        observed_mean = sampled.mean()
        assert judo.allclose(observed_mean, tensor(5.0),
                             atol=0.05), observed_mean
        observed_std = sampled.std()
        assert judo.allclose(observed_std, tensor(2.0),
                             atol=0.05), observed_std
Exemplo n.º 3
0
    def test_from_array_with_scale_positive(self):
        """Build Bounds from arrays with ``scale=1.1`` and check the limits.

        Three inputs are covered: column range [0, 10], column range
        [-10, 0] and column range [10, 100].
        """
        # Case 1: column minima 0, column maxima 10.
        points = tensor([[0, 0, 0], [10, 0, 0], [0, 10, 0], [10, 10, 10]],
                        dtype=judo.float)
        bounds = Bounds.from_array(points, scale=1.1)
        expected_low = tensor([0, 0, 0], dtype=judo.float)
        assert (bounds.low == expected_low).all(), (
            bounds.low,
            points.min(axis=0),
        )
        expected_high = tensor([11, 11, 11], dtype=judo.float)
        assert (bounds.high == expected_high).all(), (
            bounds.high,
            points.max(axis=0),
        )
        assert bounds.shape == (3, )

        # Case 2: column minima -10, column maxima 0.
        points = tensor(
            [[-10, 0, 0], [-10, 0, 0], [0, -10, 0], [-10, -10, -10]],
            dtype=judo.float)
        bounds = Bounds.from_array(points, scale=1.1)
        expected_high = tensor([0, 0, 0], dtype=judo.float)
        assert (bounds.high == expected_high).all(), (
            bounds.high,
            points.max(axis=0),
        )
        expected_low = tensor([-11, -11, -11], dtype=judo.float)
        assert (bounds.low == expected_low).all(), (
            bounds.low,
            points.min(axis=0),
        )
        assert bounds.shape == (3, )

        # Case 3: column minima 10, column maxima 100 (compared with
        # allclose rather than exact equality).
        points = tensor(
            [[10, 10, 10], [100, 10, 10], [10, 100, 10], [100, 100, 100]],
            dtype=judo.float)
        bounds = Bounds.from_array(points, scale=1.1)
        expected_low = tensor([9.0, 9.0, 9], dtype=judo.float)
        assert judo.allclose(bounds.low, expected_low), (
            bounds.low,
            points.min(axis=0),
        )
        expected_high = tensor([110, 110, 110], dtype=judo.float)
        assert judo.allclose(bounds.high, expected_high), (
            bounds.high,
            points.max(axis=0),
        )
        assert bounds.shape == (3, )
Exemplo n.º 4
0
 def test_clip(self):
     """Clip an array with per-column bounds and compare to expected values."""
     # One (low, high) pair per column.
     limits = ((-1, 10), (-3, 4), (2, 5))
     raw = tensor([[-10, 0, 0], [11, 0, 0], [0, 11, 0], [11, 11, 11]],
                  dtype=judo.float)
     bounds = Bounds.from_tuples(limits)
     clipped = bounds.clip(raw)
     expected = tensor(
         [[-1.0, 0.0, 2.0], [10.0, 0.0, 2.0], [0.0, 4.0, 2], [10, 4, 5]],
         dtype=judo.float)
     # On failure, report both dtypes.
     assert judo.allclose(clipped, expected), (clipped.dtype, expected.dtype)
Exemplo n.º 5
0
 def test_safe_margin(self, bounds_fixture: Bounds):
     """Check safe_margin with no arguments, with ``low``, and with a scale."""
     # No arguments: both limits are expected to match the fixture's.
     untouched = bounds_fixture.safe_margin()
     assert judo.allclose(untouched.low, bounds_fixture.low)
     assert judo.allclose(untouched.high, bounds_fixture.high)

     # Explicit ``low``: only the lower limit is expected to change.
     forced_low = judo.full_like(bounds_fixture.low, -10)
     lowered = bounds_fixture.safe_margin(low=forced_low)
     assert judo.allclose(lowered.high, bounds_fixture.high)
     assert judo.allclose(lowered.low, forced_low)

     # ``low`` plus ``scale=2``: both limits are expected to be doubled.
     doubled = bounds_fixture.safe_margin(low=forced_low, scale=2)
     assert judo.allclose(doubled.high, bounds_fixture.high * 2)
     assert judo.allclose(doubled.low, forced_low * 2)
Exemplo n.º 6
0
    def test_sample_with_critic(self, n_actions):
        """Sample from DiscreteUniform with a critic via predict() and sample().

        Both code paths must produce 1-d actions within the checked range and
        a ``critic_score`` entry whose values all equal 5.
        """

        def check_actions(sampled):
            # Checks shared by both sampling paths.
            assert len(sampled.shape) == 1
            assert len(judo.unique(sampled)) <= n_actions
            assert all(sampled >= 0)
            # NOTE(review): the upper bound is inclusive; confirm whether
            # n_actions itself is a valid action or this should be strict.
            assert all(sampled <= n_actions)

        def check_critic(states_with_score):
            assert "critic_score" in states_with_score.keys()
            assert (states_with_score.critic_score == 5).all()

        model = DiscreteUniform(n_actions=n_actions, critic=DummyCritic())

        # Path 1: predict() creates the states itself.
        predicted = model.predict(batch_size=1000)
        check_actions(predicted.actions)
        check_critic(predicted)

        # Path 2: sample() receives pre-built model states.
        initial = create_model_states(batch_size=100, model=model)
        resampled = model.sample(batch_size=initial.n, model_states=initial)
        actions = resampled.actions
        check_actions(actions)
        # Casting to int must not change any value.
        assert judo.allclose(actions, judo.astype(actions, dtype.int))
        check_critic(resampled)
Exemplo n.º 7
0
 def test_optimum(self, wiki_benchmark):
     """Evaluate the function at its best state and compare to the benchmark."""
     optimum = wiki_benchmark.best_state
     # Prepend a length-1 axis before calling the function.
     batched = optimum.reshape((1, ) + tuple(optimum.shape))
     value = wiki_benchmark.function(batched)
     expected = wiki_benchmark.benchmark
     # The class name in the message identifies the failing benchmark.
     assert judo.allclose(value[0],
                          expected), wiki_benchmark.__class__.__name__
Exemplo n.º 8
0
 def test_from_array_with_scale_negative(self):
     """Build Bounds from arrays with shrinking/growing scales and check limits.

     Five combinations of data sign and scale (0.9 or 1.1) are covered; each
     case states the column range of its input and the scale used.
     """
     # Case 1: column minima -10, column maxima 0, scale 0.9.
     points = tensor(
         [[-10, 0, 0], [-10, 0, 0], [0, -10, 0], [-10, -10, -10]],
         dtype=judo.float)
     bounds = Bounds.from_array(points, scale=0.9)
     expected_high = tensor([0, 0, 0], dtype=judo.float)
     assert (bounds.high == expected_high).all(), (
         bounds.high,
         points.max(axis=0),
     )
     expected_low = tensor([-9, -9, -9], dtype=judo.float)
     assert (bounds.low == expected_low).all(), (
         bounds.low,
         points.min(axis=0),
     )
     assert bounds.shape == (3, )

     # Case 2: column minima 0, column maxima 10, scale 0.9.
     points = tensor([[0, 0, 0], [10, 0, 0], [0, 10, 0], [10, 10, 10]],
                     dtype=judo.float)
     bounds = Bounds.from_array(points, scale=0.9)
     expected_low = tensor([0, 0, 0], dtype=judo.float)
     assert (bounds.low == expected_low).all(), (bounds, points)
     expected_high = tensor([9, 9, 9], dtype=judo.float)
     assert (bounds.high == expected_high).all()
     assert bounds.shape == (3, )

     # Case 3: column minima 10, column maxima 100, scale 0.9.
     points = tensor(
         [[10, 10, 10], [100, 10, 10], [10, 100, 10], [100, 100, 100]],
         dtype=judo.float)
     bounds = Bounds.from_array(points, scale=0.9)
     expected_low = tensor([9.0, 9.0, 9.0], dtype=judo.float)
     assert judo.allclose(bounds.low, expected_low), (
         bounds.low,
         points.min(axis=0),
     )
     expected_high = tensor([90, 90, 90], dtype=judo.float)
     assert judo.allclose(bounds.high, expected_high), (
         bounds.high,
         points.max(axis=0),
     )
     assert bounds.shape == (3, )

     # Case 4: column minima -100, column maxima -10, scale 1.1.
     points = tensor(
         [[-100, -10, -10], [-100, -10, -10], [-10, -100, -10],
          [-100, -100, -100]],
         dtype=judo.float,
     )
     bounds = Bounds.from_array(points, scale=1.1)
     expected_high = tensor([-9, -9, -9], dtype=judo.float)
     assert judo.allclose(bounds.high, expected_high), (
         bounds.high,
         points.max(axis=0),
     )
     expected_low = tensor([-110, -110, -110], dtype=judo.float)
     assert judo.allclose(bounds.low, expected_low), (
         bounds.low,
         points.min(axis=0),
     )
     assert bounds.shape == (3, )

     # Case 5: same all-negative data as case 4, scale 0.9.
     points = tensor(
         [[-100, -10, -10], [-100, -10, -10], [-10, -100, -10],
          [-100, -100, -100]],
         dtype=judo.float,
     )
     bounds = Bounds.from_array(points, scale=0.9)
     expected_high = tensor([-11, -11, -11], dtype=judo.float)
     assert judo.allclose(bounds.high, expected_high), (
         bounds.high,
         points.max(axis=0),
     )
     expected_low = tensor([-90, -90, -90], dtype=judo.float)
     assert judo.allclose(bounds.low, expected_low), (
         bounds.low,
         points.min(axis=0),
     )
     assert bounds.shape == (3, )