def test_mean_q_function_factory(observation_shape, action_size):
    """MeanQFunctionFactory builds the matching Q-function and round-trips its params."""
    encoder = _create_encoder(observation_shape, action_size)
    factory = MeanQFunctionFactory()

    # continuous vs discrete is selected by whether an action size is given
    if action_size is None:
        built = factory.create_continuous(encoder)
        assert isinstance(built, ContinuousMeanQFunction)
    else:
        built = factory.create_discrete(encoder, action_size)
        assert isinstance(built, DiscreteMeanQFunction)

    assert factory.get_type() == "mean"

    # serialized params must reconstruct an equivalent factory
    params = factory.get_params()
    rebuilt = MeanQFunctionFactory(**params)
    assert rebuilt.get_params() == params
def test_create_continuous_q_function(
    observation_shape,
    action_size,
    batch_size,
    n_ensembles,
    encoder_factory,
    share_encoder,
    bootstrap,
):
    """create_continuous_q_function returns a properly configured ensemble.

    Checks the ensemble type, bootstrap propagation, encoder sharing
    between members, and the forward-pass output shape.
    """
    q_func_factory = MeanQFunctionFactory(
        share_encoder=share_encoder, bootstrap=bootstrap
    )
    q_func = create_continuous_q_function(
        observation_shape,
        action_size,
        encoder_factory,
        q_func_factory,
        n_ensembles,
    )
    assert isinstance(q_func, EnsembleContinuousQFunction)

    # bootstrap is forced off for a single-member ensemble
    if n_ensembles == 1:
        assert q_func.bootstrap is False
    else:
        assert q_func.bootstrap == bootstrap

    # check share_encoder.
    # BUGFIX: the original looped `for q_func in q_func.q_funcs[1:]`, shadowing
    # `q_func`, so the forward-pass check below ran against the last ensemble
    # member instead of the ensemble itself. Use a distinct loop variable.
    encoder = q_func.q_funcs[0].encoder
    for member in q_func.q_funcs[1:]:
        if share_encoder:
            assert encoder is member.encoder
        else:
            assert encoder is not member.encoder

    # forward pass through the ensemble itself
    x = torch.rand((batch_size, ) + observation_shape)
    action = torch.rand(batch_size, action_size)
    y = q_func(x, action)
    assert y.shape == (batch_size, 1)
# NOTE(review): this chunk begins mid-import — the `from ... import (` opener
# (presumably d3rlpy's argument-utility module) lies outside the visible range.
    check_scaler,
    check_use_gpu,
)

from d3rlpy.gpu import Device
from d3rlpy.models.encoders import DefaultEncoderFactory
from d3rlpy.models.q_functions import MeanQFunctionFactory
from d3rlpy.preprocessing.action_scalers import MinMaxActionScaler
from d3rlpy.preprocessing.scalers import MinMaxScaler


@pytest.mark.parametrize("value", ["default", DefaultEncoderFactory()])
def test_check_encoder(value):
    # both the string alias and a factory instance normalize to a factory
    assert isinstance(check_encoder(value), DefaultEncoderFactory)


@pytest.mark.parametrize("value", ["mean", MeanQFunctionFactory()])
def test_check_q_func(value):
    # both the string alias and a factory instance normalize to a factory
    assert isinstance(check_q_func(value), MeanQFunctionFactory)


@pytest.mark.parametrize("value", ["min_max", MinMaxScaler(), None])
def test_check_scaler(value):
    # None passes through unchanged; aliases/instances normalize to a scaler
    scaler = check_scaler(value)
    if value is None:
        assert scaler is None
    else:
        assert isinstance(scaler, MinMaxScaler)


@pytest.mark.parametrize("value", ["min_max", MinMaxActionScaler(), None])
def test_check_action_scaler(value):
    # NOTE(review): function body truncated at the chunk boundary — the
    # remainder of this test lies outside the visible range.
# NOTE(review): this chunk begins mid-function — the enclosing test's setup
# (q_func, obs_t, act_t, rew_tp1, q_tp1, gamma, ref_td_sum, bootstrap,
# q_func_type) is defined outside the visible range.
    loss = q_func.compute_error(obs_t, act_t, rew_tp1, q_tp1, gamma)

    if bootstrap:
        # with bootstrap masks, the summed loss should diverge from the
        # unmasked reference TD sum
        assert not torch.allclose(ref_td_sum, loss)
    elif q_func_type != "iqn":
        # presumably IQN's sampled quantiles make the loss stochastic,
        # so exact comparison is skipped for it — TODO confirm
        assert torch.allclose(ref_td_sum, loss)

    # check layer connection
    check_parameter_updates(q_func, (obs_t, act_t, rew_tp1, q_tp1))


@pytest.mark.parametrize("observation_shape", [(4, 84, 84), (100, )])
@pytest.mark.parametrize("action_size", [3])
@pytest.mark.parametrize("encoder_factory", [DefaultEncoderFactory()])
@pytest.mark.parametrize(
    "q_func_factory", [MeanQFunctionFactory(), QRQFunctionFactory()])
@pytest.mark.parametrize("n_ensembles", [2])
@pytest.mark.parametrize("batch_size", [100])
@pytest.mark.parametrize("n_quantiles", [32])
@pytest.mark.parametrize("n_actions", [10])
@pytest.mark.parametrize("lam", [0.75])
def test_compute_max_with_n_actions(
    observation_shape,
    action_size,
    encoder_factory,
    q_func_factory,
    n_ensembles,
    batch_size,
    n_quantiles,
    n_actions,
    lam,
    # NOTE(review): signature truncated at the chunk boundary — the closing
    # parenthesis and function body lie outside the visible range.
# NOTE(review): this chunk begins mid-function — the decorators and signature
# of the enclosing categorical-policy test lie outside the visible range.
    policy = create_categorical_policy(observation_shape, action_size,
                                       encoder_factory)
    assert isinstance(policy, CategoricalPolicy)

    # sampled discrete actions come back as a 1-D batch of indices
    x = torch.rand((batch_size, ) + observation_shape)
    y = policy(x)
    assert y.shape == (batch_size, )


@pytest.mark.parametrize("observation_shape", [(4, 84, 84), (100, )])
@pytest.mark.parametrize("action_size", [2])
@pytest.mark.parametrize("batch_size", [32])
@pytest.mark.parametrize("n_ensembles", [1, 5])
@pytest.mark.parametrize("encoder_factory", [DefaultEncoderFactory()])
@pytest.mark.parametrize("q_func_factory", [MeanQFunctionFactory()])
@pytest.mark.parametrize("share_encoder", [False, True])
def test_create_discrete_q_function(
    observation_shape,
    action_size,
    batch_size,
    n_ensembles,
    encoder_factory,
    q_func_factory,
    share_encoder,
):
    q_func = create_discrete_q_function(
        observation_shape,
        action_size,
        encoder_factory,
        q_func_factory,
        # NOTE(review): call truncated at the chunk boundary — remaining
        # arguments and the rest of this test lie outside the visible range.
import pytest
import torch

from d3rlpy.models.builders import create_continuous_q_function
from d3rlpy.models.encoders import DefaultEncoderFactory
from d3rlpy.models.q_functions import MeanQFunctionFactory, QRQFunctionFactory
from d3rlpy.models.torch.q_functions import compute_max_with_n_actions


@pytest.mark.parametrize("observation_shape", [(4, 84, 84), (100,)])
@pytest.mark.parametrize("action_size", [3])
@pytest.mark.parametrize("encoder_factory", [DefaultEncoderFactory()])
@pytest.mark.parametrize(
    "q_func_factory", [MeanQFunctionFactory(), QRQFunctionFactory()]
)
@pytest.mark.parametrize("n_ensembles", [2])
@pytest.mark.parametrize("batch_size", [100])
@pytest.mark.parametrize("n_quantiles", [32])
@pytest.mark.parametrize("n_actions", [10])
@pytest.mark.parametrize("lam", [0.75])
def test_compute_max_with_n_actions(
    observation_shape,
    action_size,
    encoder_factory,
    q_func_factory,
    n_ensembles,
    batch_size,
    n_quantiles,
    n_actions,
    lam,
):
    # NOTE(review): function body truncated at the chunk boundary — the
    # implementation of this test lies outside the visible range.