def test_td3_impl(
    observation_shape,
    action_size,
    actor_learning_rate,
    critic_learning_rate,
    gamma,
    tau,
    reguralizing_rate,
    n_critics,
    bootstrap,
    share_encoder,
    target_smoothing_sigma,
    target_smoothing_clip,
    eps,
    use_batch_norm,
    q_func_type,
    scaler,
    augmentation,
    n_augmentations,
    encoder_params,
):
    """Construct a ``TD3Impl`` from the given settings and run the shared tester.

    Every argument is forwarded unchanged to ``TD3Impl``; the only value fixed
    here is ``use_gpu=False``.  The resulting object is handed to
    ``torch_impl_tester`` with ``discrete=False``.
    """
    # These are passed positionally, so their order must match the
    # constructor's parameter order exactly.
    positional = (
        observation_shape,
        action_size,
        actor_learning_rate,
        critic_learning_rate,
        gamma,
        tau,
        reguralizing_rate,
        n_critics,
        bootstrap,
        share_encoder,
        target_smoothing_sigma,
        target_smoothing_clip,
        eps,
        use_batch_norm,
    )
    keyword = {
        "q_func_type": q_func_type,
        "use_gpu": False,
        "scaler": scaler,
        "augmentation": augmentation,
        "n_augmentations": n_augmentations,
        "encoder_params": encoder_params,
    }
    impl = TD3Impl(*positional, **keyword)

    # The deterministic-best-action check is switched off for 'iqn' only.
    # NOTE(review): presumably because IQN samples quantiles -- confirm.
    is_deterministic = q_func_type != 'iqn'
    torch_impl_tester(
        impl,
        discrete=False,
        deterministic_best_action=is_deterministic,
    )
def test_td3_impl(
    observation_shape,
    action_size,
    actor_learning_rate,
    critic_learning_rate,
    actor_optim_factory,
    critic_optim_factory,
    encoder_factory,
    q_func_factory,
    gamma,
    tau,
    n_critics,
    bootstrap,
    share_encoder,
    target_reduction_type,
    target_smoothing_sigma,
    target_smoothing_clip,
    scaler,
    action_scaler,
    augmentation,
):
    """Build a ``TD3Impl`` with factory-based components and run the tester.

    ``encoder_factory`` is supplied twice in a row (the seventh and eighth
    positional arguments), and ``q_func_factory`` is first converted with
    ``create_q_func_factory``.  ``use_gpu`` is fixed to ``False``.  The
    resulting object is checked by ``torch_impl_tester`` with
    ``discrete=False``.
    """
    q_func = create_q_func_factory(q_func_factory)

    impl = TD3Impl(
        observation_shape,
        action_size,
        actor_learning_rate,
        critic_learning_rate,
        actor_optim_factory,
        critic_optim_factory,
        # The same encoder factory fills two consecutive positional slots.
        encoder_factory,
        encoder_factory,
        q_func,
        gamma,
        tau,
        n_critics,
        bootstrap,
        share_encoder,
        target_reduction_type,
        target_smoothing_sigma,
        target_smoothing_clip,
        use_gpu=False,
        scaler=scaler,
        action_scaler=action_scaler,
        augmentation=augmentation,
    )

    # The comparison uses the raw ``q_func_factory`` argument, not the
    # object returned by ``create_q_func_factory``.
    expect_deterministic = q_func_factory != "iqn"
    torch_impl_tester(
        impl,
        discrete=False,
        deterministic_best_action=expect_deterministic,
    )
def test_td3_impl(
    observation_shape,
    action_size,
    actor_learning_rate,
    critic_learning_rate,
    actor_optim_factory,
    critic_optim_factory,
    encoder_factory,
    q_func_factory,
    gamma,
    tau,
    n_critics,
    target_reduction_type,
    target_smoothing_sigma,
    target_smoothing_clip,
    scaler,
    action_scaler,
    reward_scaler,
):
    """Instantiate ``TD3Impl`` purely by keyword and run the shared tester.

    ``encoder_factory`` is used for both ``actor_encoder_factory`` and
    ``critic_encoder_factory``; ``q_func_factory`` is converted with
    ``create_q_func_factory``; ``use_gpu`` is passed as ``None``.  The
    resulting object is checked by ``torch_impl_tester`` with
    ``discrete=False``.
    """
    impl_kwargs = {
        "observation_shape": observation_shape,
        "action_size": action_size,
        "actor_learning_rate": actor_learning_rate,
        "critic_learning_rate": critic_learning_rate,
        "actor_optim_factory": actor_optim_factory,
        "critic_optim_factory": critic_optim_factory,
        # One encoder factory serves both the actor and the critic.
        "actor_encoder_factory": encoder_factory,
        "critic_encoder_factory": encoder_factory,
        "q_func_factory": create_q_func_factory(q_func_factory),
        "gamma": gamma,
        "tau": tau,
        "n_critics": n_critics,
        "target_reduction_type": target_reduction_type,
        "target_smoothing_sigma": target_smoothing_sigma,
        "target_smoothing_clip": target_smoothing_clip,
        "use_gpu": None,
        "scaler": scaler,
        "action_scaler": action_scaler,
        "reward_scaler": reward_scaler,
    }
    impl = TD3Impl(**impl_kwargs)

    # The deterministic-best-action check is disabled only when the raw
    # ``q_func_factory`` argument equals "iqn".
    deterministic = q_func_factory != "iqn"
    torch_impl_tester(
        impl,
        discrete=False,
        deterministic_best_action=deterministic,
    )