def test_sac(
    observation_shape, action_size, q_func_factory, scaler, action_scaler
):
    """Run the shared algorithm checks against a configured SAC instance.

    A SAC object is built from ``q_func_factory``, ``scaler`` and
    ``action_scaler`` and then passed to ``algo_tester`` and
    ``algo_update_tester`` together with the observation shape (and, for
    the update check, the action size).
    """
    sac_kwargs = {
        "q_func_factory": q_func_factory,
        "scaler": scaler,
        "action_scaler": action_scaler,
    }
    algo = SAC(**sac_kwargs)

    algo_tester(algo, observation_shape)
    algo_update_tester(algo, observation_shape, action_size)
def test_sac(
    observation_shape,
    action_size,
    q_func_factory,
    scalers,
    target_reduction_type,
):
    """Run the shared algorithm checks against a fully configured SAC.

    ``scalers`` must unpack into exactly three items, which are forwarded
    to SAC as ``scaler``, ``action_scaler`` and ``reward_scaler`` in that
    order. The resulting instance is passed to ``algo_tester`` (with the
    policy-copy and Q-function-copy checks switched on) and then to
    ``algo_update_tester``.
    """
    # Positional unpacking: (scaler, action_scaler, reward_scaler).
    obs_scaler, act_scaler, rew_scaler = scalers

    algo = SAC(
        q_func_factory=q_func_factory,
        scaler=obs_scaler,
        action_scaler=act_scaler,
        reward_scaler=rew_scaler,
        target_reduction_type=target_reduction_type,
    )

    algo_tester(
        algo,
        observation_shape,
        test_policy_copy=True,
        test_q_function_copy=True,
    )
    algo_update_tester(algo, observation_shape, action_size)
def test_sac_performance(q_func_factory):
    """Run the pendulum tester on SAC for the given Q-function factory.

    The test is skipped when ``q_func_factory`` is ``"iqn"`` or ``"fqf"``;
    otherwise a SAC instance is handed to ``algo_pendulum_tester`` with
    ``n_trials=3``.
    """
    if q_func_factory in ("iqn", "fqf"):
        pytest.skip("IQN is computationally expensive")

    algo_pendulum_tester(SAC(q_func_factory=q_func_factory), n_trials=3)
def test_sac_performance(q_func_type):
    """Run the pendulum tester on SAC for the given Q-function type.

    The test is skipped when ``q_func_type`` is ``'iqn'`` or ``'fqf'``;
    otherwise a SAC instance is handed to ``algo_pendulum_tester`` with
    ``n_trials=3``.
    """
    if q_func_type in ('iqn', 'fqf'):
        pytest.skip('IQN is computationally expensive')

    algo_pendulum_tester(SAC(q_func_type=q_func_type), n_trials=3)
def test_sac(observation_shape, action_size, q_func_type, scaler):
    """Run the shared algorithm checks against a configured SAC instance.

    A SAC object is built from ``q_func_type`` and ``scaler`` and then
    passed to ``algo_tester`` and ``algo_update_tester``.
    """
    algo = SAC(
        q_func_type=q_func_type,
        scaler=scaler,
    )

    algo_tester(algo, observation_shape)
    algo_update_tester(algo, observation_shape, action_size)