import numpy as np
import pytest

from stable_baselines3.common.noise import (
    ActionNoise,
    OrnsteinUhlenbeckActionNoise,
    VectorizedActionNoise,
)


def test_vec_noise():
    num_envs = 4
    num_actions = 10
    mu = np.zeros(num_actions)
    sigma = np.ones(num_actions) * 0.4
    base: ActionNoise = OrnsteinUhlenbeckActionNoise(mu, sigma)

    # Invalid number of environments
    with pytest.raises(ValueError):
        vec = VectorizedActionNoise(base, -1)
    with pytest.raises(ValueError):
        vec = VectorizedActionNoise(base, None)
    with pytest.raises(ValueError):
        vec = VectorizedActionNoise(base, "whatever")

    # Valid construction: one independent copy of the base noise per environment
    vec = VectorizedActionNoise(base, num_envs)
    assert vec.n_envs == num_envs
    assert vec().shape == (num_envs, num_actions)
    assert not (vec() == base()).all()

    # Invalid base noise objects
    with pytest.raises(ValueError):
        vec = VectorizedActionNoise(None, num_envs)
    with pytest.raises(TypeError):
        vec = VectorizedActionNoise(12, num_envs)

    # Invalid assignments to the noises attribute
    with pytest.raises(AssertionError):
        vec.noises = []
    with pytest.raises(TypeError):
        vec.noises = None
    with pytest.raises(ValueError):
        vec.noises = [None] * vec.n_envs
    with pytest.raises(AssertionError):
        vec.noises = [base] * (num_envs - 1)

    # The valid instance is left untouched by the failed assignments
    assert all(isinstance(noise, type(base)) for noise in vec.noises)
    assert len(vec.noises) == num_envs
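
# A minimal usage sketch outside the test, under the same assumptions as above
# (Ornstein-Uhlenbeck base noise, names chosen for illustration only): each call
# to the vectorized noise returns one sample per environment, and reset()
# reinitialises every copied noise process.
def demo_vec_noise(num_envs=4, num_actions=10):
    base = OrnsteinUhlenbeckActionNoise(np.zeros(num_actions), 0.4 * np.ones(num_actions))
    vec = VectorizedActionNoise(base, num_envs)
    sample = vec()  # shape: (num_envs, num_actions)
    vec.reset()  # resets the internal state of each per-environment copy
    return sample
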
import os
import subprocess

import gym
import numpy as np
import stable_baselines3 as sb3
from stable_baselines3 import TD3
from stable_baselines3.common.callbacks import EvalCallback
from stable_baselines3.common.env_util import make_vec_env
from stable_baselines3.common.noise import (
    NormalActionNoise,
    OrnsteinUhlenbeckActionNoise,
    VectorizedActionNoise,
)
from stable_baselines3.common.vec_env import SubprocVecEnv


def run(
    train_freq,
    gradient_steps,
    batch_size,
    envname,
    n_envs,
    log_interval,
    learning_rate,
    buffer_size,
    tau,
    gamma,
    target_policy_noise,
    target_noise_clip,
    learning_starts,
    total_timesteps,
    policy_kwargs,
    action_noise_mean,
    action_noise_sigma,
    noise_type,
    eval_freq,
    n_eval_episodes,
    verbose=True,
    tensorboard_log="logs/",
):
    # Scale per-environment quantities when training with multiple environments
    eval_freq = max(eval_freq // n_envs, 1)
    buffer_size = max(buffer_size // n_envs, 1)
    all_args = locals()

    # Record the stable-baselines3 commit for reproducibility
    path = "/" + os.path.join(*sb3.__file__.split("/")[:-2])
    commit_num = subprocess.check_output(["git", "describe", "--always"], cwd=path).strip().decode()

    env = gym.make(envname)
    vecenv = make_vec_env(envname, vec_env_cls=SubprocVecEnv, n_envs=n_envs)

    # The action noise objects for TD3: one independent process per environment
    n_actions = env.action_space.shape[-1]
    if noise_type == "OU":
        base_noise_class = OrnsteinUhlenbeckActionNoise
    elif noise_type == "Normal":
        base_noise_class = NormalActionNoise
    else:
        raise ValueError(f"Unknown noise_type: {noise_type}")
    base_noise = base_noise_class(
        mean=np.ones(n_actions) * action_noise_mean,
        sigma=action_noise_sigma * np.ones(n_actions),
    )
    action_noise = VectorizedActionNoise(base_noise, vecenv.num_envs)

    # Callbacks: log the run arguments and commit, and evaluate periodically.
    # LoggerCallback is a project-local callback (not part of stable-baselines3),
    # assumed to be defined elsewhere in this repository.
    loggercallback = LoggerCallback("json", [("arguments", all_args), ("git", commit_num)])
    evalcallback = EvalCallback(
        make_vec_env(envname, vec_env_cls=SubprocVecEnv),
        n_eval_episodes=n_eval_episodes,
        eval_freq=eval_freq,
    )

    # Initiate the model and start learning
    model = TD3(
        "MlpPolicy",
        vecenv,
        action_noise=action_noise,
        batch_size=batch_size,
        train_freq=train_freq,
        gradient_steps=gradient_steps,
        learning_starts=learning_starts,
        n_episodes_rollout=-1,
        learning_rate=learning_rate,
        buffer_size=buffer_size,
        tau=tau,
        gamma=gamma,
        create_eval_env=True,
        target_policy_noise=target_policy_noise,
        target_noise_clip=target_noise_clip,
        verbose=verbose,
        policy_kwargs=policy_kwargs,
        tensorboard_log=tensorboard_log,
        device="cuda",
    )
    model.learn(
        total_timesteps=total_timesteps,
        log_interval=log_interval,
        callback=[loggercallback, evalcallback],
        tb_log_name=envname,
    )
    model.env.close()
    evalcallback.eval_env.close()

    return evalcallback.best_mean_reward
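
# A hypothetical invocation sketch: the environment id and hyperparameter values
# below are illustrative placeholders, not settings from the original experiments.
# The __main__ guard is required because SubprocVecEnv spawns worker processes,
# and LoggerCallback must be importable in this module for the call to succeed.
if __name__ == "__main__":
    best_reward = run(
        train_freq=1,
        gradient_steps=1,
        batch_size=256,
        envname="Pendulum-v0",
        n_envs=4,
        log_interval=10,
        learning_rate=1e-3,
        buffer_size=1_000_000,
        tau=0.005,
        gamma=0.99,
        target_policy_noise=0.2,
        target_noise_clip=0.5,
        learning_starts=100,
        total_timesteps=100_000,
        policy_kwargs=None,  # use the default TD3 policy architecture
        action_noise_mean=0.0,
        action_noise_sigma=0.1,
        noise_type="Normal",
        eval_freq=1000,
        n_eval_episodes=5,
    )
    print(f"Best mean evaluation reward: {best_reward}")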