def random_policy_test(steps=100, verbose=False):
    """Run a uniform-random policy on MountainCar and print summary statistics.

    Args:
        steps: total number of environment steps to simulate (across episodes).
        verbose: if True, print each transition and episode resets.

    Prints steps-per-episode, number of successful (terminated) episodes,
    the average episode length, and the cumulative reward over all steps.
    """
    print("==== Results with Random Policy ====")
    config = Config()
    actions = 3  # MountainCar action space: reverse / neutral / forward
    config.current_step = 0
    env = MountainCar(config)
    cumulative_reward = 0
    terminations = 0
    steps_per_episode = []
    episode_steps = 0
    terminate = False  # defined up front so the post-loop check is safe when steps == 0
    for i in range(steps):
        A = np.random.randint(actions)
        old_state = env.get_current_state()
        next_S, R, terminate = env.step(A)
        if verbose:
            print("Old state:", np.round(old_state, 3), "-->", "Action:", A, "-->", "New state:", np.round(next_S, 3))
        cumulative_reward += R
        episode_steps += 1
        if terminate:
            if verbose:
                print("\n## Reset ##\n")
            terminations += 1
            steps_per_episode.append(episode_steps)
            episode_steps = 0
            env.reset()
    # Record the trailing partial episode so its steps are not lost.
    if not terminate:
        steps_per_episode.append(episode_steps)
    print("Number of steps per episode:", steps_per_episode)
    print("Number of episodes that reached the end:", terminations)
    # Bug fix: average over the per-episode list, not the scalar counter.
    average_length = np.average(steps_per_episode)
    print("The average number of steps per episode was:", average_length)
    print("Cumulative reward:", cumulative_reward)
    print("\n\n")
def pumping_action_test(steps=100, verbose=False):
    """Run the "pumping" heuristic policy on MountainCar and print summary statistics.

    The policy accelerates in the direction of the current velocity:
    action = 1 + sign(velocity), i.e. 0 (reverse), 1 (neutral), or 2 (forward).

    Args:
        steps: total number of environment steps to simulate (across episodes).
        verbose: if True, print each transition and episode resets.

    Prints steps-per-episode, number of successful episodes, per-episode
    returns, and the average return per episode.
    """
    print("==== Results with Pumping Action Policy ====")
    config = Config()
    config.current_step = 0
    env = MountainCar(config)
    steps_per_episode = []
    return_per_episode = []
    episode_steps = 0
    episode_return = 0
    terminations = 0
    terminate = False  # defined up front so the post-loop check is safe when steps == 0
    for i in range(steps):
        current_state = env.get_current_state()
        # state[1] is the velocity; push in the direction of motion.
        A = 1 + np.sign(current_state[1])
        next_S, R, terminate = env.step(A)
        if verbose:
            print("Old state:", np.round(current_state, 3), "-->", "Action:", A, "-->", "New state:", np.round(next_S, 3))
        episode_steps += 1
        episode_return += R
        if terminate:
            terminations += 1
            if verbose:
                print("\n## Reset ##\n")
            env.reset()
            steps_per_episode.append(episode_steps)
            return_per_episode.append(episode_return)
            episode_steps = 0
            episode_return = 0
    # Record the trailing partial episode (consistent with random_policy_test);
    # this also prevents np.mean([]) -> nan when no episode terminates.
    if not terminate:
        steps_per_episode.append(episode_steps)
        return_per_episode.append(episode_return)
    print("Number of steps per episode:", steps_per_episode)
    print("Number of successful episodes:", terminations)
    print("Return per episode:", return_per_episode)
    print("The average return per episode is:", np.mean(return_per_episode))