def save_games(LEARNING_RATES=None, EPSILONS=None, END_EPSILON_DECAYING_POSITIONS=None,
               DISCOUNTS=None, DISCRETE_OS_SIZES=None, episodes=5000, show_every=4000,
               stats_every=100):
    """Grid-search MountainCar Q-learning hyper-parameters and persist the stats.

    For every combination drawn from the supplied hyper-parameter lists, runs one
    game via ``mountain_car_single_game`` and saves that run's average-reward
    series with ``MountainCar_Q_Learning_storage_agent.save_np`` under a name
    encoding the combination.

    Parameters (each grid list falls back to a small built-in grid when ``None``):
        LEARNING_RATES: learning rates to sweep.
        EPSILONS: initial exploration rates to sweep.
        END_EPSILON_DECAYING_POSITIONS: epsilon-decay end positions to sweep.
        DISCOUNTS: discount factors to sweep.
        DISCRETE_OS_SIZES: per-dimension bucket counts; each entry S becomes a
            square 2-D discretisation ``[S, S]``.
        episodes: episodes per game.
        show_every: render interval forwarded to the game.
        stats_every: stats-aggregation interval forwarded to the game.
    """
    # Function-scope imports keep the block self-contained; ``time`` is not in
    # the visible module-level import block even though this code needs it.
    import itertools
    import time

    # Avoid mutable default arguments: substitute the original default grids.
    if LEARNING_RATES is None:
        LEARNING_RATES = [0.15, 0.20]
    if EPSILONS is None:
        EPSILONS = [0.5]
    if END_EPSILON_DECAYING_POSITIONS is None:
        END_EPSILON_DECAYING_POSITIONS = [2.0]
    if DISCOUNTS is None:
        DISCOUNTS = [0.95]
    if DISCRETE_OS_SIZES is None:
        DISCRETE_OS_SIZES = [20]

    games_count = (len(LEARNING_RATES) * len(EPSILONS)
                   * len(END_EPSILON_DECAYING_POSITIONS) * len(DISCOUNTS)
                   * len(DISCRETE_OS_SIZES))
    total_rounds_time = 0.0
    print("games_count: ", games_count)

    # itertools.product replaces the original five nested range(len(...)) loops;
    # ``round_no`` avoids shadowing the ``round`` builtin.
    for round_no, (learning_rate, epsilon, end_epsilon_decaying, discount, size) in enumerate(
            itertools.product(LEARNING_RATES, EPSILONS,
                              END_EPSILON_DECAYING_POSITIONS, DISCOUNTS,
                              DISCRETE_OS_SIZES),
            start=1):
        discrete_os_size = [size, size]  # square discretisation grid

        NAME = "ep-{}__stats-{}__lr-{}__eps-{}__epsDec-{}__disc-{}__size-{}".format(
            episodes, stats_every, learning_rate, epsilon, end_epsilon_decaying,
            discount, discrete_os_size)
        print(NAME)

        start = time.time()
        stats_ep_rewards = mountain_car_single_game(
            LEARNING_RATE=learning_rate, epsilon=epsilon,
            end_epsilon_decaying_position=end_epsilon_decaying,
            DISCOUNT=discount, DISCRETE_OS_SIZE=discrete_os_size,
            EPISODES=episodes, SHOW_EVERY=show_every, STATS_EVERY=stats_every)
        round_time = time.time() - start
        total_rounds_time += round_time

        print("round: |", round_no, "/", games_count, "|")
        # BUG FIX: time.strftime() requires a struct_time, not a float number of
        # seconds — the original passed the raw float and raised TypeError.
        remaining = total_rounds_time / round_no * (games_count - round_no)
        print("round time length: ", time.strftime('%M:%S', time.gmtime(round_time)),
              " ||| time left expected: ",
              time.strftime('%H:%M:%S', time.gmtime(remaining)))

        # Persist the average-reward curve for this hyper-parameter combination.
        # (The original also read stats_ep_rewards['ep'] into an unused local;
        # that dead read has been dropped.)
        stats_ep_rewards_avg = stats_ep_rewards['avg']
        MountainCar_Q_Learning_storage_agent.save_np(
            name=NAME, data=np.array(stats_ep_rewards_avg))

        # NOTE(review): assigned but never used in the visible code; kept for
        # parity with the original, which also assigned it last in the loop.
        EPISODES_NAME = "ep-{}__stats-{}__episodes".format(episodes, stats_every)
# NOTE(review): this line is a fragment of a DDPG-style training loop — it begins
# mid-expression (the opening call, presumably ``tf_prev_state = tf.expand_dims(``,
# is missing) and references names not defined anywhere in the visible code
# (``tf``, ``Agent``, ``ou_noise``, ``env``, ``ep_reward_list``, ``NAME``).
# It is not valid Python as it stands. Left byte-identical; recover the original
# file/chunk boundaries before editing this logic.
tf.convert_to_tensor(prev_state), 0) action = Agent.action( tf_prev_state, ou_noise ) # Recieve state and reward from environment. state, reward, done, info = env.step( action) Agent.record( (prev_state, action, reward, state)) episodic_reward += reward Agent.learn() if done: # End this episode when `done` is True break prev_state = state ep_reward_list.append(episodic_reward) # Mean of last 40 episodes avg_reward = np.mean( ep_reward_list[-stats_every:]) print("Episode * {} * Avg Reward is ==> {}". format(ep, avg_reward)) avg_reward_list.append(avg_reward) MountainCar_Q_Learning_storage_agent.save_np( name=NAME, data=np.array(avg_reward_list))
# Third-party imports, then project-local imports (PEP 8 grouping).
import matplotlib.pyplot as plt
import numpy as np

import OpenAi.MountainCar.Q_Learning.gym_agent as MountainCar_Q_Learning_gym_agent
import OpenAi.MountainCar.Q_Learning.storage_agent as MountainCar_Q_Learning_storage_agent
import OpenAi.MountainCar.Q_Learning.visualisation_agent as MountainCar_Q_Learning_visualisation_agent

# Hyper-parameter grids for the Q-learning sweep.
LEARNING_RATES = [0.15, 0.20]
EPSILONS = [0.2, 0.5, 0.7]
END_EPSILON_DECAYING_POSITIONS = [1.5, 2.0, 2.5]
DISCOUNTS = [0.70, 0.85, 0.95]
DISCRETE_OS_SIZES = [10, 20, 30]

episodes = 10000
show_every = 1000
stats_every = 200

# Episode indices at which stats are recorded: 0, stats_every, 2*stats_every, ...
# Equivalent to the original loop appending every episode where
# episode % stats_every == 0. (The original also set an unused ``round = 0``,
# which shadowed the builtin; removed.)
stats_ep_rewards = list(range(0, episodes, stats_every))

# Sanity check: the 11th recorded index (10 * stats_every == 2000).
print(stats_ep_rewards[10])

# Persist the recorded-episode index axis under a name encoding the run config.
EPISODES_NAME = "ep-{}__stats-{}__episodes".format(episodes, stats_every)
MountainCar_Q_Learning_storage_agent.save_np(name=EPISODES_NAME,
                                             data=np.array(stats_ep_rewards))