def start_workers(num_agents: int = 1, env_name: str = '', state_builder: StateBuilder = None,
                  num_epochs: int = 2001, update_frequency: int = 10):
    """Spawn one training thread per agent, each with its own Controller and QLearner."""
    State.num_agents = num_agents
    for agent in range(num_agents):
        controller = Controller(learner=None, env_id=env_name, state_builder=state_builder,
                                update_freq=update_frequency, id=agent)
        learner = QLearner(controller.get_action_space(), epsilon=0.1, init_alpha=.5,
                           gamma=.9, decay_rate=.999)
        controller.set_learner(learner)
        # Train each agent on its own thread and save its model under a per-agent file name.
        agent_thread = threading.Thread(target=controller.train, kwargs={
            "number_epochs": num_epochs,
            "save_location": '../models/{}-{}.model'.format(env_name, agent)})
        agent_thread.start()
def main():
    cart_pole_ctrl = Controller(None, 'CartPole-v1', StateBuilderCartPole(), communicate=False)
    # cart_pole_ctrl = Controller(None, 'Taxi-v2', None, communicate=False)
    # cart_pole_ctrl = Controller(None, 'LunarLander-v2', state_builder=StateBuilderLunarLander(), communicate=False)
    # cart_pole_ctrl = Controller(None, 'FrozenLake-v0', None, communicate=False)

    # Run three independent training trials so the reward curve can be averaged.
    running_cumulative_reward = []
    for _ in range(3):
        learner = QLearner(cart_pole_ctrl.get_action_space(), epsilon=0.1, init_alpha=.5,
                           gamma=.9, decay_rate=.999)
        cart_pole_ctrl.set_learner(learner)
        cumulative_reward, num_steps = cart_pole_ctrl.train(number_epochs=2001)
        running_cumulative_reward.append(cumulative_reward)

    # Mean cumulative reward per epoch across trials, with a standard-error band.
    ar = np.array(running_cumulative_reward)
    means = np.mean(ar, axis=0)
    standard_errors = scipy.stats.sem(ar, axis=0)
    upper_conf = means + standard_errors
    lower_conf = means - standard_errors

    # Smooth the mean curve with a degree-5 polynomial fit before plotting.
    x = np.arange(0, len(means))
    z = np.polyfit(x, means, 5)
    p = np.poly1d(z)
    plt.plot(x, p(x))
    plt.fill_between(x, upper_conf, lower_conf, alpha=0.3, antialiased=True)
    # plt.ylim(ymax=50, ymin=-800)
    plt.show()
    plt.close()

    # Optional diagnostic: plot steps per epoch from the last trial.
    # z = np.arange(0, len(num_steps))
    # plt.plot(z, num_steps)
    # plt.show()
    # plt.close()

    cart_pole_ctrl.env.close()
def use_model():
    cart_pole_ctrl = Controller(None, 'CartPole-v1', StateBuilderCartPole(), communicate=False)
    # cart_pole_ctrl = Controller(None, 'Taxi-v2', None)
    # cart_pole_ctrl = Controller(None, 'LunarLander-v2', state_builder=StateBuilderLunarLander(), communicate=False)

    # epsilon=0.0 makes the learner act greedily, so the saved policy is evaluated without exploration.
    learner = QLearner(cart_pole_ctrl.get_action_space(), epsilon=0.0, init_alpha=.5, gamma=.9)
    cart_pole_ctrl.set_learner(learner)
    cart_pole_ctrl.load("models/CartPole-v1-7.model")

    count = 0
    while True:
        cart_pole_ctrl.run(render=True)
        count += 1
        print("Epoch {}".format(count))
def start_reference_aggregated_learner(env_name: str = ''):
    """Start a reference agent that runs with a copy of the server's aggregated Q-model."""
    state_builder = StateBuilderCache.builders.get(env_name, None)
    controller = Controller(learner=None, env_id=env_name, state_builder=state_builder)
    learner = QLearner(controller.get_action_space(), epsilon=0.1, init_alpha=.5,
                       gamma=.9, decay_rate=.999)
    # Seed the learner with a deep copy of the server's current model.
    learner.set_model(copy.deepcopy(QServer.Q))
    controller.set_learner(learner)
    agent_thread = threading.Thread(target=controller.run)
    agent_thread.start()
    print('Started Reference Learner')
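# A minimal entry-point sketch, assuming the module is meant to be run directly and
# that no __main__ guard already exists elsewhere in the file; it simply runs the
# single-agent CartPole experiment defined in main() above.
if __name__ == '__main__':
    main()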