class AI:
    def __init__(self, fname):
        lr = 0.0005
        self.agent = Agent(gamma=0.99, epsilon=0.0, alpha=lr, input_dims=6,
                           n_actions=2, mem_size=60000, batch_size=64,
                           epsilon_end=0.0, fname=fname)
        self.observation = []
        self.action = 0
        self.n_step = 0
        self.fname = fname.split("/")[-1]

    def episode_start(self, observation):
        self.observation = observation

    def choose_action(self):
        self.action = self.agent.choose_action(self.observation)
        return self.action

    def step(self, observation_, reward, done):
        self.agent.remember(self.observation, self.action, reward,
                            observation_, int(done))
        self.observation = observation_
        if self.n_step % 3 == 0:
            self.agent.learn()
        self.n_step += 1

    def episode_end(self):
        self.agent.save_model()
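# Not part of the original: a minimal, hypothetical sketch of how the
# callback-style AI wrapper above could be driven by an external game loop.
# An 'env' with a gym-like reset()/step() API is an assumption here.
def run_episode(env, ai):
    observation = env.reset()
    ai.episode_start(observation)
    done = False
    while not done:
        action = ai.choose_action()
        observation_, reward, done, _ = env.step(action)
        ai.step(observation_, reward, done)
    ai.episode_end()  # saves the underlying model via agent.save_model()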
import gym

def start():
    env = gym.make('CartPole-v0')
    params = {
        'gamma': 0.8,
        'epsi_high': 0.9,
        'epsi_low': 0.05,
        'decay': 500,
        'lr': 0.001,
        'capacity': 10000,
        'batch_size': 64,
        'state_space_dim': env.observation_space.shape[0],
        'action_space_dim': env.action_space.n,
    }
    agent = Agent(**params)
    score = []
    mean = []

    for episode in range(1000):
        s0 = env.reset()
        total_reward = 1
        for i in range(200):
            env.render()
            a0 = agent.act(s0)
            s1, r1, done, _ = env.step(a0)
            # Penalise the terminal transition
            if done:
                r1 = -1
            agent.put(s0, a0, r1, s1)
            if done:
                break
            total_reward += r1
            s0 = s1
            agent.learn()

        score.append(total_reward)
        # Running average over the last (up to) 100 episodes
        mean.append(sum(score[-100:]) / len(score[-100:]))
        print(total_reward)
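# Not part of the original: a minimal sketch of the exponential exploration
# schedule that an Agent configured with 'epsi_high', 'epsi_low' and 'decay'
# might apply. The exact rule used by this Agent class is an assumption.
import math

def epsilon_at(step, epsi_high=0.9, epsi_low=0.05, decay=500):
    # Decays from epsi_high towards epsi_low as the global step count grows.
    return epsi_low + (epsi_high - epsi_low) * math.exp(-step / decay)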
import gym
import numpy as np

def main():
    # Make environment and agent
    env = gym.make('LunarLander-v2')
    agent = Agent(gamma=0.99, epsilon=1.0, batch_size=64, n_actions=4,
                  eps_end=0.01, input_dims=[8], lr=0.0001)
    scores, eps_history = [], []
    n_games = 500

    for i in range(n_games):
        score = 0
        done = False
        observation = env.reset()
        while not done:  # in-game loop
            # Get action from the current view of the game (observation)
            action = agent.choose_action(observation)
            # Next frame
            observation_, reward, done, info = env.step(action)
            score += reward
            # Store the transition in replay memory
            agent.store_transition(observation, action, reward, observation_, done)
            agent.learn()
            # Advance: the next state becomes the current state
            observation = observation_

        # Record score and epsilon
        scores.append(score)
        eps_history.append(agent.epsilon)

        # Print some progress statistics
        avg_score = np.mean(scores[-100:])
        print(f'Episode: {i} Score: {score} '
              f'Average Score: {avg_score} Epsilon: {agent.epsilon}')
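# Not part of the original: a minimal sketch of the epsilon-greedy selection
# that agent.choose_action() presumably performs. How the Q-values are obtained
# (e.g. a forward pass of the Q-network) is left out and assumed.
import numpy as np

def epsilon_greedy(q_values, epsilon, n_actions):
    # Explore with probability epsilon, otherwise exploit the greedy action.
    if np.random.random() < epsilon:
        return np.random.randint(n_actions)
    return int(np.argmax(q_values))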
import numpy as np
import tensorflow as tf

def OldStuff():
    tf.compat.v1.disable_eager_execution()
    lr = 0.001
    numGames = 10000
    session = TriadGameSession()
    observation = session.getState()
    scores = []

    agent = Agent(gamma=0.99, lr=lr, epsilon=1.0, epsilonDec=0.0005,
                  inputSize=[len(observation)],
                  numActions=session.getMaxActions(),
                  memSize=1000000, batchSize=1024)

    for i in range(numGames):
        done = False
        score = 0
        session = TriadGameSession()
        observation = session.getState()

        while not done:
            action = agent.chooseAction(observation)
            observationNext, reward, done = session.step(action)
            score += reward
            agent.store(observation, action, reward, observationNext, done)
            observation = observationNext
            agent.learn()

        scores.append(score)
        avgScore = np.mean(scores[-100:])
        print('game:', i,
              'score %.2f' % score,
              'avgScore %.2f' % avgScore,
              'epsilon %.2f' % agent.epsilon)
        #agent.save()

    print('Finished!')
# Per-episode body of the training loop: report progress, then play and learn
if i % 10 == 0 and i > 0:
    avg_score = np.mean(scores[max(0, i - 10):(i + 1)])
    print('episode: ', i, 'score: ', score,
          ' average score %.3f' % avg_score,
          'epsilon %.3f' % brain.EPSILON)
else:
    print('episode: ', i, 'score: ', score)

eps_history.append(brain.EPSILON)

done = False
observation = env.reset()
score = 0
while not done:
    action = brain.chooseAction(observation)
    observation_, reward, done, info = env.step(action)
    score += reward
    brain.storeTransition(observation, action, reward, observation_, done)
    observation = observation_
    brain.learn()
scores.append(score)

# After training: watch the trained agent play 10 rendered episodes
for i in range(10):
    done = False
    observation = env.reset()
    while not done:
        action = brain.chooseAction(observation)
        observation_, reward, done, info = env.step(action)
        observation = observation_
        env.render()
              n_actions=4, batch_size=64)

scores = []
eps_history = []

for i in range(1, n_games + 1):
    done = False
    score = 0
    observation = env.reset()
    while not done:
        if show:
            env.render()
        action = agent.choose_action(observation)
        observation_, reward, done, info = env.step(action)
        score += reward
        agent.remember(observation, action, reward, observation_, done)
        observation = observation_
        agent.learn()

    eps_history.append(agent.epsilon)
    scores.append(score)
    avg_score = np.mean(scores[max(0, i - 100):i + 1])
    print('episode', i, 'score', score, 'avg score', avg_score)

    if i % 10 == 0 and i > 0:
        agent.save_model()

plt.plot(scores)
plt.plot(eps_history)
plt.legend(['score', 'epsilon'], loc='upper left')
plt.show()
import numpy as np
import matplotlib.pyplot as plt

def main():
    scores = []
    eps_history = []
    info_history = []

    # Random starting-points:
    env = sky.make(random=True, xi=(301, 650 - 25), yi=(100, 300 - 25),
                   width=15, height=15, v_initial=14)
    # Fixed starting-point:
    #env = sky.make(xi=550)

    agent = Agent(gamma=gamma, epsilon=epsilon, lr=lr,
                  input_dims=[input_dimensions], n_actions=n_actions,
                  mem_size=mem_size, batch_size=batch_size,
                  epsilon_dec=epsilon_dec)
    if load_checkpoint:
        agent.load_models()

    for i in range(n_games):
        score = 0
        done = False
        observation = env.reset()
        while not done:
            # One game: ends when done is True
            action = agent.choose_action(observation)
            observation_, reward, done, info = env.step(action)
            score += reward
            agent.store_transition(observation, action, reward,
                                   observation_, int(done))
            observation = observation_
            agent.learn()

        if i % 10 == 0 and i > 0:
            avg_score = np.mean(scores[max(0, i - 10):(i + 1)])
            print(i, 'episode', info, '|| score:', score,
                  '| average score: %.3f' % avg_score,
                  '| epsilon: %.3f' % agent.epsilon,
                  '| training done:', round(i / n_games, 2))
        else:
            print(i, 'episode', info, '|| score:', score)

        scores.append(score)
        eps_history.append(agent.epsilon)
        info_history.append(info)

    print('training ended with:',
          [[el, info_history.count(el)] for el in ('crashed', 'goal')])

    if save_checkpoint:
        agent.save_models()
        print('[+] model saved')

    # -------------------
    # Plotting and output
    # -------------------
    x = [i + 1 for i in range(n_games)]

    # First axis: scores
    fig, ax1 = plt.subplots()
    color = 'tab:red'
    ax1.set_xlabel('Episode')
    ax1.set_ylabel('score per Episode', color=color)
    ax1.scatter(x, scores, color=color, s=2)
    ax1.tick_params(axis='y', labelcolor=color)

    # Second axis: epsilon
    ax2 = ax1.twinx()  # instantiate a second axes that shares the same x-axis
    color = 'tab:blue'
    ax2.set_ylabel('epsilon', color=color)  # we already handled the x-label with ax1
    ax2.plot(x, eps_history, color=color)
    ax2.tick_params(axis='y', labelcolor=color)

    # Output
    fig.tight_layout()  # otherwise the right y-label is slightly clipped
    plt.savefig(filename)

    return env
# Update memory objects with states for each player
if train_networks:
    memory_1.store_transition(p1_state, p1_action, p1_reward,
                              p1_state_, int(done))
    memory_2.store_transition(p2_state, p2_action, p2_reward,
                              p2_state_, int(done))

# Train the agents' deep Q-networks.
# Pre-assign history values so nothing breaks while mem_cntr is still low.
history_1, history_2 = None, None
if train_networks and memory_1.mem_cntr > batch_size:
    if p1_type == 'Agent':
        history_1 = agent_1.learn(
            batch_size, memory_1.sample_buffer(batch_size))
    if p2_type == 'Agent':
        history_2 = agent_2.learn(
            batch_size, memory_2.sample_buffer(batch_size))

# Update state for the new step
p1_state = p1_state_
p2_state = p2_state_
p1_tot += p1_reward
p2_tot += p2_reward

# Save networks if they are also being trained.
# Moved inside the game loop: saves every 1000 frames, since games are getting
# longer and need to be saved while in progress.
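# Not part of the original: a hypothetical sketch of the periodic save the
# comment above describes. 'frame_count' and the save_model() method names on
# agent_1 / agent_2 are assumptions, not confirmed by this snippet.
if train_networks and frame_count % 1000 == 0:
    if p1_type == 'Agent':
        agent_1.save_model()
    if p2_type == 'Agent':
        agent_2.save_model()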