# REINFORCE training epoch: roll out episodes with the current policy, fit the
# trainable network on each episode's discounted returns, then track scores
# and decay the exploration rate.
# NOTE(review): this block arrived collapsed onto one line; indentation is
# reconstructed — the fit/score section is assumed to live inside the episode
# loop (the `##` sentinels in the original suggest per-episode sections).
# Confirm against the original layout.
scores = []
agent = ReinforceAgent(model, actions=ACTIONS, noise=EXPLORE_RATE)
for _ in range(TEST_EPISODES_PER_EPOCH):
    env.reset()
    agent.reset()
    trajectory = []
    episodeDone = False
    # Roll out one full episode, recording (state, action, reward) triples.
    while not episodeDone:
        chosenAction = agent.process(env.state)
        _, stepReward, episodeDone, prevState = env.apply(chosenAction)
        trajectory.append((prevState, chosenAction, stepReward))
    ##
    # Fit the policy on this episode's trajectory; rewards are converted to
    # discounted returns before training.
    states, actions, rewards = zip(*trajectory)
    actions = ACTIONS.toIndex(actions)
    trainable.fit(
        [
            np.array(states),
            np.array(actions),
            np.array(discountedReturns(rewards, GAMMA)),
        ],
        epochs=1, verbose=0
    )
    ##
    scores.append(env.score)

Utils.trackScores(scores, metrics)
##################
# Decay exploration multiplicatively, never dropping below the 0.001 floor.
EXPLORE_RATE = max((0.001, EXPLORE_RATE * EXPLORE_RATE_DECAY))
plotData2file(metrics, 'chart.jpg')
# One DQN training epoch: sync the target network, fit on sampled replay
# batches, evaluate the agent, optionally visualize a play, and decay the
# exploration rate.
# NOTE(review): this block arrived collapsed onto one line; indentation is
# reconstructed from the statement sequence and the `#####` section markers —
# confirm against the original layout.
print('[N = %d, M = %d] Start of %d epoch. Explore rate: %.3f' % (N, M, epoch, exploreRate))
# train model
model.updateTargetModel()
epochLosses = []
for _ in range(TRAIN_EPISODES):
    # Sample a mixed batch from replay memory (capped per episode to keep
    # batches diverse), then fit the online network toward the target network.
    states, actions, rewards, nextStates, nextStateScoreMultiplier = memory.sampleBatch(
        batch_size=BATCH_SIZE, maxSamplesFromEpisode=16
    )
    actions = ACTIONS.toIndex(actions)
    _, batchLoss = model.fit(states, actions, rewards, nextStates, nextStateScoreMultiplier * GAMMA)
    epochLosses.append(batchLoss)
######
print('Avg. train loss: %.4f' % (np.mean(epochLosses)))
##################
print('Testing...')
scores = Utils.testAgent(
    DQNAgent(model, actions=ACTIONS, exploreRate=exploreRate),
    memory,
    TEST_EPISODES
)
Utils.trackScores(scores, metrics)
##################
# Every 10th epoch, show the (greedy, exploreRate=0) agent playing — but only
# outside Colab, where interactive display is unavailable.
if ((epoch % 10) == 0) and not COLAB_ENV:
    # debug
    Utils.showAgentPlay(DQNAgent(model, actions=ACTIONS, exploreRate=0))
##################
# Decay exploration multiplicatively, never dropping below the 0.001 floor.
exploreRate = max((0.001, exploreRate * EXPLORE_RATE_DECAY))
plotData2file(metrics, 'chart-%d-%d.jpg' % (N, M,))