def run(ep, train=False):
    """Run (and optionally train) the DQN agent on the pong environment.

    NOTE(review): reconstructed from a whitespace-mangled source line; the
    statement nesting below was inferred from the statement order — confirm
    against the original file's history.

    Args:
        ep: Number of episodes to play.
        train: If True, call agent.replay() each step and save the weights
            afterwards; if False, load pre-trained weights and only play.

    Returns:
        List of per-episode scores (one entry per episode).
    """
    pygame.init()
    loss = []
    # State is reshaped to (1, 5) below, so the observation vector has 5
    # features; DQN(3, 5) presumably means 3 actions, 5 inputs — TODO confirm.
    agent = DQN(3, 5)
    env = pongGame()
    weights_filepath = 'PongGame.h5'
    if not train:
        # Evaluation mode: reuse previously trained weights.
        agent.model.load_weights(weights_filepath)
        print("weights loaded")
    for e in range(ep):
        # Drain the pygame event queue so the window stays responsive and
        # the user can close it cleanly.
        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                pygame.quit()
                quit()
        state = env.reset()
        state = np.reshape(state, (1, 5))
        score = 0
        max_steps = 1000  # hard cap so a never-ending rally still terminates
        for i in range(max_steps):
            action = agent.act(state)
            reward, next_state, done = env.step(action)
            score += reward
            next_state = np.reshape(next_state, (1, 5))
            agent.remember(state, action, reward, next_state, done)
            state = next_state
            if train:
                # Learn from replay memory every step while training.
                agent.replay()
            if done:
                print("episode: {}/{}, score: {}".format(e, ep, score))
                break
        loss.append(score)
    if train:
        # Use the same path constant as the load above (was a duplicated
        # string literal in the original).
        agent.model.save_weights(weights_filepath)
    return loss
# NOTE(review): whitespace-mangled fragment of a DQN training loop. The
# enclosing episode/step loop headers are NOT visible in this chunk
# (`pre_ob`, `ob_stack`, `scores`, `i`, `agent`, `env`, `score` are all
# defined outside it), so the code is kept byte-identical rather than
# reformatted. What the statements appear to do — confirm against the
# full file:
#   - pick an action from the current stacked observation and step the env;
#   - clamp any reward <= -1 up to exactly -1 (reward floor);
#   - preprocess the new frame, reshape to (1, 100, 100), and roll it into
#     the frame stack with np.insert(..., -1, ..., axis=3) followed by
#     np.delete(..., 0, axis=3) — i.e. drop the oldest frame, append the
#     newest, keeping the stack depth constant along axis 3;
#   - store the transition, train via agent.replay(), advance pre_ob,
#     accumulate score, and break out of the step loop on `done`;
#   - after the episode: append the score, log it, and every 5th episode
#     also log epsilon (as a percentage) and np.mean(scores) — the message
#     says "last 100 episodes" but the mean is over ALL of `scores` as
#     written; TODO confirm whether `scores` is truncated elsewhere.
# NOTE(review): `i` is used both as a per-step index (agent.act(..., step=i))
# and as the episode number in the log lines — verify which loop `i`
# belongs to in the original file; one of these usages looks suspect.
action = agent.act(pre_ob, step=i) ob, reward, done, _ = env.step(action) if reward <= -1: reward = -1 next_pre_ob = preprocess(ob) # Stack observations next_pre_ob = next_pre_ob.reshape(1, 100, 100) ob_stack = np.insert(ob_stack, -1, next_pre_ob, axis=3) ob_stack = np.delete(ob_stack, 0, axis=3) next_pre_ob = ob_stack agent.remember(pre_ob, action, reward, next_pre_ob, done) agent.replay() pre_ob = next_pre_ob score = score + reward if done: break scores.append(score) print("Episode {} score: {}".format(i + 1, score)) mean_score = np.mean(scores) if (i + 1) % 5 == 0: print( "Episode {}, score: {}, exploration at {}%, mean of last 100 episodes was {}" .format(i + 1, score, agent.epsilon * 100, mean_score))