# Assumes DQN, TRAIN_GAMES, and calculate_additional_reward() are defined elsewhere in the project.
from tqdm import tqdm

def train(game):
    agent = DQN(game)
    for i in tqdm(range(TRAIN_GAMES)):
        game.new_episode()
        previous_variables = None
        previous_img = None
        done = False
        local_history = []
        total_reward = 0
        while not done:
            state = game.get_state()
            img = state.screen_buffer
            variables = state.game_variables
            # On the first step, the previous frame/variables are the current ones
            if previous_variables is None:
                previous_variables = variables
            if previous_img is None:
                previous_img = img
            action = agent.act(img)
            reward = game.make_action(action)
            done = game.is_episode_finished()
            # Add the shaped reward from game-variable deltas and rescale
            reward = (reward + calculate_additional_reward(previous_variables, variables)) / 100
            total_reward += reward
            local_history.append([previous_img, img, reward, action, done])
            previous_variables = variables
            previous_img = img
        # Only store (and learn from) episodes with a non-negative total reward
        if total_reward >= 0:
            for previous_state, state, reward, action, done in local_history:
                agent.remember(previous_state, state, reward, action, done)
            agent.train()
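# calculate_additional_reward() is not shown above. A minimal sketch of the
# kind of reward shaping it likely performs, assuming the scenario exposes
# game_variables ordered as [ammo, health] (the ordering and weights here are
# assumptions, not taken from the original code):
def calculate_additional_reward(previous_variables, variables):
    prev_ammo, prev_health = previous_variables[0], previous_variables[1]
    ammo, health = variables[0], variables[1]
    shaped = 0.0
    shaped += health - prev_health        # penalize taking damage
    shaped += (ammo - prev_ammo) * 0.5    # penalize wasting ammo (weight is illustrative)
    return shaped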
# Assumes DQN and pongGame are defined elsewhere in the project.
import numpy as np
import pygame

def run(ep, train=False):
    pygame.init()
    scores = []
    agent = DQN(3, 5)  # 3 actions, 5 state features
    env = pongGame()
    weights_filepath = 'PongGame.h5'
    if not train:
        agent.model.load_weights(weights_filepath)
        print("weights loaded")
    for e in range(ep):
        # Drain the event queue so the window stays responsive and can be closed
        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                pygame.quit()
                quit()
        state = env.reset()
        state = np.reshape(state, (1, 5))
        score = 0
        max_steps = 1000
        for i in range(max_steps):
            action = agent.act(state)
            reward, next_state, done = env.step(action)
            score += reward
            next_state = np.reshape(next_state, (1, 5))
            agent.remember(state, action, reward, next_state, done)
            state = next_state
            if train:
                agent.replay()
            if done:
                print("episode: {}/{}, score: {}".format(e, ep, score))
                break
        scores.append(score)
    if train:
        agent.model.save_weights(weights_filepath)
    return scores
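# The DQN class itself is not shown. Below is a minimal Keras sketch that
# matches the interface the Pong loop uses (act/remember/replay plus
# model.load_weights/save_weights), assuming the constructor takes
# (n_actions, n_state_features); the layer sizes, epsilon schedule, and
# hyperparameters are illustrative assumptions, not the original values.
import random
from collections import deque

import numpy as np
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam

class DQN:
    def __init__(self, n_actions, n_features):
        self.n_actions = n_actions
        self.memory = deque(maxlen=10000)   # replay buffer
        self.gamma = 0.95                   # discount factor
        self.epsilon = 1.0                  # exploration rate, decayed in replay()
        self.epsilon_min = 0.01
        self.epsilon_decay = 0.995
        self.model = Sequential([
            Dense(24, activation='relu', input_shape=(n_features,)),
            Dense(24, activation='relu'),
            Dense(n_actions, activation='linear'),  # one Q-value per action
        ])
        self.model.compile(loss='mse', optimizer=Adam(learning_rate=0.001))

    def act(self, state):
        # Epsilon-greedy action selection
        if np.random.rand() <= self.epsilon:
            return random.randrange(self.n_actions)
        return int(np.argmax(self.model.predict(state, verbose=0)[0]))

    def remember(self, state, action, reward, next_state, done):
        self.memory.append((state, action, reward, next_state, done))

    def replay(self, batch_size=32):
        if len(self.memory) < batch_size:
            return
        for state, action, reward, next_state, done in random.sample(self.memory, batch_size):
            target = reward
            if not done:
                target += self.gamma * np.amax(self.model.predict(next_state, verbose=0)[0])
            q_values = self.model.predict(state, verbose=0)
            q_values[0][action] = target    # move only the taken action toward the target
            self.model.fit(state, q_values, epochs=1, verbose=0)
        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay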
# Assumes DQN and PLAY_GAMES are defined elsewhere in the project.
def play(game):
    agent = DQN(game, use_saved=True)  # load the trained weights
    for i in tqdm(range(PLAY_GAMES)):
        game.new_episode()
        done = False
        while not done:
            state = game.get_state()
            img = state.screen_buffer
            action = agent.act(img)
            print(action)
            game.make_action(action)
            done = game.is_episode_finished()
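# The `game` object passed to train()/play() is never constructed in these
# snippets. A plausible ViZDoom setup sketch, where the config path and
# window option are assumptions:
import vizdoom as vzd

def make_game(config_path="basic.cfg"):
    game = vzd.DoomGame()
    game.load_config(config_path)    # scenario config (hypothetical path)
    game.set_window_visible(True)    # render the game window while playing
    game.init()
    return game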
# Assumes DQN, N_EP, and preprocess() are defined elsewhere; 'FlappyBird-v0'
# requires a PLE-based gym registration (e.g., the gym-ple package).
from collections import deque

import gym
import numpy as np

N_SAVE = 500  # checkpoint interval (the saving step itself is not in this excerpt)

env = gym.make('FlappyBird-v0')
agent = DQN(env)
scores = deque(maxlen=100)  # rolling window of the last 100 episode scores

for i in range(N_EP):
    score = 0
    ob = env.reset()
    # Build the initial state by stacking the first frame four times
    pre_ob = preprocess(ob)
    pre_ob = pre_ob.reshape(1, 100, 100)
    ob_stack = np.stack((pre_ob, ) * 4, -1)   # shape: (1, 100, 100, 4)
    pre_ob = ob_stack
    while True:
        action = agent.act(pre_ob, step=i)
        ob, reward, done, _ = env.step(action)
        if reward <= -1:
            reward = -1   # clip large negative rewards
        next_pre_ob = preprocess(ob)
        # Slide the frame stack: append the newest frame, drop the oldest
        next_pre_ob = next_pre_ob.reshape(1, 100, 100)
        ob_stack = np.insert(ob_stack, 4, next_pre_ob, axis=3)
        ob_stack = np.delete(ob_stack, 0, axis=3)
        next_pre_ob = ob_stack
        agent.remember(pre_ob, action, reward, next_pre_ob, done)
        agent.replay()
        score += reward
        pre_ob = next_pre_ob   # carry the updated frame stack forward
        if done:
            break
    scores.append(score)
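# preprocess() is not shown. A plausible sketch, assuming it grayscales the
# RGB frame, resizes it to the 100x100 input the stacking code expects, and
# normalizes pixel values (the exact pipeline is an assumption):
import cv2
import numpy as np

def preprocess(ob):
    gray = cv2.cvtColor(ob, cv2.COLOR_RGB2GRAY)   # drop the color channels
    small = cv2.resize(gray, (100, 100))          # match the network's 100x100 input
    return small.astype(np.float32) / 255.0       # scale pixels to [0, 1]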