def play(strategy, verbose=False, allow_unavailable_action=True):
    """Plays a single game, using a provided strategy.

    Args:
        strategy: A function that takes as argument a state and a list of
            available actions and returns an action from the list.
        allow_unavailable_action: Boolean, whether strategy is passed all
            actions or just the available ones.
        verbose: If true, prints game states, actions and scores.

    Returns:
        score, experiences where score is the final score and experiences is
        the list Experience instances that represent the collected experience.
    """
    game = Game()
    current = game.state().copy()
    finished = game.game_over()
    collected = []

    while not finished:
        if verbose:
            print("Score:", game.score())
            game.print_state()

        offered = range(4) if allow_unavailable_action else game.available_actions()
        action = strategy(current, offered)

        if not game.is_action_available(action):
            # Illegal move: zero reward, state unchanged, flagged unavailable.
            collected.append(
                Experience(current, action, 0, current, False, True,
                           game.available_actions()))
            continue

        previous = current
        reward = game.do_action(action)
        current = game.state().copy()
        finished = game.game_over()
        if verbose:
            print("Action:", ACTION_NAMES[action])
            print("Reward:", reward)
        collected.append(
            Experience(previous, action, reward, current, finished, False,
                       game.available_actions()))

    if verbose:
        print("Score:", game.score())
        game.print_state()
        print("Game over.")

    return game.score(), collected
def test_do_action(choice):
    """A DOWN move merges tiles and spawns a 2-tile at the first free cell."""
    # Mocked RNG: position index 0 is chosen first, then tile exponent 1.
    choice.side_effect = [0, 1]

    board = np.array([[1, 2, 3, 3],
                      [5, 6, 7, 8],
                      [5, 2, 7, 0],
                      [1, 0, 3, 0]])
    game = Game(state=board)
    game.do_action(3)  # DOWN

    expected = np.array([[1, 0, 0, 0],
                         [1, 2, 3, 0],
                         [6, 6, 8, 3],
                         [1, 2, 3, 8]])
    game.print_state()
    assert np.array_equal(game.state(), expected)
    # Two merges happened: 2 ** 6 + 2 ** 8 = 64 + 256.
    assert game.score() == 320
def test_do_action(choice):
    """A DOWN move on a 3x3x3 state applies the same merge to every layer."""
    # Mocked RNG: position index 0 is chosen first, then tile exponent 1.
    choice.side_effect = [0, 1]

    layer = [[1, 2, 3],
             [5, 6, 7],
             [5, 2, 7]]
    game = Game(state=np.array([layer, layer, layer]))
    game.do_action(3)  # DOWN

    merged_layer = [[0, 2, 0],
                    [1, 6, 3],
                    [6, 2, 8]]
    expected = np.array([merged_layer, merged_layer, merged_layer])
    game.print_state()
    assert np.array_equal(game.state(), expected)
    # Each of the three layers scores 2 ** 6 + 2 ** 8 = 320; total 960.
    assert game.score() == 960
def play(strategy, verbose=False, allow_unavailable_action=True):
    """Plays a single game, using a provided strategy.

    Args:
        strategy: A function that takes as argument a state and a list of
            available actions and returns an action from the list.
        allow_unavailable_action: Boolean, whether strategy is passed all
            actions or just the available ones.
        verbose: If true, prints game states, actions and scores.

    Returns:
        score, experiences where score is the final score and experiences is
        the list Experience instances that represent the collected experience.
    """
    game = Game()
    observed = game.state().copy()
    done = game.game_over()
    history = []

    while not done:
        if verbose:
            print("Score:", game.score())
            game.print_state()

        choices = range(4) if allow_unavailable_action else game.available_actions()
        move = strategy(observed, choices)

        if game.is_action_available(move):
            gained = game.do_action(move)
            before = observed
            observed = game.state().copy()
            done = game.game_over()
            if verbose:
                print("Action:", ACTION_NAMES[move])
                print("Reward:", gained)
            history.append(
                Experience(before, move, gained, observed, done, False,
                           game.available_actions()))
        else:
            # Strategy picked a move the board cannot perform right now.
            history.append(
                Experience(observed, move, 0, observed, False, True,
                           game.available_actions()))

    if verbose:
        print("Score:", game.score())
        game.print_state()
        print("Game over.")

    return game.score(), history
# DQN training loop for the 2048 game.
# One Q-value output per action (4 moves in 2048).
action_size = env.action_space.n
agent = DQNAgent(state_size, action_size)
# agent.load("./save/cartpole-dqn.h5")  # optionally resume from a checkpoint
done = False
batch_size = 32

for e in range(EPISODES):
    # Copy so stored transitions are not aliased to the live board array.
    state = game.state().copy()
    count += 1
    while not game.game_over():
        # TODO: replace the uniform-random move with an epsilon-greedy
        # policy, e.g. action = agent.act(state).
        action = randint(0, 3)
        reward = game.do_action(action)
        next_state = game.state().copy()
        # Fix: `done` was never updated from the game, so terminal
        # transitions were never flagged and the break below was dead.
        done = game.game_over()
        agent.remember(state, action, reward, next_state, done)
        state = next_state
        if done:
            # Fix: the format string has four placeholders; the score
            # argument was missing, which raised IndexError here.
            print("episode: {}/{}, score: {}, e: {:.2}".format(
                e, EPISODES, game.score(), agent.epsilon))
            break
    print("bla")
    print("runs: ")
    # Train once enough transitions have been collected in replay memory.
    if len(agent.memory) > batch_size:
        agent.replay(batch_size)
    # if e % 10 == 0: