Exemplo n.º 1
0
def test_available_actions():
    """Check the available actions on a 3x3x3 board whose rows are all [1, 2, 0]."""
    row = [1, 2, 0]
    layer = [row, row, row]
    board = np.array([layer, layer, layer])

    available = Game(state=board).available_actions()

    # Every action except left is expected to be reported as available.
    expected = [1, 2, 3, 4, 5]
    assert available == expected
Exemplo n.º 2
0
def test_available_actions_none_available():
    """Check that a 3x3x3 board containing no zeros reports no actions and game over."""
    first_slab = [[1, 2, 3], [5, 6, 7], [1, 2, 3]]
    second_slab = [[8, 9, 10], [11, 12, 13], [8, 9, 10]]
    third_slab = [[14, 15, 16], [17, 18, 19], [14, 15, 16]]
    board = np.array([first_slab, second_slab, third_slab])

    game = Game(state=board)
    available = game.available_actions()

    # No action at all is expected here, and the game must report being over.
    assert available == []
    assert game.game_over()
Exemplo n.º 3
0
def test_available_actions():
  """Check the available actions on a 4x4 board whose rows are all [1, 2, 3, 0]."""
  row = [1, 2, 3, 0]
  board = np.array([row, row, row, row])

  available = Game(state=board).available_actions()

  # Every action except left is expected to be reported as available.
  expected = [1, 2, 3]
  assert available == expected
Exemplo n.º 4
0
def test_available_actions_none_available():
  """Check that a 4x4 board containing no zeros reports no actions and game over."""
  low_row = [1, 2, 3, 4]
  high_row = [5, 6, 7, 8]
  board = np.array([low_row, high_row, low_row, high_row])

  game = Game(state=board)
  available = game.available_actions()

  # No action at all is expected here, and the game must report being over.
  assert available == []
  assert game.game_over()
Exemplo n.º 5
0
def test_available_actions_none_available():
  """Verify a 4x4 board with alternating rows and no zeros yields no actions."""
  pattern = [[1, 2, 3, 4],
             [5, 6, 7, 8]]
  # The two-row pattern is stacked twice to form the 4x4 board.
  board = np.array(pattern + pattern)

  game = Game(state=board)
  remaining = game.available_actions()

  # The list of actions must be empty and the game must report being over.
  assert remaining == []
  assert game.game_over()
Exemplo n.º 6
0
def test_init(choice):
  """Check the calls a fresh Game makes to ``choice`` and its initial state.

  ``choice`` is presumably a mock injected by a patch decorator that is not
  visible in this snippet -- TODO confirm against the full test module.
  """
  # Values returned by successive calls: first position, first tile,
  # second position, second tile.
  choice.side_effect = [0, 1, 1, 2]
  game = Game()

  tile_draw = call([1, 2], p=[0.9, 0.1])
  expected_calls = [call(16), tile_draw, call(15), tile_draw]
  choice.assert_has_calls(expected_calls)

  game.print_state()
  # Expect fourteen 0s, one 1 and one 2 on the board.
  value_counts = np.bincount(game.state().flatten())
  assert (value_counts == [14, 1, 1]).all()
  assert game.score() == 0
Exemplo n.º 7
0
def highest_reward_strategy(state, actions):
  """Strategy that always chooses the action of highest immediate reward.

  Each candidate action is tried on a fresh Game built from a copy of `state`,
  so the caller's state is never modified by this function.

  If there are any ties, the lowest-numbered action wins, i.e. the strategy
  prefers left over up over right over down.

  Args:
    state: Array holding the current game state.
    actions: Sequence of candidate actions to evaluate.

  Returns:
    The element of `actions` whose immediate reward is highest.
  """

  # Descending order, so that among equal rewards the lowest-numbered action
  # ends up last after the stable ascending argsort below.
  sorted_actions = np.sort(actions)[::-1]
  # Materialise the rewards: on Python 3 `map` returns a lazy iterator, which
  # np.argsort cannot rank element by element.
  rewards = [Game(np.copy(state)).do_action(action)
             for action in sorted_actions]
  # A stable sort is required for the tie-breaking rule described above.
  action_index = np.argsort(rewards, kind="mergesort")[-1]
  return sorted_actions[action_index]
Exemplo n.º 8
0
def highest_reward_strategy(state, actions):
    """Strategy that always chooses the action of highest immediate reward.

    Each candidate action is tried on a fresh Game built from a copy of
    `state`, so the caller's state is never modified by this function.

    If there are any ties, the lowest-numbered action wins (the original
    documentation describes this as preferring left over up over right over
    down).

    Args:
        state: Array holding the current game state.
        actions: Sequence of candidate actions to evaluate.

    Returns:
        The element of `actions` whose immediate reward is highest.
    """

    # Descending order, so that among equal rewards the lowest-numbered
    # action ends up last after the stable ascending argsort below.
    sorted_actions = np.sort(actions)[::-1]
    rewards = [
        Game(np.copy(state)).do_action(action) for action in sorted_actions
    ]
    # The default argsort kind is not guaranteed to be stable; request a
    # stable sort explicitly so the documented tie-breaking always holds.
    action_index = np.argsort(rewards, kind="mergesort")[-1]
    return sorted_actions[action_index]
Exemplo n.º 9
0
def test_do_action(choice):
  """Check the board and score after performing action 3 (DOWN) on a 4x4 board.

  ``choice`` is presumably a mock injected by a patch decorator that is not
  visible in this snippet -- TODO confirm against the full test module.
  """
  # Values returned by successive calls: first position, then first tile.
  choice.side_effect = [0, 1]

  board_before = np.array([[1, 2, 3, 3],
                           [5, 6, 7, 8],
                           [5, 2, 7, 0],
                           [1, 0, 3, 0]])
  board_after = np.array([[1, 0, 0, 0],
                          [1, 2, 3, 0],
                          [6, 6, 8, 3],
                          [1, 2, 3, 8]])

  game = Game(state=board_before)
  game.do_action(3)  # DOWN
  game.print_state()

  assert (game.state() == board_after).all()
  # Expected score: 2 ** 6 + 2 ** 8 == 320
  assert game.score() == 320
Exemplo n.º 10
0
def test_do_action(choice):
    """Check the board and score after performing action 3 (DOWN) on a 3x3x3 board.

    ``choice`` is presumably a mock injected by a patch decorator that is not
    visible in this snippet -- TODO confirm against the full test module.
    """
    # Values returned by successive calls: first position, then first tile.
    choice.side_effect = [0, 1]

    # The same 3x3 slab is repeated three times, before and after the move.
    slab_before = [[1, 2, 3], [5, 6, 7], [5, 2, 7]]
    slab_after = [[0, 2, 0], [1, 6, 3], [6, 2, 8]]
    board_before = np.array([slab_before, slab_before, slab_before])
    board_after = np.array([slab_after, slab_after, slab_after])

    game = Game(state=board_before)
    game.do_action(3)  # DOWN
    game.print_state()

    assert (game.state() == board_after).all()
    # Expected score: (2 ** 6 + 2 ** 8) * 3 == 960
    assert game.score() == 960
Exemplo n.º 11
0
def test_do_action(choice):
  """Verify that action 3 (DOWN) produces the expected 4x4 board and score.

  ``choice`` is presumably a mock supplied by a patch decorator outside this
  snippet -- TODO confirm against the full test module.
  """
  position_then_tile = [0, 1]  # first position, first tile
  choice.side_effect = position_then_tile

  start = np.array([[1, 2, 3, 3],
                    [5, 6, 7, 8],
                    [5, 2, 7, 0],
                    [1, 0, 3, 0]])
  game = Game(state=start)
  game.do_action(3)  # DOWN

  wanted = np.array([[1, 0, 0, 0],
                     [1, 2, 3, 0],
                     [6, 6, 8, 3],
                     [1, 2, 3, 8]])
  game.print_state()
  cells_match = game.state() == wanted
  assert cells_match.all()
  # The score should equal 2 ** 6 + 2 ** 8.
  assert game.score() == 320
Exemplo n.º 12
0
def test_init(choice):
  """Verify the ``choice`` calls made while constructing a Game and its start state.

  ``choice`` is presumably a mock supplied by a patch decorator outside this
  snippet -- TODO confirm against the full test module.
  """
  first_position, first_tile = 0, 1
  second_position, second_tile = 1, 2
  choice.side_effect = [first_position, first_tile,
                        second_position, second_tile]
  game = Game()

  choice.assert_has_calls([
      call(16),
      call([1, 2], p=[0.9, 0.1]),
      call(15),
      call([1, 2], p=[0.9, 0.1]),
  ])

  game.print_state()
  # The flattened board must hold fourteen 0s, one 1 and one 2.
  flat_board = game.state().flatten()
  assert (np.bincount(flat_board) == [14, 1, 1]).all()
  assert game.score() == 0
Exemplo n.º 13
0
def play(strategy, verbose=False, allow_unavailable_action=True):
  """Plays one game from a fresh Game instance, driven by `strategy`.

  Args:
    strategy: A function that takes a state and a collection of actions and
        returns the action to perform.
    verbose: If true, prints game states, actions, rewards and scores.
    allow_unavailable_action: If true, the strategy is offered `range(4)`;
        otherwise it is offered only `game.available_actions()`.

  Returns:
    A `(score, experiences)` tuple: the final score of the game and the list
    of Experience instances collected on the way. An unavailable action is
    also recorded, with reward 0 and the state left unchanged.
  """

  game = Game()

  def show_progress():
    # Prints the current score followed by the board.
    print("Score:", game.score())
    game.print_state()

  current = game.state().copy()
  finished = game.game_over()
  history = []

  while not finished:
    if verbose:
      show_progress()

    if allow_unavailable_action:
      candidates = range(4)
    else:
      candidates = game.available_actions()
    chosen = strategy(current, candidates)

    if not game.is_action_available(chosen):
      # Nothing is played: the board and the game-over flag stay as they are.
      history.append(Experience(current, chosen, 0, current, False, True,
                                game.available_actions()))
      continue

    previous = current
    reward = game.do_action(chosen)
    current = game.state().copy()
    finished = game.game_over()

    if verbose:
      print("Action:", ACTION_NAMES[chosen])
      print("Reward:", reward)

    history.append(Experience(previous, chosen, reward, current, finished,
                              False, game.available_actions()))

  if verbose:
    show_progress()
    print("Game over.")

  return game.score(), history
Exemplo n.º 14
0
            target_f = self.model.predict(state)
            target_f[0][action] = target
            self.model.fit(state, target_f, epochs=1, verbose=0)
        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay

    def load(self, name):
        """Load weights into ``self.model`` by passing ``name`` to ``load_weights``."""
        network = self.model
        network.load_weights(name)

    def save(self, name):
        """Save the weights of ``self.model`` by passing ``name`` to ``save_weights``."""
        network = self.model
        network.save_weights(name)


if __name__ == "__main__":
    count = 0
    game = Game()
    env = gym.make('CartPole-v1')
    #state_size = 192
    state_size = env.observation_space.shape[0]  # in our case 4*4*12
    #action_size = 4
    action_size = env.action_space.n  #in our case 4
    agent = DQNAgent(state_size, action_size)
    # agent.load("./save/cartpole-dqn.h5")
    done = False
    batch_size = 32

    for e in range(EPISODES):
        state = game.state()
        count += 1
        #state = env.reset()
        #state = np.reshape(state, [1, state_size])