Example #1
import numpy as np

class AIPlayer(Player):
    def __init__(self, identifier: int, config: MLPConfig):
        super().__init__(identifier=identifier)
        self.action_service = ActionService()
        self.model = config.load_model()

    def _predict(self, game_state: 'GameState'):
        # Batchify the numeric state representation before inference.
        state = np.atleast_2d(game_state.create_numeral_representation(self))
        return self.model.predict({"state": state})

    @property
    def is_human(self):
        return False

    def _choose_action(self,
                       game_state: 'GameState',
                       verbose: bool = False) -> 'Action':
        predictions = self._predict(game_state=game_state)
        mask = self.action_service.get_valid_actions_mask(
            self, game_state.board)
        # Keep scores for valid actions and push every invalid action below
        # the minimum prediction, so argmax can never pick an illegal move.
        proper_predictions = predictions * np.atleast_2d(mask)
        masked_predictions = np.atleast_2d(
            np.logical_not(mask) * (np.min(predictions) - 1))
        predictions = proper_predictions + masked_predictions
        action_idx = np.argmax(predictions[0])  # type: int
        return self.action_service.idx_to_action(action_idx)
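The masking trick in _choose_action above is plain numpy: scores for invalid
actions are pushed below the minimum prediction, so argmax can only return a
legal index. A minimal sketch with made-up toy values:

import numpy as np

predictions = np.array([[0.3, 0.9, -0.2, 0.1]])  # toy Q-values, 4 actions
mask = np.array([False, False, True, True])      # only actions 2 and 3 legal

proper = predictions * np.atleast_2d(mask)
masked = np.atleast_2d(np.logical_not(mask) * (np.min(predictions) - 1))
combined = proper + masked

# Action 1 has the best raw score but is illegal; the best legal action wins.
assert np.argmax(combined[0]) == 3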
Example #2
def play_game(env, train_net, target_net, epsilon, copy_step, print_exp_step):
    rewards = 0
    iteration = 0
    done = False
    state = env.reset()
    while not done:
        actions_mask = env.get_current_actions_mask()
        action = train_net.get_action(state, actions_mask, epsilon)
        prev_state = state
        state, reward, done, _ = env.step(action)
        rewards += reward
        if done:
            # The loop exits after this pass; `state` still holds the terminal
            # observation and is stored as s2 in the transition below.
            env.reset()

        exp = {
            's': prev_state,
            'a': action,
            'r': reward,
            'm': actions_mask,
            's2': state,
            'done': done
        }
        train_net.add_experience(exp)
        train_net.train(target_net)
        iteration += 1
        if iteration % print_exp_step == 0:
            print("Experience replay:")
            for exp_action in train_net.experience['a']:
                print(ActionService().idx_to_action(exp_action))
        if iteration % copy_step == 0:
            # Periodically sync the target network with the online network.
            target_net.copy_weights(train_net)
    return rewards
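play_game delegates exploration to train_net.get_action(state, actions_mask,
epsilon), which the snippets never show. A mask-aware epsilon-greedy selection
consistent with those call sites might look like the following; the standalone
get_action helper and its q_values argument (a 1-D array of Q-values) are
assumptions for illustration, not the project's actual method:

import numpy as np

def get_action(q_values: np.ndarray, actions_mask: np.ndarray,
               epsilon: float) -> int:
    """Hypothetical epsilon-greedy selection restricted to legal actions."""
    legal = np.flatnonzero(actions_mask)
    if np.random.random() < epsilon:
        # Explore: uniform choice among legal actions only.
        return int(np.random.choice(legal))
    # Exploit: argmax over the legal subset of Q-values.
    return int(legal[np.argmax(q_values[legal])])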
Example #3
 def step(self, action_idx: int):
     """
     The agent takes a step in the environment.
     Parameters
     ----------
     action_idx : int
         Index of the action to execute; converted to an Action via
         ActionService().idx_to_action.
     Returns
     -------
     ob, reward, episode_over, info : tuple
         ob (object) :
             an environment-specific object representing your observation of
             the environment.
         reward (float) :
             amount of reward achieved by the previous action. The scale
             varies between environments, but the goal is always to increase
             your total reward.
         episode_over (bool) :
             whether it's time to reset the environment again. Most (but not
             all) tasks are divided up into well-defined episodes, and done
             being True indicates the episode has terminated. (For example,
             perhaps the pole tipped too far, or you lost your last life.)
         info (dict) :
              diagnostic information useful for debugging. It can sometimes
              be useful for learning (for example, it might contain the raw
              probabilities behind the environment's last state change).
              However, official evaluations of your agent are not allowed to
              use this for learning.
     """
     if self.game.is_finished():
         raise RuntimeError("Episode is done, please reset the game.")
     action = ActionService().idx_to_action(action_idx)  # type: Action
     self.curr_step += 1
     self._take_action(action, action_idx)
     reward = action.get_reward()
     # If the game is over and the last player who played a card is the winner, add a winning bonus to the reward
     # Note that at this point the 'current player' is not the player who took the last action,
     # since _take_action() already swapped the current player.
     if self.game.is_finished():
         player0 = self.game.players[0]
         player1 = self.game.players[1]
         if self.game.current_player == player0 and player0.score < player1.score:
             reward += 100
         if self.game.current_player == player1 and player0.score > player1.score:
             reward += 100
     observation = self._get_state()
     return observation, reward, self.game.is_finished(), {}
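Because step follows the classic gym contract documented above, the
environment can be driven by the usual rollout loop. A minimal sketch using
only methods shown in these examples (reset, step, get_current_actions_mask)
with a uniformly random legal policy:

import random

def random_rollout(env) -> float:
    """Play one episode by sampling uniformly among legal actions."""
    total_reward = 0.0
    state = env.reset()
    done = False
    while not done:
        mask = env.get_current_actions_mask()
        legal = [i for i, ok in enumerate(mask) if ok]
        state, reward, done, _ = env.step(random.choice(legal))
        total_reward += reward
    return total_reward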
Example #4
 def _choose_action(self,
                    game_state: 'GameState',
                    verbose: bool = False) -> 'Action':
     eligible_actions = ActionService().get_valid_actions(self, game_state)
     for i, action in enumerate(eligible_actions):
         print("{}: {}".format(i, action))
     action_index = None
     while action_index is None:
         try:
             action_index = input(
                 "Type the number of the action you wish to execute:")
             action_index = int(action_index)
             # Reject numbers outside the printed action list, which would
             # otherwise raise an IndexError below.
             if not 0 <= action_index < len(eligible_actions):
                 print("Action number '{}' is out of range!".format(
                     action_index))
                 action_index = None
         except ValueError:
             print("Invalid action number '{}', please enter a number!".
                   format(action_index))
             action_index = None
     return eligible_actions[action_index]
Example #5
 def __init__(self):
     # Model params
     self.hidden_units = [32, 32]
     # Environment params
     self.num_states = GameState.SIZE
     self.num_actions = ActionService().num_actions
     # Training params
     self.gamma = 0.99
     self.copy_step = 128
     self.print_exp_step = 100000000
     self.max_experiences = 1000
     self.min_experiences = 64
     self.batch_size = 64
     self.lr = 1e-2
     self.number_iterations = 10000
     self.epsilon = 0.999
     self.decay = 0.99995  # Multiplicative per-iteration decay; epsilon halves after roughly 14,000 iterations
     self.min_epsilon = 0.1
     self.avg_rewards = 0
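Assuming the conventional use of these fields (epsilon is multiplied by decay
once per iteration and floored at min_epsilon; the training loop itself is not
shown in these snippets), the halving point follows directly from logarithms:

import math

epsilon, decay, min_epsilon = 0.999, 0.99995, 0.1

# decay**n = 0.5  =>  n = ln(0.5) / ln(decay)
n_half = math.log(0.5) / math.log(decay)
print(round(n_half))  # ~13863 iterations until epsilon halves

# Iterations until the min_epsilon floor is reached:
n_floor = math.log(min_epsilon / epsilon) / math.log(decay)
print(round(n_floor))  # ~46030 iterations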
Example #6
 def test_take_card(self):
     # Execute a take-card action for the first player
     target_player = self.game.players[0]
     original_deck_num_cards = self.game.board.deck.num_cards()
     original_player_num_cards = target_player.hand.num_cards()
     self.game.board.set_phase(GamePhase.DRAW_PHASE)
     take_card_action = TakeCardAction()
     # Ensure the player can perform the take card action
     validated_actions = ActionService().get_valid_actions(
         target_player, self.game.board)
     self.assertIn(take_card_action, validated_actions)
     # Execute the action
     take_card_action.execute(target_player, self.game.board)
     # Ensure a card has been added to the player's hand
     self.assertEqual(target_player.hand.num_cards(),
                      original_player_num_cards + 1)
     # Ensure the deck has lost 1 card
     self.assertEqual(self.game.board.deck.num_cards(),
                      original_deck_num_cards - 1)
Example #7
 def step(self, action_idx: int):
     """
     The agent takes a step in the environment.
     Parameters
     ----------
     action_idx : int
         Index of the action to execute; converted to an Action via
         ActionService().idx_to_action.
     Returns
     -------
     ob, reward, episode_over, info : tuple
         ob (object) :
             an environment-specific object representing your observation of
             the environment.
         reward (float) :
             amount of reward achieved by the previous action. The scale
             varies between environments, but the goal is always to increase
             your total reward.
         episode_over (bool) :
             whether it's time to reset the environment again. Most (but not
             all) tasks are divided up into well-defined episodes, and done
             being True indicates the episode has terminated. (For example,
             perhaps the pole tipped too far, or you lost your last life.)
         info (dict) :
              diagnostic information useful for debugging. It can sometimes
              be useful for learning (for example, it might contain the raw
              probabilities behind the environment's last state change).
              However, official evaluations of your agent are not allowed to
              use this for learning.
     """
     if self.game.is_finished():
         raise RuntimeError("Episode is done, please reset the game.")
     action = ActionService().idx_to_action(action_idx)  # type: Action
     pre_action_score = self.game.player.score
     self.curr_step += 1
     self._take_action(action, action_idx)
     if self.game.player.broken:
         reward = self.penalty
      else:
          score = self.game.player.score
          # Shaped reward: grows with the score and peaks at 21, where the
          # distance penalty in the denominator vanishes.
          reward = int(score ** 2 / (abs(score - 21) + 1))
     observation = self._get_state()
     # print(f"{pre_action_score} => {self.game.player.score} through {action} with reward {reward}")
     return observation, reward, self.game.is_finished(), {}
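Evaluating the shaping formula at a few scores makes its shape concrete: it
rises steeply toward 21, and scores past 21 never reach this branch because a
broken player takes the self.penalty path instead. The helper name below is
just for illustration:

def shaped_reward(score: int) -> int:
    return int(score ** 2 / (abs(score - 21) + 1))

for score in (10, 15, 19, 20, 21):
    print(score, shaped_reward(score))
# 10 -> 8, 15 -> 32, 19 -> 120, 20 -> 200, 21 -> 441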
Example #8
 def test_play_card(self):
     # Execute a play-card action for the first player
     target_player = self.game.players[0]
     target_card = Card(12, HEARTS)
     self.game.board.set_phase(GamePhase.ACTION_PHASE)
     target_player.hand.clear()
     target_player.hand.add(Card(1, HEARTS))  # filler card so the hand isn't empty
     target_player.hand.add(target_card)
     play_card_action = PlayCardAction(target_card)
     # Ensure the player can perform the play card action
     validated_actions = ActionService().get_valid_actions(
         target_player, self.game.board)
     self.assertIn(play_card_action, validated_actions)
     # Execute the action
     play_card_action.execute(target_player, self.game.board)
     # Ensure the card has been played
     self.assertEqual(target_card, self.game.board.stack.look())
     # Ensure the card has been removed from the player's hand
     self.assertNotIn(target_card, target_player.hand)
Example #9
 def get_current_actions_mask(self) -> List[bool]:
     """Return a boolean mask representing the current valid actions."""
     return ActionService().get_valid_actions_mask(self.player,
                                                   self.get_state())
Example #10
 def _choose_action(self,
                    game_state: 'GameState',
                    verbose: bool = False) -> 'Action':
     eligible_actions = ActionService().get_valid_actions(self, game_state)
     return random.choice(eligible_actions)
Example #11
 def __init__(self, identifier: int, config: MLPConfig):
     super().__init__(identifier=identifier)
     self.action_service = ActionService()
     self.model = config.load_model()
Example #12
 def __init__(self, config: MLPConfig):
     super().__init__()
     self.action_service = ActionService()
     self.model = config.load_model()