Example #1
import numpy as np
from typing import List

def play_move(self, game: Game, moves: List[Move]):
    available_actions = [move.move_id for move in moves]
    start_state = game.get_nn_features(self)
    # Sample an action from the policy distribution over the available moves.
    action = np.random.choice(game.total_num_actions,
                              p=self.policy(
                                  start_state,
                                  available_actions=available_actions))
    move = self.action_to_move(action, moves)
    # Perform the action -> get the reward and observe the next state.
    new_state, reward = self.env.step(move)
    # A SARSA-style (on-policy) update would sample the next action here:
    # new_action = np.random.choice(
    #     game.total_num_actions,
    #     p=self.policy(new_state, available_actions=available_actions)
    # )
    q_values_new_state = self.estimator.predict(
        state_features=new_state, available_actions=available_actions)

    # The value we should have obtained: the Q-learning target policy is
    # greedy, hence the max over the next state's action values.
    td_target = reward + self.discount_factor * np.nanmax(
        q_values_new_state)
    self.estimator.update(action, start_state, td_target)
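
Example #1 is a semi-gradient Q-learning update: the estimator is queried for the next state's action values, the greedy target reward + discount_factor * max Q(new_state, a) is formed, and the estimate for the taken action is moved toward it. Below is a minimal sketch of an estimator compatible with the predict()/update() calls above; the class name and internals are assumptions, only the call signatures come from the example.

import numpy as np

class LinearQEstimator:
    """One weight vector per action: Q(s, a) = w_a . s (assumed structure)."""

    def __init__(self, num_actions: int, num_features: int, lr: float = 0.01):
        self.weights = np.zeros((num_actions, num_features))
        self.lr = lr

    def predict(self, state_features, available_actions):
        # Unavailable actions are left as NaN so the caller's np.nanmax()
        # only considers legal moves.
        q_values = np.full(len(self.weights), np.nan)
        for a in available_actions:
            q_values[a] = self.weights[a] @ state_features
        return q_values

    def update(self, action, state_features, td_target):
        # Semi-gradient step: move Q(s, a) toward the TD target for the
        # action that was actually taken.
        td_error = td_target - self.weights[action] @ state_features
        self.weights[action] += self.lr * td_error * np.asarray(state_features)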
Example #2
from typing import List

def play_move(self, game: Game, moves: List[Move]):
    available_actions = [move.move_id for move in moves]
    self.state = game.get_nn_features(self)
    if len(available_actions) == 1:
        # Only one legal move: no need to query the policy.
        self.action = available_actions[0]
    else:
        self.action = self._get_policy_action(game, available_actions)
    move = self._action_to_move(self.action, moves)
    # Perform the action -> get the reward and observe the next state.
    new_state, reward = self.env.step(move)
    if self.train:
        # Only update the estimator when training is enabled.
        td_target = self.get_td_target(new_state, reward, available_actions)
        self.estimator.update(self.state, td_target)
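
Example #2 moves the target computation into get_td_target(). A hedged sketch of what that method could compute, mirroring the greedy Q-learning target from Example #1 (the method name appears in the example; this body is an assumption, and numpy is assumed imported as np):

def get_td_target(self, new_state, reward, available_actions):
    # Greedy (Q-learning) target: reward plus the discounted value of the
    # best action the estimator sees in the next state.
    q_values_new_state = self.estimator.predict(
        state_features=new_state, available_actions=available_actions)
    return reward + self.discount_factor * np.nanmax(q_values_new_state)

One subtlety visible in the caller: it passes the actions that were legal in the old state. For games where the set of legal moves changes between turns, the next state's own legal moves would be the more accurate choice.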
Example #3
def _set_estimator(self, game: Game):
    if not self.estimator:
        # Linear function approximation over hand-crafted features.
        self.estimator = LinearEstimator(game.total_num_actions,
                                         game.get_linear_features(self))

Example #4

def _set_estimator(self, game: Game):
    if not self.estimator:
        # TensorFlow neural-network estimator over NN features.
        self.estimator = TensorflowNNEstimator(game.total_num_actions,
                                               game.get_nn_features(self))

Example #5

def _set_estimator(self, game: Game):
    if not self.estimator:
        # PyTorch estimator; optionally restores a saved model by name.
        self.estimator = TorchEstimator(game.total_num_actions,
                                        game.get_nn_features(self),
                                        load_model=self.load_model,
                                        name=self.name)
Example #6
def _set_estimator(self, game: Game):
    if not self.estimator:
        # Generic neural-network estimator over NN features.
        self.estimator = NeuralNetworkEstimator(game.total_num_actions,
                                                game.get_nn_features(self))
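
All four variants follow the same lazy-initialization pattern: the estimator is built on first use because its input width depends on the game's feature vector, which is only known once a concrete Game instance is in hand, and the `if not self.estimator` guard makes repeated calls no-ops. A minimal sketch of a call site, assuming _set_estimator is invoked at the top of play_move (the call site is not shown in these examples):

def play_move(self, game: Game, moves: List[Move]):
    # Lazily build the estimator on the first move of the first game.
    self._set_estimator(game)
    available_actions = [move.move_id for move in moves]
    ...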