Exemplo n.º 1
0
    def simulate(self, history):
        """Run one recursive simulation step from ``history`` and return its reward.

        Terminal histories are delegated to ``handle_terminal_state`` and
        histories whose acting player is flagged in ``self.out_of_tree`` are
        delegated to ``rollout``. Otherwise an action is chosen: through
        ``select`` when the acting player's tree already holds a node with
        children for this information state, or with ``random.choice`` over the
        available actions after expanding both player trees when it does not.
        The simulation then recurses on ``history + action`` and both player
        trees are updated with the resulting reward.

        Args:
            history: Full-information history so far; must support
                ``history + action``.

        Returns:
            ``evaluator.calculate_reward_full_info(history)`` plus
            ``self.discount_factor`` times the reward of the successor history.
        """
        if util.is_terminal(history):
            return self.handle_terminal_state(history)

        acting_player = util.player(history)
        if self.out_of_tree[acting_player]:
            return self.rollout(history)

        info_state = util.information_function(history, acting_player)
        acting_tree = get_tree(acting_player)
        needs_expansion = (info_state not in acting_tree
                           or not acting_tree[info_state].children)
        if needs_expansion:
            # Both player trees are expanded, whoever is acting.
            expand(player_one_tree, history, 1)
            expand(player_two_tree, history, -1)
            action = random.choice(util.get_available_actions(history))
            if acting_player != 0:
                # Everything below this point is played out by rollout for
                # both players (player 0 does not trigger this).
                self.out_of_tree[1] = self.out_of_tree[-1] = True
        else:
            action = self.select(history)

        successor = history + action
        immediate = evaluator.calculate_reward_full_info(history)
        discount = self.discount_factor
        future = self.simulate(successor)
        running_reward = immediate + discount * future

        for tree_owner in (1, -1):
            update_player_tree(history, action, tree_owner, running_reward)

        return running_reward
Exemplo n.º 2
0
def apply_mcts_strategy_from_tree(tree, full_tree, best_response_tree,
                                  current_history, terminals):
    """Copy ``full_tree`` into ``best_response_tree``, keeping only player 1's best child.

    Starting at ``current_history`` the function walks ``full_tree``
    recursively. At histories where ``util.player`` returns 1, only the child
    reached through the action of ``util.get_best_child(tree, player_history)``
    is kept (or no child at all when that helper returns ``None``). At every
    other history all children recorded in ``full_tree`` are kept.

    Args:
        tree: Player 1's tree, passed to ``util.get_best_child``.
        full_tree: Mapping from history to a node exposing ``children``.
        best_response_tree: Mapping filled in place with fresh
            ``potree.PoNode`` objects, one per visited history.
        current_history: History at which to start (or continue) the walk.
        terminals: List extended in place with every visited history for which
            ``util.is_terminal`` is true.

    Returns:
        ``best_response_tree`` (the same object that was passed in).
    """
    if current_history not in full_tree:
        return best_response_tree

    best_response_tree[current_history] = potree.PoNode()
    children = full_tree[current_history].children
    if util.is_terminal(current_history):
        terminals.append(current_history)

    if util.player(current_history) == 1:
        player_history = util.information_function(current_history, 1)
        best_child = util.get_best_child(tree, player_history)
        if best_child is not None:
            # Strip only the leading information-state prefix. An unbounded
            # replace() would also delete later occurrences of the same text
            # inside the action part (e.g. prefix "a", child "aa" -> "").
            # NOTE(review): assumes best_child is player_history + action.
            action = best_child.replace(player_history, "", 1)
            child = current_history + action
            children = [child]
            best_response_tree[current_history].children = {child}
        else:
            # No best child known: the new node keeps its default children
            # and the walk stops here.
            children = []
    else:
        best_response_tree[current_history].children = set(children)

    for history in children:
        apply_mcts_strategy_from_tree(tree, full_tree, best_response_tree,
                                      history, terminals)

    return best_response_tree
Exemplo n.º 3
0
def apply_mcts_strategy_from_deterministic_strategy(strategy, full_tree,
                                                    best_response_tree,
                                                    current_history,
                                                    terminals):
    """Copy ``full_tree`` into ``best_response_tree`` following a fixed player-1 strategy.

    Starting at ``current_history`` the function walks ``full_tree``
    recursively. At histories where ``util.player`` returns 1, only the child
    reached through ``strategy[player_history]`` is kept (or no child at all
    when the information state is missing from ``strategy``). At every other
    history all children recorded in ``full_tree`` are kept.

    Args:
        strategy: Mapping from player 1's information state to the action to
            append to the history.
        full_tree: Mapping from history to a node exposing ``children``.
        best_response_tree: Mapping filled in place with fresh
            ``potree.PoNode`` objects, one per visited history.
        current_history: History at which to start (or continue) the walk.
        terminals: List extended in place with every visited history for which
            ``util.is_terminal`` is true.

    Returns:
        ``best_response_tree`` (the same object that was passed in).
    """
    if current_history not in full_tree:
        return best_response_tree

    best_response_tree[current_history] = potree.PoNode()
    children = full_tree[current_history].children
    if util.is_terminal(current_history):
        terminals.append(current_history)

    if util.player(current_history) == 1:
        player_history = util.information_function(current_history, 1)
        if player_history in strategy:
            child = current_history + strategy[player_history]
            # Store a set, like the other branch below, so node.children has
            # one container type regardless of which player acts.
            best_response_tree[current_history].children = {child}
            children = [child]
        else:
            # Strategy has no entry here: the new node keeps its default
            # children and the walk stops.
            children = []
    else:
        best_response_tree[current_history].children = set(children)

    for history in children:
        apply_mcts_strategy_from_deterministic_strategy(
            strategy, full_tree, best_response_tree, history, terminals)

    return best_response_tree
Exemplo n.º 4
0
 def get_best_action_ucb(self, history, player, tree):
     """Return the available action with the highest next-node value.

     Every action from ``util.get_available_actions(history)`` is scored with
     ``self.calculate_next_node_value`` against ``player``'s information
     state. The first action with the strictly greatest score wins.

     Returns:
         The best-scoring action, or ``None`` when there are no actions or no
         score is greater than negative infinity.
     """
     info_state = util.information_function(history, player)
     top_action, top_score = None, float('-inf')
     for candidate in util.get_available_actions(history):
         score = self.calculate_next_node_value(tree, info_state, candidate, player)
         # Strict comparison keeps the earliest action on ties.
         if score > top_score:
             top_action, top_score = candidate, score
     return top_action
Exemplo n.º 5
0
def expand(tree, history, player):
    """Add ``player``'s view of ``history`` and its successors to ``tree``.

    A ``potree.PoNode`` is created for the player's information state and for
    each information state reached by appending an available action, unless
    one already exists. Each successor key is added to the parent node's
    ``children``.

    Args:
        tree: Mapping from information state to node; modified in place.
        history: Full history that is projected through
            ``util.information_function``.
        player: Player whose information state is expanded.
    """
    info_state = util.information_function(history, player)
    if info_state not in tree:
        tree[info_state] = potree.PoNode()
    parent = tree[info_state]

    for move in util.get_available_actions(info_state, player=player):
        successor = info_state + move
        if successor not in tree:
            tree[successor] = potree.PoNode()
        parent.children.add(successor)
Exemplo n.º 6
0
 def select(self, history):
     """Choose an action at ``history`` for the player returned by ``util.player``.

     For players other than 1 and -1 an action is drawn with
     ``random.choice`` from the available actions. For players 1 and -1 a
     mixing threshold ``eta = max(GAMMA, 0.9 / (1 + 0.1 * sqrt(N)))`` is
     computed, where ``N`` is the node's ``visitation_count``. A draw from
     ``random.uniform(0, 1)`` below ``eta`` selects
     ``get_best_action_ucb``; otherwise ``get_best_action_avg_strategy``
     is used.
     """
     player = util.player(history)
     player_history = util.information_function(history, player)
     if player not in {-1, 1}:
         return random.choice(util.get_available_actions(history))

     tree = get_tree(player)
     visits = tree[player_history].visitation_count
     decay = math.pow(1 + (.1 * math.sqrt(visits)), -1)
     eta = max(GAMMA, .9 * decay)  # never drops below GAMMA
     if random.uniform(0, 1) < eta:
         return self.get_best_action_ucb(history, player, tree)
     return self.get_best_action_avg_strategy(player_history, tree)
Exemplo n.º 7
0
    def get_action(self, history):
        """Return the action ``self.strategy`` prescribes for player 1 at ``history``.

        The strategy is looked up by player 1's information state. When the
        entry is a dict, its keys are the candidate actions and its values are
        passed as ``p=`` to ``choice``, which samples one of them. Any other
        entry is returned unchanged.

        NOTE(review): ``choice`` is presumably ``numpy.random.choice``, given
        the ``p=`` keyword - confirm against the file's imports.
        """
        player_history = util.information_function(history, 1)
        entry = self.strategy[player_history]
        if not isinstance(entry, dict):
            return entry

        candidates = list(entry.keys())
        probabilities = list(entry.values())
        return choice(candidates, p=probabilities)
Exemplo n.º 8
0
def update_player_tree(history, action, player, reward):
    """Update ``player``'s tree for taking ``action`` at ``history``.

    ``history`` and the successor ``str(history + action)`` are both projected
    to ``player``'s information states and handed, with ``reward``, to
    ``update`` together with the tree returned by ``get_tree(player)``.
    """
    info_before = util.information_function(history, player)
    successor = str(history + action)
    info_after = util.information_function(successor, player)
    update(get_tree(player), info_before, info_after, reward)