예제 #1
0
 def update_game_state(self, action, player):
     """Apply *action* for *player*, then advance the game until the next
     human decision point or a terminal history.

     Recursively plays out the agent's (player 1) responses and the chance
     player's (player 0) public-card draws, updating the display text and
     the running winnings total along the way.
     """
     self.history += action
     self.display_text += PLAYER_NAMES[player] + ACTION_MESSAGES[action]
     if util.is_terminal(self.history):
         winner = evaluator.get_winner(self.history)
         # Sign flip: reward is computed from the opponent's perspective.
         winnings = -evaluator.calculate_reward_full_info(self.history)
         self.total_winnings += winnings
         # Bug fix: the original concatenated the winner's name directly
         # onto "won:" (e.g. "Player 1won: 3"); add the missing space.
         self.display_text += "Game over. " + PLAYER_NAMES[
             winner] + " won: " + str(abs(winnings))
     elif util.player(self.history) == 1:
         # Agent to act: query it and recurse.
         self.update_game_state(self.agent.get_action(self.history), 1)
     elif util.player(self.history) == 0:
         # Chance node: draw a public card uniformly at random.
         self.pub_card = random.choice(
             util.get_available_cards(self.history))
         self.update_game_state(self.pub_card, 0)
예제 #2
0
    def simulate(self, history):
        """Run one self-play simulation from *history* and return the
        discounted running reward observed along the sampled path.

        NOTE(review): `expand`, `player_one_tree`, `player_two_tree`,
        `get_tree` and `update_player_tree` are referenced as free names
        here — presumably module-level helpers/trees; confirm they are in
        scope where this method is defined.
        """
        if util.is_terminal(history):
            return self.handle_terminal_state(history)

        player = util.player(history)
        # Once this player's search has left its tree, the rest of the
        # simulation is a plain rollout.
        if self.out_of_tree[player]:
            return self.rollout(history)

        player_history = util.information_function(history, player)
        player_tree = get_tree(player)
        if player_history in player_tree and player_tree[player_history].children:
            # In-tree: choose an action via the selection policy.
            action = self.select(history)
        else:
            # Unexpanded node: expand both players' trees, play a uniform
            # random action, and (for non-chance nodes) mark both players
            # out-of-tree so the remainder of this pass is a rollout.
            expand(player_one_tree, history, 1)
            expand(player_two_tree, history, -1)
            action = random.choice(util.get_available_actions(history))
            if player != 0:
                self.out_of_tree[1] = True
                self.out_of_tree[-1] = True

        new_history = history + action
        # Immediate full-information reward plus the discounted value of
        # the recursive simulation from the successor history.
        running_reward = evaluator.calculate_reward_full_info(history) + self.discount_factor * self.simulate(new_history)
        update_player_tree(history, action, 1, running_reward)
        update_player_tree(history, action, -1, running_reward)

        return running_reward
예제 #3
0
def apply_mcts_strategy_from_tree(tree, full_tree, best_response_tree,
                                  current_history, terminals):
    """Project the MCTS strategy in *tree* onto *full_tree*, filling in
    *best_response_tree* from *current_history* downward.

    At player-1 nodes only the best child (per the searched tree) is kept;
    at all other nodes every child is kept.  Terminal histories are
    appended to *terminals*.  Returns *best_response_tree*.
    """
    if current_history not in full_tree:
        return best_response_tree

    node = potree.PoNode()
    best_response_tree[current_history] = node

    if util.is_terminal(current_history):
        terminals.append(current_history)

    to_visit = full_tree[current_history].children
    if util.player(current_history) == 1:
        info_state = util.information_function(current_history, 1)
        chosen = util.get_best_child(tree, info_state)
        if chosen is None:
            # No searched child for this information state: prune here.
            to_visit = []
        else:
            # Recover the action suffix and keep only that successor.
            successor = current_history + chosen.replace(info_state, "")
            node.children = {successor}
            to_visit = [successor]
    else:
        node.children = set(to_visit)

    for child_history in to_visit:
        apply_mcts_strategy_from_tree(tree, full_tree, best_response_tree,
                                      child_history, terminals)

    return best_response_tree
예제 #4
0
def apply_mcts_strategy_from_deterministic_strategy(strategy, full_tree,
                                                    best_response_tree,
                                                    current_history,
                                                    terminals):
    """Project a deterministic *strategy* (info-state -> action) onto
    *full_tree*, filling in *best_response_tree* from *current_history*
    downward.

    At player-1 nodes only the strategy's chosen child is kept; at all
    other nodes every child is kept.  Terminal histories are appended to
    *terminals*.  Returns *best_response_tree*.
    """
    if current_history not in full_tree:
        return best_response_tree

    best_response_tree[current_history] = potree.PoNode()
    children = full_tree[current_history].children
    if util.is_terminal(current_history):
        terminals.append(current_history)

    if util.player(current_history) == 1:
        player_history = util.information_function(current_history, 1)
        if player_history in strategy:
            child = current_history + strategy[player_history]
            # Consistency fix: store children as a set, matching
            # apply_mcts_strategy_from_tree and the non-player-1 branch
            # below (the original assigned a list here); also reuse
            # `child` instead of rebuilding the same concatenation.
            best_response_tree[current_history].children = {child}
            children = [child]
        else:
            # Strategy has no entry for this information state: prune.
            children = []
    else:
        best_response_tree[current_history].children = set(children)

    for history in children:
        apply_mcts_strategy_from_deterministic_strategy(
            strategy, full_tree, best_response_tree, history, terminals)

    return best_response_tree
예제 #5
0
def get_deterministic_strategy(tree):
    """Build a deterministic strategy for player 1 from *tree*.

    Maps every non-terminal player-1 key that has children to the final
    character (the action) of its best child.
    """
    return {
        key: util.get_best_child(tree, key, 1)[-1]
        for key in tree
        if util.player(key) == 1
        and not util.is_terminal(key)
        and tree[key].children
    }
예제 #6
0
 def select(self, history):
     """Select an action at *history*.

     Chance nodes (and any non-{-1, 1} actor) act uniformly at random.
     For the two players, with probability eta the UCB-best action is
     taken, otherwise the average-strategy action; eta decays with the
     node's visitation count but never drops below GAMMA.
     """
     actor = util.player(history)
     if actor not in {-1, 1}:
         return random.choice(util.get_available_actions(history))

     info_state = util.information_function(history, actor)
     tree = get_tree(actor)
     visits = tree[info_state].visitation_count
     # Decaying exploration weight, floored at GAMMA.
     decayed = math.pow(1 + (.1 * math.sqrt(visits)), -1)
     eta = max((GAMMA, .9 * decayed))
     if random.uniform(0, 1) < eta:
         return self.get_best_action_ucb(history, actor, tree)
     return self.get_best_action_avg_strategy(info_state, tree)
예제 #7
0
def propagate_rewards_recursive(best_response_tree, history):
    """Propagate node values from *history* up to the root of
    *best_response_tree*.

    Each ancestor's value is recomputed from its children: the average at
    chance (player 0) nodes, the single kept child's value at player-1
    nodes, and the best (player -1) child's value otherwise.
    """
    # The original tail recursion is expressed as a loop; the trace of
    # reads and writes is identical.
    while history != "":
        parent = best_response_tree[history].parent
        acting_player = util.player(parent)
        if acting_player == 0:
            # Chance node: expected value over all children.
            propagated = util.get_average_child_value(
                best_response_tree, parent)
        elif acting_player == 1:
            # Player-1 nodes keep exactly one child in this tree.
            only_child = next(iter(best_response_tree[parent].children))
            propagated = best_response_tree[only_child].value
        else:
            best_sibling = util.get_best_child(best_response_tree, parent, -1)
            propagated = best_response_tree[best_sibling].value
        best_response_tree[parent].value = propagated
        history = parent
예제 #8
0
def get_stochastic_strategy(tree):
    """Build a stochastic strategy for player 1 from *tree*.

    For every non-terminal player-1 key with children, maps each child's
    final character (the action) to its visit-count share among siblings.
    """
    strategy = {}

    for key in tree:
        if util.player(key) != 1:
            continue
        if util.is_terminal(key) or not tree[key].children:
            continue

        children = tree[key].children
        total_child_visits = sum(
            tree[child].visitation_count for child in children)

        if total_child_visits == 0:
            # Robustness fix: the original divided by zero when no child
            # had been visited yet; fall back to a uniform policy.
            uniform = 1.0 / len(children)
            strategy[key] = {child[-1]: uniform for child in children}
        else:
            strategy[key] = {
                child[-1]: tree[child].visitation_count / total_child_visits
                for child in children
            }

    return strategy