def update_game_state(self, action, player):
    """Advance the game by one action, updating history and display text.

    Appends `action` to the running history, logs it to the display text,
    then either finishes the game (terminal state) or recurses: the agent
    acts at player-1 nodes, and a random public card is dealt at chance
    (player-0) nodes.

    Args:
        action: action character (or public card) to append to the history.
        player: index of the actor, used to label the display text.
    """
    self.history += action
    self.display_text += PLAYER_NAMES[player] + ACTION_MESSAGES[action]
    if util.is_terminal(self.history):
        winner = evaluator.get_winner(self.history)
        # NOTE(review): sign flip presumably converts the reward to the
        # displayed player's perspective — confirm against evaluator.
        winnings = -evaluator.calculate_reward_full_info(self.history)
        self.total_winnings += winnings
        # BUG FIX: "won:" lacked a leading space, rendering e.g. "Player 1won: 2".
        self.display_text += ("Game over. " + PLAYER_NAMES[winner] +
                              " won: " + str(abs(winnings)))
    elif util.player(self.history) == 1:
        # Agent's turn: ask the agent for its action and recurse.
        self.update_game_state(self.agent.get_action(self.history), 1)
    elif util.player(self.history) == 0:
        # Chance node: deal a uniformly random available public card.
        self.pub_card = random.choice(
            util.get_available_cards(self.history))
        self.update_game_state(self.pub_card, 0)
def simulate(self, history):
    """Run one MCTS simulation from `history`; return the discounted reward.

    Descends the per-player information-set trees while the current node is
    in-tree, expands both players' trees at the frontier, and falls back to a
    random rollout once out of tree. Rewards are backed up into both players'
    trees on the way out of the recursion.
    """
    if util.is_terminal(history):
        return self.handle_terminal_state(history)
    player = util.player(history)
    if self.out_of_tree[player]:
        # Past this player's expanded frontier: finish with a rollout.
        return self.rollout(history)
    player_history = util.information_function(history, player)
    player_tree = get_tree(player)
    if player_history in player_tree and player_tree[player_history].children:
        # In-tree: pick an action with the selection policy.
        action = self.select(history)
    else:
        # Frontier: expand both players' trees, then act uniformly at random.
        # NOTE(review): `player_one_tree` / `player_two_tree` are not defined
        # in this function — presumably module-level globals; confirm they
        # are the same objects returned by `get_tree(1)` / `get_tree(-1)`.
        expand(player_one_tree, history, 1)
        expand(player_two_tree, history, -1)
        action = random.choice(util.get_available_actions(history))
        if player != 0:
            # After expanding at a decision node, the remainder of this
            # simulation is out-of-tree for both players.
            self.out_of_tree[1] = True
            self.out_of_tree[-1] = True
    new_history = history + action
    # Immediate reward plus discounted value of the rest of the simulation.
    running_reward = evaluator.calculate_reward_full_info(history) + self.discount_factor * self.simulate(new_history)
    update_player_tree(history, action, 1, running_reward)
    update_player_tree(history, action, -1, running_reward)
    return running_reward
def apply_mcts_strategy_from_tree(tree, full_tree, best_response_tree, current_history, terminals):
    """Copy `full_tree` into `best_response_tree`, restricting player 1's
    moves at each information set to the best child found in `tree`.

    Terminal histories encountered along the way are appended to `terminals`.
    Returns `best_response_tree` (also mutated in place).
    """
    if current_history not in full_tree:
        return best_response_tree
    best_response_tree[current_history] = potree.PoNode()
    to_visit = full_tree[current_history].children
    if util.is_terminal(current_history):
        terminals.append(current_history)
    if util.player(current_history) == 1:
        info_set = util.information_function(current_history, 1)
        best_child = util.get_best_child(tree, info_set)
        if best_child is None:
            # No best child known for this information set: prune the subtree.
            to_visit = []
        else:
            # The action is whatever the best child appends to the info set.
            chosen = current_history + best_child.replace(info_set, "")
            to_visit = [chosen]
            best_response_tree[current_history].children = {chosen}
    else:
        # Chance / opponent nodes keep every child.
        best_response_tree[current_history].children = set(to_visit)
    for child in to_visit:
        apply_mcts_strategy_from_tree(tree, full_tree, best_response_tree, child, terminals)
    return best_response_tree
def apply_mcts_strategy_from_deterministic_strategy(strategy, full_tree, best_response_tree, current_history, terminals):
    """Copy `full_tree` into `best_response_tree`, restricting player 1's
    moves at each information set to the single action in `strategy`.

    Args:
        strategy: dict mapping player-1 information sets to one action.
        full_tree: full game tree (dict of history -> node).
        best_response_tree: output tree, mutated in place.
        current_history: history string at which to (re)start the copy.
        terminals: list collecting terminal histories, mutated in place.

    Returns:
        best_response_tree, for caller convenience.
    """
    if current_history not in full_tree:
        return best_response_tree
    best_response_tree[current_history] = potree.PoNode()
    children = full_tree[current_history].children
    if util.is_terminal(current_history):
        terminals.append(current_history)
    if util.player(current_history) == 1:
        player_history = util.information_function(current_history, 1)
        if player_history in strategy:
            child = current_history + strategy[player_history]
            # CONSISTENCY FIX: store children as a set, as the else branch
            # (and the rest of this module) does — not a list.
            best_response_tree[current_history].children = {child}
            children = [child]
        else:
            # Strategy says nothing about this information set: prune.
            children = []
    else:
        # Chance / opponent nodes keep every child.
        best_response_tree[current_history].children = set(children)
    for history in children:
        apply_mcts_strategy_from_deterministic_strategy(
            strategy, full_tree, best_response_tree, history, terminals)
    return best_response_tree
def get_deterministic_strategy(tree):
    """Extract a deterministic player-1 strategy from an MCTS tree.

    For every non-terminal player-1 node that has children, maps the node key
    to the last character of its best child (actions are single characters
    appended to the history).

    Returns:
        dict mapping player-1 tree keys to a single action character.
    """
    strategy = {}
    for key in tree.keys():
        if util.player(key) == 1:
            if not util.is_terminal(key) and tree[key].children:
                best_child = util.get_best_child(tree, key, 1)
                # ROBUSTNESS FIX: get_best_child may return None (callers
                # elsewhere in this file guard for that); skip such nodes
                # instead of raising TypeError on `None[-1]`.
                if best_child is not None:
                    strategy[key] = best_child[-1]
    return strategy
def select(self, history):
    """Choose an action at `history` during tree descent.

    Decision nodes (players 1 and -1) mix a UCB-greedy pick with the average
    strategy, the mixing weight decaying with the node's visitation count but
    bounded below by GAMMA. Chance nodes act uniformly at random.
    """
    acting_player = util.player(history)
    info_set = util.information_function(history, acting_player)
    if acting_player not in {-1, 1}:
        # Chance node: uniform random action.
        return random.choice(util.get_available_actions(history))
    tree = get_tree(acting_player)
    visits = tree[info_set].visitation_count
    # Decays toward 0 as the node is visited more often.
    decay = math.pow(1 + (.1 * math.sqrt(visits)), -1)
    eta = max((GAMMA, .9 * decay))
    if random.uniform(0, 1) < eta:
        return self.get_best_action_ucb(history, acting_player, tree)
    return self.get_best_action_avg_strategy(info_set, tree)
def propagate_rewards_recursive(best_response_tree, history):
    """Propagate node values from `history` up toward the tree root.

    Chance (player 0) parents take the average of their children's values,
    player-1 parents take their single fixed-strategy child's value, and
    player -1 parents best-respond by taking their best child's value.
    Recursion stops at the empty (root) history.
    """
    if history == "":
        return
    parent = best_response_tree[history].parent
    acting = util.player(parent)
    if acting == 0:
        # Chance node: expected value over all children.
        new_value = util.get_average_child_value(best_response_tree, parent)
    elif acting == 1:
        # Player 1 follows a fixed strategy, so the parent has one child.
        only_child = next(iter(best_response_tree[parent].children))
        new_value = best_response_tree[only_child].value
    else:
        # Player -1 best-responds: take the best child's value.
        best_sibling = util.get_best_child(best_response_tree, parent, -1)
        new_value = best_response_tree[best_sibling].value
    best_response_tree[parent].value = new_value
    propagate_rewards_recursive(best_response_tree, parent)
def get_stochastic_strategy(tree):
    """Extract player 1's stochastic (visit-proportional) strategy from a tree.

    For every non-terminal player-1 node with children, each action (the last
    character of a child key) gets probability proportional to that child's
    visitation count.

    Returns:
        dict mapping player-1 tree keys to {action: probability} dicts.
    """
    strategy = {}
    for key in tree.keys():
        if util.player(key) == 1:
            if not util.is_terminal(key) and tree[key].children:
                strategy[key] = {}
                children = tree[key].children
                total_child_visits = sum(
                    tree[child].visitation_count for child in children)
                if total_child_visits == 0:
                    # ROBUSTNESS FIX: children can be expanded but never
                    # simulated; fall back to a uniform distribution instead
                    # of dividing by zero.
                    uniform_prob = 1.0 / len(children)
                    for child in children:
                        strategy[key][child[-1]] = uniform_prob
                else:
                    for child in children:
                        strategy[key][child[-1]] = (
                            tree[child].visitation_count / total_child_visits)
    return strategy