Example #1
0
    def evaluate_state(self,
                       state: State,
                       list_of_actions: List[Action] = None) -> float:
        # antisymmetric evaluation: the value of the state minus the value of
        # the same position with the active player swapped
        inversed_state = StateAsDict(state).to_state()
        inversed_state.change_active_player()
        return self.value_function.evaluate(
            state) - self.value_function.evaluate(inversed_state)
    def choose_act(self, mode, info=False):

        current_state_as_dict = StateAsDict(self.env.current_state_of_the_game)
        list_of_actions = self.env.action_space.list_of_actions
        if list_of_actions:
            best_action = None
            best_action_value = -100
            for action in list_of_actions:
                state_copy = current_state_as_dict.to_state()
                action.execute(state_copy)
                current_value = self.model.get_value(state_copy)
                if current_value > best_action_value:
                    best_action_value = current_value
                    best_action = action

            if not info:
                return best_action
            if info:
                return best_action, best_action_value

        else:
            if not info:
                return None
            if info:
                return None, -1
    def generate_all_tree_data_main_track(self,
                                          confidence_threshold: float = 0.1,
                                          count_threshold: int = 6,
                                          confidence_limit: int = 2):
        self.clean_memory()
        print('Collecting tree data.')
        X = []
        Y = []
        vertex = self.last_vertex
        if len(vertex.children) > 0:
            child_idx = random.randint(0, len(vertex.children) - 1)
            child_state = StateAsDict(
                vertex.children[child_idx].return_state()).to_state()
            child_state_inversed = StateAsDict(
                vertex.children[child_idx].return_state()).to_state()
            child_state_inversed.change_active_player()
            X.append(child_state)
            Y.append(vertex.children[child_idx].value_acc.get())
            X.append(child_state_inversed)
            Y.append(-vertex.children[child_idx].value_acc.get())
        while vertex.parent is not None:
            # walk from the last vertex up to the root, storing each state and
            # its player-swapped mirror with the negated MCTS value
            vertex_state = StateAsDict(vertex.return_state()).to_state()
            vertex_state_inversed = StateAsDict(
                vertex.return_state()).to_state()
            vertex_state_inversed.change_active_player()
            vertex_value = vertex.value_acc.get()
            X.append(vertex_state)
            Y.append(vertex_value)
            X.append(vertex_state_inversed)
            Y.append(-vertex_value)
            vertex = vertex.parent
        return {'state': X, 'mcts_value': Y}
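
A minimal, self-contained sketch of the antisymmetric evaluation used in evaluate_state above, which is also why the tree data stores each state together with its player-swapped mirror and the negated value. TwoPlayerState and DummyValue below are illustrative stand-ins, not part of the Splendor codebase.

from dataclasses import dataclass

@dataclass
class TwoPlayerState:
    points: tuple            # (points of player 0, points of player 1)
    active_player: int = 0

    def swapped(self):
        # analogue of change_active_player(): same position, other player to move
        return TwoPlayerState(self.points, 1 - self.active_player)

class DummyValue:
    def evaluate(self, state: TwoPlayerState) -> float:
        me, opp = state.active_player, 1 - state.active_player
        return state.points[me] - 0.5 * state.points[opp]

def evaluate_state(value_function, state: TwoPlayerState) -> float:
    # score(s) = V(s) - V(swap(s)); hence score(swap(s)) == -score(s)
    return value_function.evaluate(state) - value_function.evaluate(state.swapped())

if __name__ == '__main__':
    v, s = DummyValue(), TwoPlayerState(points=(7, 3))
    assert abs(evaluate_state(v, s) + evaluate_state(v, s.swapped())) < 1e-9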
Example #4
0
    def choose_act(self, mode, info=False):
        current_state_as_dict = StateAsDict(self.env.current_state_of_the_game)
        list_of_actions = self.env.action_space.list_of_actions
        if list_of_actions:
            best_action = None
            best_action_value = -float('inf')
            for action in list_of_actions:
                state_copy = current_state_as_dict.to_state()
                action.execute(state_copy)
                state_copy.change_active_player()
                # print('*******************')
                current_value = self.evaluator.evaluate(state_copy)
                # print(f'State_copy = {StateAsDict(state_copy)}')
                # print(f'Action = {action} val = {current_value}')
                # print('------------------------------------')
                if current_value > best_action_value:
                    best_action_value = current_value
                    best_action = action

            if not info:
                return best_action
            if info:
                return best_action, best_action_value

        else:
            if not info:
                return None
            if info:
                return None, -1
    def choose_act(self, mode):

        self.epsilon = self.epsilon*0.999995
        if not self.explore:
            current_state_as_dict = StateAsDict(self.env.current_state_of_the_game)
            list_of_actions = self.env.action_space.list_of_actions
            if not self.info:
                return self.model.choose_best_action(current_state_as_dict, list_of_actions)
            if self.info:
                return self.model.choose_best_action_with_q_value(current_state_as_dict, list_of_actions)

        if self.explore:
            current_state_as_dict = StateAsDict(self.env.current_state_of_the_game)
            list_of_actions = self.env.action_space.list_of_actions
            p = np.random.uniform(0,1)
            best_action, best_eval = self.model.choose_best_action_with_q_value(current_state_as_dict, list_of_actions)
            if p >= self.epsilon:
                actual_action = best_action
                actual_eval = best_eval
            if p < self.epsilon:
                if list_of_actions:
                    actual_action = random.choice(list_of_actions)
                    actual_eval = self.model.get_q_value(current_state_as_dict, actual_action)[0]
                else:
                    actual_action = None
                    actual_eval = -1
                    best_eval = -1

            return actual_action, actual_eval, best_eval
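
A compact sketch of the same epsilon-greedy rule with multiplicative decay (0.999995 per call, as above); EpsilonGreedy and q_values are illustrative names, not the project's API.

import random

class EpsilonGreedy:
    def __init__(self, epsilon: float = 1.0, decay: float = 0.999995):
        self.epsilon = epsilon
        self.decay = decay

    def choose(self, q_values: dict):
        # q_values maps action -> estimated Q value; returns
        # (chosen action, its Q value, best Q value), mirroring the
        # (actual_action, actual_eval, best_eval) triple above
        self.epsilon *= self.decay
        if not q_values:
            return None, -1, -1
        best_action = max(q_values, key=q_values.get)
        if random.random() >= self.epsilon:
            return best_action, q_values[best_action], q_values[best_action]
        action = random.choice(list(q_values))
        return action, q_values[action], q_values[best_action]

# after n calls the exploration rate is epsilon * 0.999995 ** n,
# e.g. about 0.61 of its initial value after 100,000 calls
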
    def choose_act(self, mode) -> Action:

        # first we load the observation into the private environment
        current_points = self.env.current_state_of_the_game.active_players_hand(
        ).number_of_my_points()

        if len(self.env.action_space.list_of_actions) > 0:
            actions = []
            potential_reward_max = self.action_to_avoid
            numerator = self.depth - 1
            primary_state = StateAsDict(self.env.current_state_of_the_game)
            self.env_dict[numerator] = StateAsDict(
                self.env.current_state_of_the_game)
            for action in self.env.action_space.list_of_actions:
                ae = action.evaluate(self.env.current_state_of_the_game)
                # weighted heuristic: crossing POINTS_TO_WIN after this action,
                # the card's points, nobles gained, a second card feature and
                # the net gem flow, each scaled by the agent's weights
                potential_reward = (
                    np.floor((current_points + ae["card"][2]) / POINTS_TO_WIN) * self.weight[0]
                    + self.weight[1] * ae["card"][2]
                    + self.weight[2] * ae["nobles"]
                    + self.weight[3] * ae["card"][0]
                    + self.weight[4] * sum(ae["gems_flow"]))

                potential_reward -= self.decay * self.deep_evaluation(
                    action, numerator - 1, mode)
                self.restore_env(numerator)

                if self.collect_stats:
                    self.stats_dataframe = self.stats_dataframe.append(
                        {
                            'state': primary_state,
                            'action': action.to_dict(),
                            'evaluation': potential_reward
                        },
                        ignore_index=True)
                    self.stats_dataframe_vectorized = self.stats_dataframe_vectorized.append(
                        {
                            'state_vector': vectorize_state(primary_state),
                            'action_vector': vectorize_action(action),
                            'evaluation': potential_reward
                        },
                        ignore_index=True)

                if potential_reward > potential_reward_max:
                    potential_reward_max = potential_reward
                    actions = []
                    actions.append(action)
                elif potential_reward == potential_reward_max:
                    actions.append(action)

            self.env.reset()
            self.env.load_state_from_dict(self.env_dict[numerator])

            return random.choice(actions)

        else:
            return None
class DeterministicObservation(SplendorObservation):
    def __init__(self, state: State):
        super().__init__('deterministic')
        self.observation_dict = StateAsDict(state)

    def recreate_state(self):
        return self.observation_dict.to_state()
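
Illustrative round trip with DeterministicObservation above (the `state` variable is assumed to be an existing State instance, not defined here):

obs = DeterministicObservation(state)
restored = obs.recreate_state()   # a fresh State rebuilt from the stored StateAsDict
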
    def deep_evaluation(self, action, numerator, mode):

        self.get_temp_env(action, numerator, mode)

        if numerator > 1:
            current_points = self.env.current_state_of_the_game.active_players_hand(
            ).number_of_my_points()
            self.env_dict[numerator] = StateAsDict(
                self.env.current_state_of_the_game)
            if len(self.env.action_space.list_of_actions) > 0:
                potential_reward_list = []
                for action in self.env.action_space.list_of_actions:
                    ae = action.evaluate(self.env.current_state_of_the_game)
                    potential_reward = (
                        np.floor((current_points + ae["card"][2]) / POINTS_TO_WIN) * self.weight[0]
                        + self.weight[1] * ae["card"][2]
                        + self.weight[2] * ae["nobles"]
                        + self.weight[3] * ae["card"][0]
                        + self.weight[4] * sum(ae["gems_flow"]))
                    potential_reward -= self.decay * self.deep_evaluation(
                        action, numerator - 1, mode) * pow(
                            -1, self.depth - numerator + 1)
                    potential_reward_list.append(potential_reward)
                    self.restore_env(numerator)

                self.restore_env(numerator + 1)
                reward = max(potential_reward_list)
            else:
                reward = self.action_to_avoid
        else:
            reward = self.evaluate_actions()

        return reward
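
A stripped-down sketch of the alternating lookahead that deep_evaluation implements: the best immediate reward at a node minus the discounted value of the opponent's best reply (the pow(-1, ...) sign bookkeeping is folded into a plain subtraction here). Node and lookahead_value are illustrative stand-ins for the environment machinery, not the project's API.

from dataclasses import dataclass, field
from typing import List

@dataclass
class Node:
    immediate_reward: float = 0.0
    children: List['Node'] = field(default_factory=list)

def lookahead_value(node: Node, depth: int, decay: float) -> float:
    # value of the best move from this node, discounting the opponent's best reply
    if depth == 0 or not node.children:
        return node.immediate_reward
    return max(child.immediate_reward - decay * lookahead_value(child, depth - 1, decay)
               for child in node.children)

if __name__ == '__main__':
    tree = Node(children=[Node(2.0, [Node(5.0), Node(1.0)]), Node(3.0, [Node(0.5)])])
    print(lookahead_value(tree, depth=2, decay=0.9))   # approximately 2.955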
Example #9
0
    def choose_act(self, mode) -> Action:

        # first we load the observation into the private environment
        current_points = self.env.current_state_of_the_game.active_players_hand(
        ).number_of_my_points()

        if len(self.env.action_space.list_of_actions) > 0:
            actions = []
            points = []
            numerator = self.depth - 1

            self.env_dict[numerator] = StateAsDict(
                self.env.current_state_of_the_game)
            for action in self.env.action_space.list_of_actions:
                ae = action.evaluate(self.env.current_state_of_the_game)
                potential_reward = (
                    np.floor((current_points + ae["card"][2]) / POINTS_TO_WIN) * self.weight[0]
                    + self.weight[1] * ae["card"][2]
                    + self.weight[2] * ae["nobles"]
                    + self.weight[3] * ae["card"][0]
                    + self.weight[4] * sum(ae["gems_flow"]))
                points.append(potential_reward)
                actions.append(action)

            values = set(points)
            if len(values) >= self.breadth:
                actions = [
                    actions[i] for i, point in enumerate(points)
                    if point >= sorted(values)[-self.breadth]
                ]
            if len(actions) > 1:
                actions_ = []
                points_ = []
                for action in actions:
                    ae = action.evaluate(self.env.current_state_of_the_game)
                    potential_reward = (
                        np.floor((current_points + ae["card"][2]) / POINTS_TO_WIN) * self.weight[0]
                        + self.weight[1] * ae["card"][2]
                        + self.weight[2] * ae["nobles"]
                        + self.weight[3] * ae["card"][0]
                        + self.weight[4] * sum(ae["gems_flow"]))
                    potential_reward -= self.decay * self.deep_evaluation(
                        action, numerator - 1, mode)

                    points_.append(potential_reward)
                    actions_.append(action)
                    self.restore_env(numerator)

                actions = [
                    actions_[i] for i, point in enumerate(points_)
                    if point >= sorted(set(points_))[-1]
                ]

                self.env.reset()
                self.env.load_state_from_dict(self.env_dict[numerator])

            return random.choice(actions)

        else:
            return None
    def generate_all_tree_data(self):
        self.clean_memory()
        # BFS
        kiu = [self.root]

        print('Collecting tree data.')

        while len(kiu) > 0:
            # take first:
            node_to_eval = kiu.pop(0)
            if node_to_eval.value_acc.count() > 0:
                for child in node_to_eval.children:
                    if child.value_acc.count() > 0:
                        kiu.append(child)
                        child_state_as_dict = StateAsDict(child.return_state())
                        self.stats_dataframe = self.stats_dataframe.append(
                            {
                                'state': child_state_as_dict.to_state(),
                                'mcts_value': child.value_acc.get()
                            },
                            ignore_index=True)
        return self.stats_dataframe
    def generate_all_tree_data_as_list(self,
                                       confidence_threshold: float = 0.1,
                                       count_threshold: int = 6,
                                       confidence_limit: int = 2):
        self.clean_memory()
        # BFS
        kiu = [self.root]
        confidence_count = 0
        print('Collecting tree data.')
        X = []
        Y = []
        while len(kiu) > 0:
            # take first:
            node_to_eval = kiu.pop(0)
            if node_to_eval.value_acc.count() > 0:
                for child in node_to_eval.children:
                    # if child.value_acc.count() >= count_threshold or child.value_acc.perfect_value is not None:
                    if child.value_acc.get_confidence(
                    ) >= confidence_threshold:
                        confidence_count += 1
                    if (
                            child.value_acc.get_confidence() >= confidence_threshold and confidence_count <= confidence_limit) \
                            or child.value_acc.count() >= count_threshold:
                        kiu.append(child)
                        child_state_as_dict = StateAsDict(child.return_state())
                        current_state = child_state_as_dict.to_state()
                        current_state_inversed = child_state_as_dict.to_state()
                        current_state_inversed.change_active_player()
                        current_value = child.value_acc.get()

                        X.append(current_state)
                        Y.append(current_value)
                        X.append(current_state_inversed)
                        Y.append(-current_value)

        return {'state': X, 'mcts_value': Y}
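
The returned dict pairs every stored state with its signed MCTS value, ready to be used as supervised training data; the conversion below is only an illustration, where `tree` stands in for an object exposing the method above.

import pandas as pd

data = tree.generate_all_tree_data_as_list(confidence_threshold=0.1,
                                           count_threshold=6)
df = pd.DataFrame(data)      # columns: 'state', 'mcts_value'
print(len(df), 'training examples')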
    def eval_leaf(self, state: State) -> float:
        mode = "deterministic"
        self.env.update_actions_light()
        current_points = self.env.current_state_of_the_game.active_players_hand(
        ).number_of_my_points()

        if len(self.env.action_space.list_of_actions) > 0:
            actions = []
            points = []
            numerator = self.depth - 1
            self.env_dict[numerator] = StateAsDict(
                self.env.current_state_of_the_game)
            for action in self.env.action_space.list_of_actions:
                ae = action.evaluate(self.env.current_state_of_the_game)
                potential_reward = (
                    np.floor((current_points + ae["card"][2]) / POINTS_TO_WIN) * self.weight[0]
                    + self.weight[1] * ae["card"][2]
                    + self.weight[2] * ae["nobles"]
                    + self.weight[3] * ae["card"][0]
                    + self.weight[4] * sum(ae["gems_flow"]))
                points.append(potential_reward)
                actions.append(action)

            values = set(points)
            if len(values) >= self.breadth:
                actions = [
                    actions[i] for i, point in enumerate(points)
                    if point >= sorted(values)[-self.breadth]
                ]
            if len(actions) > 1:
                actions_ = []
                points_ = []
                for action in actions:
                    ae = action.evaluate(self.env.current_state_of_the_game)
                    potential_reward = (
                        np.floor((current_points + ae["card"][2]) / POINTS_TO_WIN) * self.weight[0]
                        + self.weight[1] * ae["card"][2]
                        + self.weight[2] * ae["nobles"]
                        + self.weight[3] * ae["card"][0]
                        + self.weight[4] * sum(ae["gems_flow"]))
                    potential_reward -= self.decay * self.deep_evaluation(
                        action, numerator - 1, mode)

                    points_.append(potential_reward)
                    self.restore_env(numerator)

                self.env.reset()
                self.env.load_state_from_dict(self.env_dict[numerator])
                return max(points_)

            # if filtering left only one candidate action, fall back to its shallow reward
            return max(points)

        else:
            return -100
Example #13
0
def evaluate_states(files_dir, dump_dir):
    evaluator = ValueFunction()

    list_of_files = os.listdir(files_dir)
    for file_name in list_of_files:
        with open(os.path.join(files_dir, file_name), 'rb') as f:
            X, _ = pickle.load(f)

        Y = []
        for x in X:
            state_to_eval = StateAsDict(x).to_state()
            Y.append(evaluator.evaluate(state_to_eval))
            del state_to_eval

        with open(os.path.join(dump_dir, file_name), 'wb') as f:
            pickle.dump((X, Y), f)
            print(len(X))
        del X
        del Y
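
Illustrative invocation (directory names are placeholders, not taken from the project): each pickle under files_dir is expected to hold an (X, _) tuple of states, and the function writes an (X, Y) tuple with the computed evaluations under the same file name in dump_dir.

evaluate_states('states_raw/', 'states_evaluated/')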
Example #14
0
    def load_state_from_dict(self, state_as_dict: StateAsDict):
        self.current_state_of_the_game = state_as_dict.to_state()
        self.is_done = False

    def __init__(self, state: State):
        super().__init__('deterministic')
        self.observation_dict = StateAsDict(state)
Example #16
0
def dict_to_state(s_dict):
    s_as_dict = StateAsDict()
    s_as_dict.load_from_dict(s_dict)
    state_to_return = s_as_dict.to_state()
    state_to_return.active_player_id = 0
    return state_to_return
    def choose_action(self, state: State) -> Action:
        list_of_actions = generate_all_legal_actions(state)
        return self.model.choose_best_action(StateAsDict(state), list_of_actions)
Example #18
0
    def run_one_duel(self,
                     list_of_agents: List[Agent],
                     starting_agent_id: int = 0,
                     render_game: bool = False) -> GameStatisticsDuels:
        """Runs one game between two agents.
        :param:
        list_of_agents: List of agents to play, they will play in the order given by the list
        starting_agent_id:  Id of the agent who starts the game.
        show_game: If True, GUI will appear showing the game. """

        #prepare the game
        self.env.reset()
        self.env.set_active_player(starting_agent_id)
        #set players names:
        self.env.set_players_names([agent.name for agent in list_of_agents])
        is_done = False
        #set the initial agent id
        active_agent_id = starting_agent_id
        #set the initial observation
        full_state = self.env.current_state_of_the_game
        number_of_actions = 0
        results_dict = {}
        # Id of the player who first reaches the number of points needed to win
        first_winner_id = None
        checked_all_players_after_first_winner = False
        previous_actions = [None]

        if render_game:
            self.env.render()
            time.sleep(GAME_INITIAL_DELAY)

        while number_of_actions < MAX_NUMBER_OF_MOVES and not (
                is_done and checked_all_players_after_first_winner):
            action = list_of_agents[
                active_agent_id].deterministic_choose_action(
                    full_state, previous_actions)
            if action is None:
                print('None action by {}'.format(
                    list_of_agents[active_agent_id].name))
                print('Current state of the game')
                state_str = StateAsDict(self.env.current_state_of_the_game)
                print(state_str)
            previous_actions = [action]
            if render_game:
                self.env.render()
            full_state, reward, is_done, info = self.env.deterministic_step(
                action)
            if is_done:
                results_dict[list_of_agents[active_agent_id].my_name_with_id()] = \
                    OneAgentStatistics(reward, self.env.points_of_player_by_id(active_agent_id), int(reward == 1))
                if first_winner_id is None:
                    first_winner_id = active_agent_id
                checked_all_players_after_first_winner = active_agent_id == (
                    first_winner_id - 1) % len(list_of_agents)
            active_agent_id = (active_agent_id + 1) % len(list_of_agents)

            number_of_actions += 1

        one_game_statistics = GameStatisticsDuels(list_of_agents)
        one_game_statistics.register_from_dict(results_dict)

        print(time.time())

        return one_game_statistics
        'name': 'RandomAgent - uniform_on_types '
    },
    'board': {
        'nobles_on_board': {104, 108, 102},
        'cards_on_board': {33, 4, 69, 71, 72, 41, 9, 10, 44, 79, 86, 26},
        'gems_on_board': [2, 4, 3, 3, 4, 3],
        'deck_order': [{
            'Row.CHEAP': [
                38, 0, 76, 18, 55, 19, 3, 40, 7, 43, 23, 39, 37, 42, 77, 75,
                25, 59, 2, 5, 54, 73, 56, 21, 57, 22, 1, 36, 6, 60, 61, 78, 74,
                58, 24
            ]
        }, {
            'Row.MEDIUM': [
                12, 49, 47, 31, 45, 62, 11, 81, 46, 80, 66, 67, 48, 83, 82, 65,
                8, 84, 13, 29, 27, 64, 85, 63, 30
            ]
        }, {
            'Row.EXPENSIVE':
            [35, 53, 87, 88, 15, 14, 50, 17, 16, 68, 70, 32, 52, 51, 89, 34]
        }]
    },
    'active_player_id': 1
}

stanek = StateAsDict()
stanek.load_from_dict(dict)

fufu = SplendorGUI()
fufu.draw_state(stanek.to_state())
fufu.keep_window_open(300)
Example #20
0
    def run_one_game_and_collect_data(self, debug_info=True):

        last_value_player_0 = None
        last_value_player_1 = None
        old_value = None
        self.env.reset()
        observation = self.env.show_observation('deterministic')
        is_done = False
        number_of_moves = 0

        debug_collected_data = pd.DataFrame(columns=('active_player_id', 'winner_id', 'reward', 'best_value'))
        collected_data = pd.DataFrame(columns=('state_as_vector', 'value'))
        extra_move_done = False

        while not (is_done and extra_move_done) and number_of_moves < MAX_NUMBER_OF_MOVES:

            if is_done:
                extra_move_done = True

            action, best_value = self.agent.choose_action(observation, [None], info=True)
            #print('best value = {}'.format(best_value))
            observation, reward, is_done, info = self.env.step('deterministic', action)
            previous_player_id = self.env.previous_player_id()
            winner_id = info['winner_id']


            current_state_as_dict = StateAsDict(self.env.current_state_of_the_game)

            if previous_player_id == 0:
                old_value = last_value_player_0
            if previous_player_id == 1:
                old_value = last_value_player_1

            if debug_info:
                debug_collected_data = debug_collected_data.append({
                                                        'new_value': self.new_value_formula(old_value, best_value,
                                                                                            winner_id, reward, alpha=0.1),
                                                        'active_player_id' : self.env.previous_player_id(),
                                                        'winner_id' : winner_id,
                                                        'reward' : reward,
                                                        'best_value' : best_value,
                                                        'old_value': old_value,
                                                        'pa_points' : self.env.previous_players_hand().number_of_my_points()},
                                                        ignore_index=True)


            collected_data = collected_data.append({'state_as_vector' : vectorize_state(current_state_as_dict),
                                                        'value': self.new_value_formula(old_value, best_value,
                                                                                            winner_id, reward, alpha=0.1)},
                                                   ignore_index=True)

            if previous_player_id == 0:
                last_value_player_0 = best_value
            if previous_player_id == 1:
                last_value_player_1 = best_value

            #let the opponent move:
            number_of_moves += 1

        if debug_info:
            debug_collected_data.to_csv('debug_info.csv')
        collected_data = collected_data.iloc[2:]
        return collected_data
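
new_value_formula itself is not shown in this excerpt; a TD(0)-style update is one plausible shape for it, sketched below purely as an assumption rather than the project's actual formula.

def td_style_update(old_value, best_value, winner_id, reward, alpha=0.1):
    # hypothetical stand-in for new_value_formula, not the original implementation;
    # bootstrap target: the terminal reward once a winner is known,
    # otherwise the best evaluation found for the current decision
    target = reward if winner_id is not None else best_value
    if old_value is None:
        return target
    return old_value + alpha * (target - old_value)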
Example #21
0
    def run_one_game_and_collect_data(self, debug_info=True):

        there_was_no_action = False
        self.agent.train_mode()
        last_actual_player_0 = None
        last_actual_player_1 = None
        last_state_player_0 = None
        last_state_player_1 = None
        last_action_vec_player_0 = None
        last_action_vec_player_1 = None
        old_value = None
        old_state = None
        old_action_vec = None
        self.env.reset()
        observation = self.env.show_observation('deterministic')
        is_done = False
        number_of_moves = 0

        debug_collected_data = pd.DataFrame(columns=('active_player_id', 'winner_id', 'reward', 'best_value'))
        collected_data = pd.DataFrame(columns=('state_as_vector', 'value'))
        extra_move_done = False

        while not (is_done and extra_move_done) and number_of_moves < MAX_NUMBER_OF_MOVES:

            if is_done:
                extra_move_done = True

            current_state_as_dict = StateAsDict(self.env.current_state_of_the_game)

            actual_action, actual_eval, best_eval = self.agent.choose_action(observation, [None])
            if actual_action is None:
                there_was_no_action = True
                break
            #print('best value = {}'.format(best_value))
            observation, reward, is_done, info = self.env.step('deterministic', actual_action)
            previous_player_id = self.env.previous_player_id()
            winner_id = info['winner_id']

            if previous_player_id == 0:
                old_value = last_actual_player_0
                old_state = last_state_player_0
                old_action_vec = last_action_vec_player_0

            if previous_player_id == 1:
                old_value = last_actual_player_1
                old_state = last_state_player_1
                old_action_vec = last_action_vec_player_1

            if debug_info:
                state_status = old_state.__repr__() if old_state is not None else 'NONE'
                state_vector = vectorize_state(old_state) if old_state is not None else 'NONE'
                debug_collected_data = debug_collected_data.append({
                                                        'state_ex' : state_status,
                                                        'state_vec' : state_vector,
                                                        'new_value': self.new_value_formula(old_value, best_eval,
                                                                                            winner_id, reward, self.alpha),
                                                        'active_player_id' : self.env.previous_player_id(),
                                                        'winner_id' : winner_id,
                                                        'reward' : reward,
                                                        'best_eval' : best_eval,
                                                        'actual_eval' : actual_eval,
                                                        'old_value': old_value,
                                                        'pa_points' : self.env.previous_players_hand().number_of_my_points()},
                                                        ignore_index=True)


            if old_state is not None:
                collected_data = collected_data.append({'state_as_vector' : vectorize_state(old_state),
                                                        'action_vector' : old_action_vec,
                                                            'value': self.new_value_formula(old_value, best_eval,
                                                                                                winner_id, reward, self.alpha)},
                                                       ignore_index=True)

            if previous_player_id == 0:
                last_actual_player_0 = actual_eval
                last_state_player_0 = current_state_as_dict
                last_action_vec_player_0 = vectorize_action(actual_action)
            if previous_player_id == 1:
                last_actual_player_1 = actual_eval
                last_state_player_1 = current_state_as_dict
                last_action_vec_player_1 = vectorize_action(actual_action)

            #let the opponent move:
            number_of_moves += 1

        if debug_info:
            debug_collected_data.to_csv('debug_info.csv')
        collected_data = collected_data.iloc[0:]
        self.agent.test_mode()
        return collected_data