Пример #1
0
    def _expansion_simulation(self, leaf_id, win_index):
        """Expand the leaf node and run one random playout from it.

        Returns the playout reward: 1.0 for an already-terminal leaf,
        0.0 for the root node (no simulation), otherwise whatever
        utils.get_reward reports for the simulated outcome.
        """
        board = self.tree[leaf_id]['board']
        player = self.tree[leaf_id]['player']

        if win_index != 0:
            # Terminal leaf: nothing to expand, fixed reward.
            return 1.

        # --- expansion: create a child node for every legal move ---
        for move in utils.valid_actions(board):
            move_idx = move[1]
            child_key = leaf_id + (move_idx, )
            self.tree[child_key] = {
                'board': utils.get_board(child_key, self.board_size),
                'player': utils.get_turn(child_key),
                'parent': leaf_id,
                'child': [],
                'n': 0.,
                'w': 0.,
                'q': 0.
            }
            self.tree[leaf_id]['child'].append(move_idx)

        if not self.tree[leaf_id]['parent']:
            # Root node: playout is skipped by design.
            return 0.

        # --- simulation: uniformly random playout until a result ---
        sim_board = board.copy()
        sim_turn = player
        while True:
            candidates = utils.valid_actions(sim_board)
            picked = candidates[np.random.choice(len(candidates))]
            coord = picked[0]

            if sim_turn == 0:
                sim_board[coord] = 1
            else:
                sim_board[coord] = -1

            outcome = utils.check_win(sim_board, self.win_mark)
            if outcome == 0:
                # Game still open: hand the move to the other player.
                sim_turn = abs(sim_turn - 1)
            else:
                return utils.get_reward(outcome, leaf_id)
Пример #2
0
    def get_pi(self, root_id, board, turn, tau):
        """Return a uniform policy vector over the legal moves of *board*.

        Stores *root_id* on the instance; *turn* and *tau* are accepted
        for interface compatibility but do not influence the result.
        """
        self.root_id = root_id
        legal = utils.valid_actions(board)
        uniform = 1 / len(legal)
        pi = np.zeros(self.board_size**2, 'float')

        # Spread equal probability mass over every legal move index.
        for _, move_idx in legal:
            pi[move_idx] = uniform

        return pi
Пример #3
0
    def simulation(self, tree, child_id):
        """Random playout from *child_id*'s state until the game decides.

        Returns the win index reported by check_win for the final state.
        """
        board = deepcopy(tree[child_id]['state'])
        turn = deepcopy(tree[child_id]['player'])

        while True:
            outcome = check_win(board, self.win_mark)
            if outcome != 0:
                return outcome

            # Game still open: place a uniformly random legal stone and
            # pass the move to the other player.
            move = random.choice(valid_actions(board))
            if turn == 0:
                turn = 1
                board[move[0]] = 1
            else:
                turn = 0
                board[move[0]] = -1
Пример #4
0
    def expansion(self, tree, leaf_id):
        """Expand *leaf_id* with one child per legal action, if eligible.

        The root node is always expandable; any other node only once its
        visit count exceeds a fixed threshold. A terminal or ineligible
        leaf is returned unchanged. Returns (tree, node_id), where node_id
        is one randomly chosen new child, or the leaf itself when no
        expansion happened.
        """
        leaf_state = deepcopy(tree[leaf_id]['state'])
        winner = check_win(leaf_state, self.win_mark)
        moves = valid_actions(leaf_state)
        expand_thres = 10

        eligible = leaf_id == (0, ) or tree[leaf_id]['n'] > expand_thres

        if winner != 0 or not eligible:
            # Terminal state or not visited enough yet: leave tree as-is.
            return tree, leaf_id

        # Create a child node for every legal move.
        new_children = []
        for move in moves:
            board = deepcopy(tree[leaf_id]['state'])
            move_idx = move[1]
            mover = tree[leaf_id]['player']

            if mover == 0:
                next_player = 1
                board[move[0]] = 1
            else:
                next_player = 0
                board[move[0]] = -1

            child_key = leaf_id + (move_idx, )
            new_children.append(child_key)
            tree[child_key] = {
                'state': board,
                'player': next_player,
                'child': [],
                'parent': leaf_id,
                'n': 0,
                'w': 0,
                'q': 0
            }
            tree[leaf_id]['child'].append(move_idx)

        # Continue the search from one randomly picked fresh child.
        return tree, random.sample(new_children, 1)[0]
Пример #5
0
    # Record the real observation in the official history, then move the
    # POMCP tree's root to the node implied by (action, observation).
    h.add_only_observation(observation)
    print('Action from POCMP: ', action, 'Real observation: ', observation)
    #Save the 'old' particle list to update afterwards
    old_particle_list = copy.deepcopy(
        pomcp.tree.nodes[pomcp.tree.root_key].particle_list)
    #print('tamanho old list: ', len(old_particle_list))
    pomcp.tree.prune_and_make_new_root(action, observation)
    #print('Historico oficial')
    #h.print_history()
    # Build a state consistent with the recorded history so the particle
    # filter can reinvigorate the belief at the new root.
    state_from_history, _ = simulator.get_dummy_state_and_legal_actions_given_history(
        h)
    #Now update the belief state
    pomcp.tree.nodes[pomcp.tree.root_key].particle_list = particle_list_update(
        simulator, old_particle_list, int(pomcp.n_simulations),
        state_from_history, action, observation, 100)
    # Particle deprivation: with an empty belief, leave the enclosing loop
    # and fall back to the random-action finish below.
    if len(pomcp.tree.nodes[pomcp.tree.root_key].particle_list) == 0:
        break
# Particle deprivation fallback: finish the episode by acting uniformly at
# random, capped at 100 steps or until the simulator reports terminal.
print('Out of particles, finishing episode with SelectRandom')
time = 0
while time < 100:
    action = choice(valid_actions(real_initial_state))
    successor_state, observation, reward, is_terminal = simulator.step(
        real_initial_state, action)
    if is_terminal:
        print('Finished')
        break
    h.add(action, observation)
    # Bug fix: the counter was never incremented, so the 100-step cap could
    # never trigger and the loop was infinite on non-terminal runs.
    time += 1
    # NOTE(review): successor_state is unused — presumably the next step
    # should act from it instead of real_initial_state; confirm intent.

print('Historico oficial')
h.print_history()