Пример #1
0
def load_data(file_name):
    """Load recorded transitions from ``file_name`` and return them as arrays.

    The file is parsed with ``json.load`` and the parsed value is handed to
    ``jsonpickle.decode``. The decoded object is iterated as
    ``(state, action, reward, next_state)`` tuples.

    Args:
        file_name: Path of the file to read.

    Returns:
        A ``(states, actions, rewards, next_states)`` tuple of ``np.stack``
        results. States and next states are converted with
        ``GameState.get_state_id``, actions are cast to ``int`` and rewards
        are kept as decoded.
    """
    with open(file_name, "r") as handle:
        transitions = jsonpickle.decode(json.load(handle))

    state_ids, action_ids, reward_values, next_state_ids = [], [], [], []
    for current, chosen, gained, following in transitions:
        state_ids.append(GameState.get_state_id(current))
        next_state_ids.append(GameState.get_state_id(following))
        action_ids.append(int(chosen))
        reward_values.append(gained)

    return (
        np.stack(state_ids),
        np.stack(action_ids),
        np.stack(reward_values),
        np.stack(next_state_ids),
    )
Пример #2
0
    def draw_outcome(self, is_child_turn, game_state=None):
        """Draw the winner banner once the game has finished.

        Nothing is drawn while the inspected game state is not finished.

        Args:
            is_child_turn: Truthy when the child is the player to move. On a
                finished game this selects 'Robot Wins'; otherwise
                'Child Wins' is shown.
            game_state: The game being displayed; its ``isFinished()`` result
                decides whether the banner is drawn. When omitted, a new
                ``GameState()`` is inspected instead, which is what this
                method always did before the parameter existed.
        """
        # The previous implementation only ever looked at a brand-new
        # GameState(), never at the game on screen (the source carried a
        # "THIS IS NOT DISPLAYING THE OUTPUT" note). A fresh game is
        # presumably never finished, so callers should pass the live state.
        if game_state is None:
            game_state = GameState()
        if not game_state.isFinished():
            return

        # Build the font only when there is something to render.
        font = pg.font.SysFont("Roboto", 85)
        message = 'Robot Wins' if is_child_turn else 'Child Wins'
        text = font.render(message, True, (0, 0, 0))
        position = (800, 900)
        self.surface.blit(text, position)
Пример #3
0
    def update(self, state, action, new_state, *, alpha=0.8, gamma=0.99):
        """Apply one Q-learning update for the move ``state -> new_state``.

        The reward is ``1`` when the child's score in ``new_state`` is higher
        than in ``state`` and ``-1`` otherwise. The entry of ``self.q_table``
        for ``(state, action)`` is then overwritten with

            (1 - alpha) * Q(state, action)
                + alpha * (reward + gamma * max(q_table[new_state, :]))

        Args:
            state: Game state before the action.
            action: Action taken; used as the column index of ``q_table``.
            new_state: Game state after the action.
            alpha: Learning rate (keyword-only, defaults to the previously
                hard-coded 0.8).
            gamma: Discount factor (keyword-only, defaults to the previously
                hard-coded 0.99).
        """
        # Reward function: did the child's score go up with this move?
        if state.get_score('child') < new_state.get_score('child'):
            reward = 1
        else:
            reward = -1

        # Best value reachable from the successor state.
        new_state_idx = GameState.get_state_id(new_state)
        best_next_value = np.max(self.q_table[new_state_idx, :])

        current_value = self.Q(state, action)
        target = reward + gamma * best_next_value

        state_id = GameState.get_state_id(state)
        self.q_table[state_id, action] = (
            (1 - alpha) * current_value + alpha * target
        )
Пример #4
0
def play_game(robot, child):
    """Play one visualised game between ``robot`` and ``child``.

    Each turn the view is refreshed, the player to move picks an action via
    its ``policy`` and the action is applied with ``state.make_action``. On
    the robot's turn ``robot.give_demonstration`` is also called. After every
    move ``child.update`` receives the previous state, the action, a reward
    of ``1`` if the child's score increased (``-1`` otherwise) and the new
    state.

    Args:
        robot: Agent providing ``policy`` and ``give_demonstration``.
        child: Agent providing ``policy`` and ``update``.

    Returns:
        ``(outcome, num_actions)``. ``outcome`` is ``'Failure'`` when the
        finished state has ``is_child_turn`` set and ``'Success'`` otherwise.
        ``num_actions`` counts the moves made by the child.
    """
    state = GameState()
    visualization = View()
    num_actions = 0

    while not state.isFinished():
        visualization.update(state)
        time.sleep(3)

        if state.is_child_turn:
            action = child.policy(state)
            num_actions += 1
        else:
            action = robot.policy(state)
            robot.give_demonstration(action, state)

        # Keep a snapshot of the pre-move state for the learning update
        # (presumably make_action may modify the state in place).
        old_state = deepcopy(state)
        print(GameState.get_state_id(state))
        # The reward returned by make_action is replaced by the
        # score-based reward below.
        state, _reward, _done, _info = state.make_action(action)

        # Reward function: did the child's score go up with this move?
        if old_state.get_score('child') < state.get_score('child'):
            reward = 1
        else:
            reward = -1
        child.update(old_state, action, reward, state)

    visualization.update(state)
    time.sleep(1.5)

    # The loop only exits on a finished game, so the outcome depends solely
    # on whose turn it is; this also guarantees ``outcome`` is always bound.
    outcome = 'Failure' if state.is_child_turn else 'Success'
    return outcome, num_actions
Пример #5
0
def play_game(robot,
              child,
              *,
              isTraining=True,
              use_demonstrations=False,
              use_explanations=False,
              metrics=None):
    """Play one game between ``robot`` and ``child`` and collect metrics.

    Args:
        robot: Agent providing ``policy``, ``give_examples`` and
            ``generate_explanations``.
        child: Agent providing ``policy``, ``update``, ``examples_update``
            and ``explanation_update``.
        isTraining: When true, the child is updated after every move and,
            on the robot's turns, receives the enabled teaching signals.
        use_demonstrations: Feed ``robot.give_examples()`` to
            ``child.examples_update`` on the robot's turns (training only).
        use_explanations: Feed ``robot.generate_explanations(action, state)``
            to ``child.explanation_update`` on the robot's turns (training
            only).
        metrics: Optional dict with the counters ``total_explanations``,
            ``total_demonstrations``, ``total_actions`` and
            ``total_experience``; it is updated in place. A zeroed dict is
            created when omitted.

    Returns:
        ``(outcome, metrics)``. ``outcome`` is ``0`` when the finished state
        has ``is_child_turn`` set and ``1`` otherwise.
    """
    state = GameState()
    if metrics is None:
        metrics = {
            "total_explanations": 0,
            "total_demonstrations": 0,
            "total_actions": 0,
            "total_experience": 0
        }

    while not state.isFinished():
        if state.is_child_turn:
            action = child.policy(state)
        else:
            action = robot.policy(state)

            if isTraining:
                if use_demonstrations:
                    demonstrations = robot.give_examples()
                    child.examples_update(demonstrations)
                    metrics["total_demonstrations"] += len(demonstrations)
                    metrics["total_experience"] += len(demonstrations)

                if use_explanations:
                    explanations = robot.generate_explanations(action, state)
                    child.explanation_update(explanations)
                    metrics["total_explanations"] += len(explanations)
                    metrics["total_experience"] += len(explanations)

        # Capture the id and score before the move is applied.
        old_state_idx = GameState.get_state_id(state)
        old_score = state.get_score('child')
        # The reward returned by make_action is not used; training computes
        # its own reward from the score change below.
        state, _reward, _done, _info = state.make_action(action)
        new_state_idx = GameState.get_state_id(state)

        if isTraining:
            # NOTE(review): this is old score minus new score, i.e. positive
            # when the child's score drops - confirm the intended sign.
            reward = old_score - state.get_score('child')
            child.update(old_state_idx, action, reward, new_state_idx)
            metrics["total_actions"] += 1
            metrics["total_experience"] += 1

    # The loop only exits on a finished game, so the outcome depends solely
    # on whose turn it is; this also guarantees ``outcome`` is always bound.
    outcome = 0 if state.is_child_turn else 1
    return outcome, metrics
Пример #6
0
def play_game(robot, child, isTraining=True):
    """Play one game between ``robot`` and ``child``.

    On the robot's turns during training, the child receives the robot's
    demonstration (``give_demonstration`` -> ``demonstration_update``) and
    its explanations (``generate_explanations`` -> ``explanation_update``).
    After every move during training ``child.update`` is called with the
    state ids before and after the move, the action and the reward.

    Args:
        robot: Agent providing ``policy``, ``give_demonstration`` and
            ``generate_explanations``.
        child: Agent providing ``policy``, ``update``,
            ``demonstration_update`` and ``explanation_update``.
        isTraining: When false, no teaching or learning updates are made.

    Returns:
        ``(outcome, num_actions)``. ``outcome`` is ``-1`` when the finished
        state has ``is_child_turn`` set and ``1`` otherwise. ``num_actions``
        counts the moves made by the child.
    """
    state = GameState()
    num_actions = 0

    while not state.isFinished():
        if state.is_child_turn:
            action = child.policy(state)
            num_actions += 1
        else:
            action = robot.policy(state)

            if isTraining:
                (demo_state,
                 demo_action,
                 demo_reward,
                 demo_new_state) = robot.give_demonstration(action, state)
                child.demonstration_update(demo_state,
                                           demo_action,
                                           demo_reward,
                                           demo_new_state)

                # Example-based teaching (robot.give_examples ->
                # child.examples_update) is disabled in this variant.

                explanations = robot.generate_explanations(action, state)
                child.explanation_update(explanations)

        # Capture the id and score before the move is applied.
        old_state_idx = GameState.get_state_id(state)
        old_score = state.get_score('child')
        # The reward returned by make_action is not used; training computes
        # its own reward from the score change below.
        state, _reward, _done, _info = state.make_action(action)
        new_state_idx = GameState.get_state_id(state)

        if isTraining:
            # NOTE(review): this is old score minus new score, i.e. positive
            # when the child's score drops - confirm the intended sign.
            reward = old_score - state.get_score('child')
            child.update(old_state_idx, action, reward, new_state_idx)

    # The loop only exits on a finished game, so the outcome depends solely
    # on whose turn it is; this also guarantees ``outcome`` is always bound.
    outcome = -1 if state.is_child_turn else 1
    return outcome, num_actions
Пример #7
0
    def Q(self, state, action):
        """Return the ``q_table`` entry for taking ``action`` in ``state``.

        The row is the id given by ``GameState.get_state_id(state)`` and the
        column is ``action``.
        """
        row = GameState.get_state_id(state)
        return self.q_table[row, action]
Пример #8
0
        nodes = list()

        for valid_action in self.valid_actions():
            env_copy = self.lazy_copy()
            env_copy.make_action(valid_action)
            nodes.append(env_copy)

        return nodes

    def lazy_copy(self):
        """Return a ``Node`` built from the parent class's ``lazy_copy``.

        The parent's copy supplies ``balls`` and ``is_child_turn``, which are
        passed to the ``Node`` constructor.
        """
        base_copy = super(Node, self).lazy_copy()
        balls, child_turn = base_copy.balls, base_copy.is_child_turn
        return Node(balls=balls, is_child_turn=child_turn)


# Results are memoised in a cachetools.Cache created with maxsize int(1e5),
# keyed by the node's state id together with the remaining depth.
@cachetools.cached(cache=cachetools.Cache(int(1e5)),
                   key=lambda n, d: (GameState.get_state_id(n), d))
def minimax(node, depth):
    """Return the minimax value of ``node`` searched ``depth`` plies deep.

    Args:
        node: Search node providing ``score()``, ``isFinished()``,
            ``is_child_turn`` and ``children()``.
        depth: Remaining search depth; ``0`` stops the recursion.

    Returns:
        ``node.score()`` when ``depth`` is 0 or the node is finished.
        Otherwise, on the child's turn, the largest value among the
        children searched with ``depth - 1``.

    NOTE(review): only the maximizing branch is visible in this excerpt. As
    shown, a non-terminal node on the other player's turn falls through and
    returns ``None`` - confirm against the full file.
    """
    score = node.score()
    isFinished = node.isFinished()
    # The child is treated as the maximizing player.
    maximizingPlayer = node.is_child_turn

    # Leaf of the search: depth exhausted or game over.
    if depth == 0 or isFinished:
        return score

    children = node.children()

    if maximizingPlayer:
        # Starting value for the running maximum; assumes no score is
        # below -1 - TODO confirm the range of node.score().
        value = -1
        for child in children:
            value = max(value, minimax(child, depth - 1))
        return value
Пример #9
0
            text2 = font.render(exp2, True, (0, 0, 0))
            self.surface.blit(text2, (50, 800))

    def update(self, game_state):
        """Redraw the whole view for ``game_state`` and refresh the display.

        Drawing order: empty board, robot score, child score, outcome,
        explanation, balls. ``pg.display.update()`` is called last.
        """
        self.draw_empty_board()
        for player in ('robot', 'child'):
            self.draw_score(player, game_state.get_score(player))

        child_to_move = game_state.is_child_turn
        self.draw_outcome(child_to_move)
        self.draw_explanation(child_to_move, game_state)
        self.draw_balls(game_state.balls)
        pg.display.update()


if __name__ == "__main__":
    # Demo: play one game with uniformly random valid actions, redrawing the
    # view before every move and once more after the game has finished.
    from minmax import GameState
    import time

    game = GameState()
    view = View()

    while not game.isFinished():
        view.update(game)
        time.sleep(0.25)

        choices = game.valid_actions()
        picked = choices[np.random.randint(len(choices))]
        game, _reward, _done, _info = game.make_action(picked)

    # Show the final position briefly before exiting.
    view.update(game)
    time.sleep(1.5)