def load_data(file_name):
    with open(file_name, "r") as file:
        data = json.load(file)
    data_decoded = jsonpickle.decode(data)

    states = list()
    actions = list()
    rewards = list()
    next_states = list()
    for state, action, reward, next_state in data_decoded:
        state_idx = GameState.get_state_id(state)
        states.append(state_idx)

        next_state_idx = GameState.get_state_id(next_state)
        next_states.append(next_state_idx)

        actions.append(int(action))
        rewards.append(reward)

    states = np.stack(states)
    actions = np.stack(actions)
    rewards = np.stack(rewards)
    next_states = np.stack(next_states)
    return states, actions, rewards, next_states
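# --- Hypothetical counterpart (not in the original code): a sketch of how a
# transition file readable by load_data() might be written, assuming
# `transitions` is a list of (state, action, reward, next_state) tuples.
# jsonpickle serialises the GameState objects to a string, which is then
# stored as JSON, matching the json.load -> jsonpickle.decode order above.
def save_data(file_name, transitions):
    data_encoded = jsonpickle.encode(transitions)
    with open(file_name, "w") as file:
        json.dump(data_encoded, file)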
def draw_outcome(self, is_child_turn, game_state):
    font = pg.font.SysFont("Roboto", 85)
    position_x = 800
    position_y = 900

    # Use the game state that is actually being drawn. Creating a fresh
    # GameState() here always reported an unfinished game, which is why the
    # outcome text was never displayed.
    is_finished = game_state.isFinished()

    if not is_child_turn and is_finished:
        text = font.render('Child Wins', True, (0, 0, 0))
        self.surface.blit(text, (position_x, position_y))
    elif is_child_turn and is_finished:
        text = font.render('Robot Wins', True, (0, 0, 0))
        self.surface.blit(text, (position_x, position_y))
def update(self, state, action, new_state):
    # reward function: +1 if the child's score increased, -1 otherwise
    if state.get_score('child') < new_state.get_score('child'):
        reward = 1
    else:
        reward = -1

    # update q-table with the standard Q-learning rule
    new_state_idx = GameState.get_state_id(new_state)
    alpha = 0.8   # learning rate
    gamma = 0.99  # discount factor
    V_star = np.max(self.q_table[new_state_idx, :])
    q_sa = self.Q(state, action)
    q_value = (1 - alpha) * q_sa + alpha * (reward + gamma * V_star)

    state_id = GameState.get_state_id(state)
    self.q_table[state_id, action] = q_value
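# Worked example of the update rule above with illustrative numbers (not from
# the original code): alpha = 0.8, gamma = 0.99, reward = 1, old Q(s, a) = 0.2,
# best next-state value V* = 0.5.
alpha, gamma, reward, q_sa, V_star = 0.8, 0.99, 1, 0.2, 0.5
q_value = (1 - alpha) * q_sa + alpha * (reward + gamma * V_star)
assert abs(q_value - 1.236) < 1e-9  # (1 - 0.8) * 0.2 + 0.8 * (1 + 0.99 * 0.5)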
def play_game(robot, child):
    state = GameState()
    visualization = View()
    num_actions = 0

    while not state.isFinished():
        visualization.update(state)
        time.sleep(3)

        valid_actions = state.valid_actions()
        if state.is_child_turn:
            action = child.policy(state)
            num_actions += 1
        else:
            action = robot.policy(state)
            robot.give_demonstration(action, state)

        old_state = deepcopy(state)
        print(GameState.get_state_id(state))
        state, reward, done, info = state.make_action(action)

        # reward function
        if old_state.get_score('child') < state.get_score('child'):
            reward = 1
        else:
            reward = -1
        child.update(old_state, action, reward, state)

    visualization.update(state)
    time.sleep(1.5)

    if state.is_child_turn and state.isFinished():
        outcome = 'Failure'
    elif not state.is_child_turn and state.isFinished():
        outcome = 'Success'
    return outcome, num_actions
def play_game(robot, child, *, isTraining=True, use_demonstrations=False,
              use_explanations=False, metrics=None):
    state = GameState()
    if metrics is None:
        metrics = {
            "total_explanations": 0,
            "total_demonstrations": 0,
            "total_actions": 0,
            "total_experience": 0
        }

    while not state.isFinished():
        valid_actions = state.valid_actions()
        if state.is_child_turn:
            action = child.policy(state)
        else:
            action = robot.policy(state)
            if isTraining:
                if use_demonstrations:
                    demonstrations = robot.give_examples()
                    child.examples_update(demonstrations)
                    metrics["total_demonstrations"] += len(demonstrations)
                    metrics["total_experience"] += len(demonstrations)
                if use_explanations:
                    explanations = robot.generate_explanations(action, state)
                    child.explanation_update(explanations)
                    metrics["total_explanations"] += len(explanations)
                    metrics["total_experience"] += len(explanations)

        old_state_idx = GameState.get_state_id(state)
        old_score = state.get_score('child')
        state, reward, done, info = state.make_action(action)
        new_state_idx = GameState.get_state_id(state)

        if isTraining:
            reward = old_score - state.get_score('child')
            child.update(old_state_idx, action, reward, new_state_idx)
            metrics["total_actions"] += 1
            metrics["total_experience"] += 1

    if state.is_child_turn and state.isFinished():
        outcome = 0
    elif not state.is_child_turn and state.isFinished():
        outcome = 1
    return outcome, metrics
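# --- Hypothetical usage sketch (not in the original code): running several
# training games with the keyword-only flags above, accumulating a single
# metrics dictionary across games. The function name run_training_session is
# an assumption; robot and child are already-constructed agents with the
# policy()/update() interface used in the loop above.
def run_training_session(robot, child, num_games=100):
    metrics = None
    for _ in range(num_games):
        outcome, metrics = play_game(robot, child, isTraining=True,
                                     use_demonstrations=True,
                                     use_explanations=True,
                                     metrics=metrics)
    return metrics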
def play_game(robot, child, isTraining=True):
    state = GameState()
    num_actions = 0

    while not state.isFinished():
        valid_actions = state.valid_actions()
        if state.is_child_turn:
            action = child.policy(state)
            num_actions += 1
        else:
            action = robot.policy(state)
            if isTraining:
                (demonstration_state, demonstration_action,
                 demonstration_reward, demonstration_new_state) = robot.give_demonstration(action, state)
                child.demonstration_update(demonstration_state, demonstration_action,
                                           demonstration_reward, demonstration_new_state)

                #examples = robot.give_examples()
                #child.examples_update(examples)

                explanations = robot.generate_explanations(action, state)
                child.explanation_update(explanations)

        old_state_idx = GameState.get_state_id(state)
        old_score = state.get_score('child')
        state, reward, done, info = state.make_action(action)
        new_state_idx = GameState.get_state_id(state)

        if isTraining:
            reward = old_score - state.get_score('child')
            child.update(old_state_idx, action, reward, new_state_idx)

    if state.is_child_turn and state.isFinished():
        outcome = -1
    elif not state.is_child_turn and state.isFinished():
        outcome = 1
    return outcome, num_actions
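# --- Hypothetical evaluation sketch (not in the original code): play a batch
# of games with isTraining=False so the child's Q-table is frozen, and report
# how often the game ends in the child's favour (outcome == 1, the case
# labelled 'Success' in the visualised play_game variant).
def evaluate(robot, child, num_games=50):
    wins = 0
    for _ in range(num_games):
        outcome, num_actions = play_game(robot, child, isTraining=False)
        if outcome == 1:
            wins += 1
    return wins / num_games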
def Q(self, state, action):
    state_id = GameState.get_state_id(state)
    q_value = self.q_table[state_id, action]
    return q_value
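# --- Hypothetical method sketch (not part of the original agent): an
# epsilon-greedy policy for the same class as Q() above. It assumes the state
# object exposes valid_actions() as in the game-loop code, and that actions
# index columns of q_table.
def policy(self, state, epsilon=0.1):
    valid_actions = state.valid_actions()
    if np.random.rand() < epsilon:
        # explore: pick a random valid action
        return valid_actions[np.random.randint(len(valid_actions))]
    # exploit: pick the valid action with the highest Q-value
    q_values = [self.Q(state, action) for action in valid_actions]
    return valid_actions[int(np.argmax(q_values))]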
    def children(self):
        nodes = list()
        for valid_action in self.valid_actions():
            env_copy = self.lazy_copy()
            env_copy.make_action(valid_action)
            nodes.append(env_copy)
        return nodes

    def lazy_copy(self):
        other = super(Node, self).lazy_copy()
        return Node(balls=other.balls, is_child_turn=other.is_child_turn)


@cachetools.cached(cache=cachetools.Cache(int(1e5)),
                   key=lambda n, d: (GameState.get_state_id(n), d))
def minimax(node, depth):
    score = node.score()
    isFinished = node.isFinished()
    maximizingPlayer = node.is_child_turn

    if depth == 0 or isFinished:
        return score

    children = node.children()
    if maximizingPlayer:
        value = -1
        for child in children:
            value = max(value, minimax(child, depth - 1))
        return value
    else:
        # symmetric minimising branch (the robot's turn); scores are assumed
        # to lie in [-1, 1], matching the -1 initialisation above
        value = 1
        for child in children:
            value = min(value, minimax(child, depth - 1))
        return value
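# --- Hypothetical sketch (not in the original code): how a robot policy could
# use minimax() above. It assumes the robot moves when is_child_turn is False
# and therefore minimises the value reachable by the child (the maximising
# player). Node expansion mirrors children(): lazy_copy() then make_action().
def minimax_policy(node, depth=8):
    best_action, best_value = None, None
    for valid_action in node.valid_actions():
        env_copy = node.lazy_copy()
        env_copy.make_action(valid_action)
        value = minimax(env_copy, depth - 1)
        if best_value is None or value < best_value:  # robot minimises
            best_action, best_value = valid_action, value
    return best_action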
        # (end of View.draw_explanation)
        text2 = font.render(exp2, True, (0, 0, 0))
        self.surface.blit(text2, (50, 800))

    def update(self, game_state):
        self.draw_empty_board()
        self.draw_score('robot', game_state.get_score('robot'))
        self.draw_score('child', game_state.get_score('child'))
        self.draw_outcome(game_state.is_child_turn, game_state)
        self.draw_explanation(game_state.is_child_turn, game_state)
        self.draw_balls(game_state.balls)
        pg.display.update()


if __name__ == "__main__":
    from minmax import GameState
    import time

    state = GameState()
    visualization = View()

    while not state.isFinished():
        visualization.update(state)
        time.sleep(0.25)

        valid_actions = state.valid_actions()
        valid_action_idx = np.random.randint(len(valid_actions))
        action = valid_actions[valid_action_idx]
        state, reward, done, info = state.make_action(action)

    visualization.update(state)
    time.sleep(1.5)