def __init__(self, game, actions, levels=2, epsilon=0.1, fov=2,
             learner_class=SARSA):
    """Set up the agent with two sub-learners arbitrated by a MetaLearner.

    game          -- the game/environment instance the agent acts in
    actions       -- action set handed to each sub-learner
    levels        -- NOTE(review): currently unused in this initializer
    epsilon       -- exploration rate shared by all learners
    fov           -- field-of-view radius for the "active" state
    learner_class -- learner factory (defaults to SARSA)
    """
    self.game = game
    self.score = 0
    self.accumulated = 0
    self.fov = fov
    self.learner_class = learner_class
    self.epsilon = epsilon
    self.alpha = 0.2
    self.gamma = 0.9
    left = learner_class(actions, epsilon)
    right = learner_class(actions, epsilon)
    # Consistency fix: reuse self.alpha/self.gamma instead of repeating the
    # literals 0.2/0.9 (same values, single source of truth; matches
    # replace_actions(), which already passes the attributes).
    self.learner = MetaLearner(left, right, epsilon,
                               alpha=self.alpha, gamma=self.gamma)
    self.learning = True
    self.dephased = False
def replace_actions(self, actions):
    """Rebuild both sub-learners (and the MetaLearner wrapping them) for a
    new action set, preserving the agent's epsilon/alpha/gamma settings.
    """
    make = self.learner_class
    sub_left = make(actions, self.epsilon)
    sub_right = make(actions, self.epsilon)
    self.learner = MetaLearner(sub_left, sub_right, self.epsilon,
                               self.alpha, self.gamma)
class MetaAgent(Agent): def __init__(self, game, actions, levels=2, epsilon=0.1, fov=2, learner_class=SARSA): self.game = game self.score = 0 self.accumulated = 0 self.fov = fov self.learner_class = learner_class self.epsilon = epsilon self.alpha = 0.2 self.gamma = 0.9 left = learner_class(actions, epsilon) right = learner_class(actions, epsilon) self.learner = MetaLearner(left, right, epsilon, alpha=0.2, gamma=0.9) self.learning = True self.dephased = False def replace_actions(self, actions): left = self.learner_class(actions, self.epsilon) right = self.learner_class(actions, self.epsilon) self.learner = MetaLearner(left, right, self.epsilon, self.alpha, self.gamma) def set_epsilon(self, epsilon): def set_epsilon(learner, epsilon): if hasattr(learner, "left_learner"): set_epsilon(learner.left_learner, epsilon) if hasattr(learner, "right_learner"): set_epsilon(learner.right_learner, epsilon) learner.epsilon = epsilon set_epsilon(self.learner, epsilon) def set_states(self, last_action=False): # active state state_left = self.get_fov(self.fov) # exploration state state_right = self.get_fov(self.fov * 2) # changed to 2 if not self.game.easy: # if easy, then exploration is just better range... 
state_right = state_right[0] + state_right[1] # don't distinguish between items if last_action: state_right = (state_right, self.learner.right_learner.current_action) self.learner.set_state(state_left, state_right) # sets all states def perform(self, explore=True, last_action=True, verbose=0): self.verbose = verbose self.set_states(last_action) final_action = self.decide(self.learner) # selects recursively self.learner.left_learner.update_actions(final_action) self.learner.right_learner.update_actions(final_action) if verbose == 3: print "mouse is facing {0} with state {1}".format(self.game.direction, (state_left, state_right)) self.game.render() c = raw_input("continue...") self.game.play(final_action) reward = self.check_reward() value = self.calc_reward(reward) self.next_states = (self.get_fov(self.fov), self.get_fov(self.fov * 2)) self.reward(value) return reward # deciding for top (main) learner def decide(self, choice): self.selections = [] learner = self.learner decision = self._decide(learner) return decision def _decide(self, learner): choice = learner.select() if self.verbose == 3: print " -> {0}".format(choice), if choice in [self.learner.left_learner, self.learner.right_learner]: return self._decide(choice) # meta choice elif choice == "now": # history choice new_learner = learner.left_learner elif choice == "next": new_learner = learner.history_learner else: # final choice return choice self.selections.append(learner) return self._decide(new_learner) def reward(self, value): if not self.is_hunger(value): self.accumulated += value if not self.learning: return self.learner.learn(value, self.next_states)
class MetaAgent(Agent): def __init__(self, game, actions, levels=2, epsilon=0.1, fov=2, learner_class=SARSA): self.game = game self.score = 0 self.accumulated = 0 self.fov = fov self.learner_class = learner_class self.epsilon = epsilon self.alpha = 0.2 self.gamma = 0.9 left = learner_class(actions, epsilon) right = learner_class(actions, epsilon) self.learner = MetaLearner(left, right, epsilon, alpha=0.2, gamma=0.9) self.learning = True self.dephased = False def replace_actions(self, actions): left = self.learner_class(actions, self.epsilon) right = self.learner_class(actions, self.epsilon) self.learner = MetaLearner(left, right, self.epsilon, self.alpha, self.gamma) def set_epsilon(self, epsilon): def set_epsilon(learner, epsilon): if hasattr(learner, 'left_learner'): set_epsilon(learner.left_learner, epsilon) if hasattr(learner, 'right_learner'): set_epsilon(learner.right_learner, epsilon) learner.epsilon = epsilon set_epsilon(self.learner, epsilon) def set_states(self, last_action=False): # active state state_left = self.get_fov(self.fov) # exploration state state_right = self.get_fov(self.fov * 2) # changed to 2 if not self.game.easy: # if easy, then exploration is just better range... 
state_right = state_right[0] + state_right[ 1] # don't distinguish between items if last_action: state_right = (state_right, self.learner.right_learner.current_action) self.learner.set_state(state_left, state_right) # sets all states def perform(self, explore=True, last_action=True, verbose=0): self.verbose = verbose self.set_states(last_action) final_action = self.decide(self.learner) # selects recursively self.learner.left_learner.update_actions(final_action) self.learner.right_learner.update_actions(final_action) if verbose == 3: print 'mouse is facing {0} with state {1}'.format( self.game.direction, (state_left, state_right)) self.game.render() c = raw_input('continue...') self.game.play(final_action) reward = self.check_reward() value = self.calc_reward(reward) self.next_states = (self.get_fov(self.fov), self.get_fov(self.fov * 2)) self.reward(value) return reward # deciding for top (main) learner def decide(self, choice): self.selections = [] learner = self.learner decision = self._decide(learner) return decision def _decide(self, learner): choice = learner.select() if self.verbose == 3: print ' -> {0}'.format(choice), if choice in [self.learner.left_learner, self.learner.right_learner]: return self._decide(choice) # meta choice elif choice == 'now': # history choice new_learner = learner.left_learner elif choice == 'next': new_learner = learner.history_learner else: # final choice return choice self.selections.append(learner) return self._decide(new_learner) def reward(self, value): if not self.is_hunger(value): self.accumulated += value if not self.learning: return self.learner.learn(value, self.next_states)