def get_action(self, observation, reward = 0):
    """Pick the next action for ``observation`` and record ``reward``.

    The state key is the raw serialisation of the array returned by
    ``hf.getOkolica(observation, 4, 4, 4, 4)``; it is handed to
    ``self.get_q_action`` to obtain the action, after which ``reward`` is
    passed to ``self.propagate_reward``.  When ``self.debug`` is truthy the
    world is printed via ``self.print_world``.

    Returns whatever ``self.get_q_action`` returned.
    """
    # The Mario/monster lookup result is not used in this method.
    # NOTE(review): call kept in case the hf helpers have side effects or
    # validate the observation -- confirm and drop if they are pure.
    hf.get_mario(hf.get_monsters(observation))

    # NOTE(review): presumably a window of 4 cells in each direction around
    # Mario -- confirm the argument meaning against hf.getOkolica.
    surroundings = hf.getOkolica(observation, 4, 4, 4, 4)
    state_key = surroundings.tostring()

    # The action is chosen before the reward is propagated; keep this order.
    chosen = self.get_q_action(state_key)
    self.propagate_reward(reward)

    if self.debug:
        self.print_world(state_key, surroundings)
    return chosen
def get_action(self, observation, reward = 0):
    """Pick the next action for ``observation`` and record ``reward``.

    The state key starts as the raw serialisation of the array returned by
    ``hf.getOkolica(observation, 4, 4, 4, 4)``.  If the key contains both a
    ``"V"`` and an ``"M"`` marker whose first occurrences are fewer than 5
    positions apart, the key is narrowed to the inclusive span between them.
    (NOTE(review): presumably "M" is Mario and "V" a nearby hazard -- confirm
    against the tile encoding used by hf.)

    The key is handed to ``self.get_q_action``, then ``reward`` is passed to
    ``self.propagate_reward``.  When ``self.debug`` is truthy the world is
    printed via ``self.print_world``.

    Returns whatever ``self.get_q_action`` returned.
    """
    # The Mario/monster lookup result is not used in this method.
    # NOTE(review): call kept in case the hf helpers have side effects or
    # validate the observation -- confirm and drop if they are pure.
    hf.get_mario(hf.get_monsters(observation))

    state_arr = hf.getOkolica(observation, 4, 4, 4, 4)
    state = state_arr.tostring()

    # Look each marker up once instead of re-scanning the string.
    v_pos = state.find("V")
    m_pos = state.find("M")

    # Both markers must be present: a missing "M" yields -1, which would
    # otherwise pass the distance check for a "V" near the start and turn
    # the slice start into -1 (i.e. "last character"), collapsing the key.
    if v_pos != -1 and m_pos != -1 and abs(v_pos - m_pos) < 5:
        start, end = sorted((v_pos, m_pos))
        state = state[start:end + 1]

    # The action is chosen before the reward is propagated; keep this order.
    action = self.get_q_action(state)
    self.propagate_reward(reward)

    if self.debug:
        self.print_world(state, state_arr)
    return action
def print_world(self, s=None, sa=None, ok=100):
    """Print a debug view of the current world and export debug globals.

    Parameters:
        s:  optional state key; when non-empty it is stored in the
            module-level ``state`` variable.
        sa: optional state array; when non-empty it is stored in the
            module-level ``state_arr`` variable.
        ok: extent passed four times to ``hf.getOkolica`` to obtain the
            grid that gets printed.

    Side effects: rebinds the module-level names ``observation``,
    ``monsters``, ``mario``, ``all_scores``, ``trial_actions`` and ``q``
    (plus ``state`` / ``state_arr`` as described above) and writes the
    world dump to standard output.
    """
    # These names are deliberately published at module level.
    # NOTE(review): presumably for inspection from an interactive session.
    global all_scores, q, trial_actions, state, state_arr, \
        observation, mario, monsters

    observation = self.last_observation
    monsters = hf.get_monsters(observation)
    mario = hf.get_mario(monsters)
    # Previously assigned to the misspelled local ``all_cores``, so the
    # declared global ``all_scores`` was never updated.
    all_scores = self.all_scores
    trial_actions = self.trial_actions
    # len() rather than truthiness: the arguments may be arrays, whose
    # truth value can be ambiguous.
    if sa is not None and len(sa) > 0:
        state_arr = sa
    if s is not None and len(s) > 0:
        state = s
    q = self.Q

    # Single-argument parenthesised print behaves the same whether print
    # is a statement (Python 2) or a function.
    print("--------------------------------------------------")
    world = hf.getOkolica(observation, ok, ok, ok, ok)
    print("step: %d reward: %.2f " %
          (self.trial_steps, self.trial_reward))
    print("\n".join(["".join(row) for row in world]))
    print("x: %2.2f y: %2.2f q-len: %d " %
          (mario.x, mario.y, len(self.Q)))
    print("")