def test_learning(self):
    qlearning = QLearning()
    old_state = self.empty_state()
    action = ACTION_SUICIDE
    new_state = self.empty_state()
    reward = 10
    old_q = qlearning.q.get_q(old_state, action)
    qlearning.learn(old_state, new_state, action, reward)
    self.assertGreater(qlearning.q.get_q(old_state, action), old_q)
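The assertion above passes because a positive reward pulls the stored Q-value for (old_state, action) upward. As a point of reference, here is a minimal sketch of the tabular update that a learn() call like the one in this test is typically built on; the names TabularQ, ALPHA, GAMMA and the defaultdict-backed table are illustrative and are not taken from the snippet's QLearning class.

from collections import defaultdict

ALPHA = 0.1   # learning rate (illustrative value)
GAMMA = 0.9   # discount factor (illustrative value)


class TabularQ:
    """Minimal Q-table sketch; not the QLearning class used in the test."""

    def __init__(self, actions):
        self.actions = actions
        self.q = defaultdict(float)  # (state, action) -> Q-value

    def get_q(self, state, action):
        return self.q[(state, action)]

    def learn(self, old_state, new_state, action, reward):
        # Standard Q-learning update:
        # Q(s, a) <- Q(s, a) + alpha * (r + gamma * max_a' Q(s', a') - Q(s, a))
        best_next = max(self.q[(new_state, a)] for a in self.actions)
        target = reward + GAMMA * best_next
        self.q[(old_state, action)] += ALPHA * (target - self.q[(old_state, action)])

With a positive reward and a zero-initialised table, the updated Q(s, a) is strictly larger than before, which is exactly what assertGreater checks.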
def main(argv):
    setpath()
    from q_learning import QLearning
    from grid_world import Grid

    # 5 rows, 4 cols, unreachables: [(1, 1), (1, 3)], pits: [(3, 1)], goal: (4, 3)
    g = Grid(5, 4, [(1, 1), (1, 3)], [(3, 1)], (4, 3))
    q = QLearning(g)
    q.learn()
import copy
import random

import rg  # rgkit's game API (rg.locs_around, etc.)

# State, QLearning and the action-constants module `s` come from this
# project's own modules; their imports are not shown in the original snippet.


class Robot:
    game = None
    last_game = None
    current_state = None
    last_states = {}
    last_action = {}
    delta = None
    # Keep track of all our robot_ids. Will be useful to detect new robots.
    robot_ids = set()
    # default values - will get overridden by rgkit
    location = (0, 0)
    hp = 0
    player_id = 0
    robot_id = 0

    def __init__(self):
        self.qlearning = QLearning()

    def act(self, game):
        new_robot = self.robot_id not in self.robot_ids
        self.robot_ids.add(self.robot_id)
        self.current_state = State.from_game(game, self.player_id,
                                             robot_loc=self.location)
        self.game = game

        # Explore: take a random action roughly 25% of the time
        if random.randint(0, 3) < 1:
            print("[Bot " + str(self.robot_id) + "] random action")
            action = self.get_random_action()
        else:
            action = self.qlearning.predict(self.current_state)

        self.last_states[self.robot_id] = self.current_state
        self.last_action[self.robot_id] = action
        return State.map_action(action, self.location)

    def get_possible_actions(self):
        possible_moves = [s.ACTION_SUICIDE]
        possible_locs = rg.locs_around(self.location,
                                       filter_out=('invalid', 'obstacle'))
        for loc in possible_locs:
            possible_moves.append((s.ACTION_MOVE, loc))
            possible_moves.append((s.ACTION_ATTACK, loc))
        return possible_moves

    def get_random_action(self):
        possible_actions = self.get_possible_actions()
        return random.choice(possible_actions)

    # delta = [AttrDict{
    #     'loc': loc,
    #     'hp': hp,
    #     'player_id': player_id,
    #     'loc_end': loc_end,
    #     'hp_end': hp_end
    # }]
    # returns new GameState
    def learn(self):
        # reward() expects a single delta entry, so pick this robot's entry
        my_delta = next(d for d in self.delta if d.loc == self.location)
        last_state = self.last_states[self.robot_id]
        action = self.last_action[self.robot_id]
        future_state = self.current_state
        reward = self.reward(my_delta)
        self.qlearning.learn(last_state, future_state, action, reward)

    @staticmethod
    def reward(my_delta):
        damage_taken = my_delta.hp - my_delta.hp_end
        reward = my_delta.damage_caused - damage_taken
        return reward

    def delta_callback(self, delta, actions, new_gamestate):
        future_game = new_gamestate.get_game_info(self.player_id)
        print("delta_callback called")
        print("Size of Q: " + str(len(self.qlearning.q.hash_matrix)))
        for (robot_loc, robot) in self.game.robots.items():
            if hasattr(robot, 'robot_id') and robot.robot_id in self.robot_ids:
                action = self.last_action[robot.robot_id]
                for delta_me in delta:
                    state_template = State.from_game(future_game, self.player_id)
                    if delta_me['loc'] == robot_loc:
                        future_state = copy.deepcopy(state_template)
                        future_state.robot_loc = delta_me.loc_end
                        reward = self.reward(delta_me)
                        self.qlearning.learn(self.current_state, future_state,
                                             action, reward)
from q_state import next_state, random_state, actions
from q_learning import QLearning
from q_table import QTable

if __name__ == "__main__":
    episode = 100
    model_save_interval = 10
    table = QTable(actions)
    learning = QLearning(table)

    for step in range(episode):
        init_state = random_state()
        i = 0
        reward = 0
        # Replay from the same initial state until a run ends with reward 1
        while reward != 1:
            state = init_state
            while True:
                i += 1
                action = learning.choose_action(state)
                state2, reward, done = next_state(state, action, table)
                learning.learn(state, action, reward, state2, done)
                if done:
                    break
                state = state2
        print(init_state, i, len(table.q_table))
        if (step + 1) % model_save_interval == 0:
            table.save()
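The action selection inside the loop, learning.choose_action(state), is usually an epsilon-greedy policy: explore with a small probability, otherwise pick the action with the highest current Q-value. A minimal sketch of that idea follows; EPSILON and the (state, action)-keyed dict are illustrative assumptions, not the snippet's actual QLearning or QTable internals.

import random

EPSILON = 0.1  # exploration rate (illustrative value)


def choose_action(state, q_table, actions):
    """Epsilon-greedy selection over a dict mapping (state, action) -> Q."""
    if random.random() < EPSILON:
        # explore: occasionally try a random action
        return random.choice(actions)
    # exploit: act greedily with respect to the current Q-values
    q_values = [q_table.get((state, a), 0.0) for a in actions]
    best = max(q_values)
    # break ties randomly among equally good actions
    best_actions = [a for a, q in zip(actions, q_values) if q == best]
    return random.choice(best_actions)

Decaying EPSILON over episodes is a common refinement: explore heavily early on, then rely on the learned Q-values as the table fills in.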