Example #1
def test_learning(self):
    # Learning from a positive reward should raise the stored Q-value
    # for this (state, action) pair above its previous value.
    qlearning = QLearning()
    old_state = self.empty_state()
    action = ACTION_SUICIDE
    new_state = self.empty_state()
    reward = 10
    old_reward = qlearning.q.get_q(old_state, action)
    qlearning.learn(old_state, new_state, action, reward)
    self.assertGreater(qlearning.q.get_q(old_state, action), old_reward)
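
For context, the test above only relies on a QLearning object that exposes a Q-table through qlearning.q.get_q(state, action) and a learn(old_state, new_state, action, reward) update that pulls the stored value toward the observed reward. A minimal sketch of such a tabular learner is shown below; the class names, the dict-backed table, and the learning-rate and discount parameters are illustrative assumptions rather than the project's actual implementation.

import collections


class SimpleQTable:
    """Dict-backed table mapping (state, action) pairs to Q-values."""

    def __init__(self):
        self.values = collections.defaultdict(float)

    def get_q(self, state, action):
        return self.values[(state, action)]

    def set_q(self, state, action, value):
        self.values[(state, action)] = value


class QLearning:
    def __init__(self, alpha=0.5, gamma=0.9):
        self.q = SimpleQTable()
        self.alpha = alpha  # learning rate
        self.gamma = gamma  # discount factor

    def learn(self, old_state, new_state, action, reward):
        # Standard Q-learning update:
        # Q(s, a) <- Q(s, a) + alpha * (r + gamma * max_a' Q(s', a') - Q(s, a))
        old_q = self.q.get_q(old_state, action)
        future = max(
            (v for (s, a), v in self.q.values.items() if s == new_state),
            default=0.0,
        )
        self.q.set_q(old_state, action,
                     old_q + self.alpha * (reward + self.gamma * future - old_q))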
Example #2
def main(argv):
    setpath()
    from q_learning import QLearning
    from grid_world import Grid

    # 5 rows, 4 cols, unreachables : [(1,1), (1,3)], pits : [(3,1)], goal : (4,3)
    g = Grid(5, 4, [(1, 1), (1, 3)], [(3, 1)], (4, 3))
    q = QLearning(g)
    q.learn()
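
The Grid constructor above receives the board dimensions, the unreachable cells, the pit cells, and the goal cell. As a rough illustration of the reward structure such a grid world typically exposes (the function name, reward values, and step cost below are assumptions, not the grid_world module's interface), a cell's reward might be computed like this:

def cell_reward(cell, pits, goal, step_cost=-0.04):
    # Reaching the goal pays off, stepping into a pit is penalised,
    # and every other transition carries a small cost so the learned
    # policy prefers short paths.
    if cell == goal:
        return 1.0
    if cell in pits:
        return -1.0
    return step_cost


# For the grid above: pit at (3, 1), goal at (4, 3).
print(cell_reward((4, 3), [(3, 1)], (4, 3)))  # 1.0
print(cell_reward((3, 1), [(3, 1)], (4, 3)))  # -1.0
print(cell_reward((0, 0), [(3, 1)], (4, 3)))  # -0.04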
Example #3
# Imports needed by this snippet. rg is the rgkit game module; the module
# providing State and the action constants (aliased here as s) is assumed
# from context.
import copy
import random

import rg

from q_learning import QLearning
import state as s
from state import State


class Robot:
    game = None
    last_game = None

    current_state = None
    last_states = {}
    last_action = {}

    delta = None

    # Keep track of all our robot_ids. Will be useful to detect new robots.
    robot_ids = set()

    # default values - will get overridden by rgkit
    location = (0, 0)
    hp = 0
    player_id = 0
    robot_id = 0

    def __init__(self):
        self.qlearning = QLearning()

    def act(self, game):
        # Seeing a robot_id for the first time means a newly spawned robot.
        new_robot = self.robot_id not in self.robot_ids
        self.robot_ids.add(self.robot_id)

        self.current_state = State.from_game(game, self.player_id,
                                             robot_loc=self.location)
        self.game = game

        # Exploration: roughly 25% of the time take a random action
        # instead of the learner's prediction (epsilon-greedy).
        if random.randint(0, 3) < 1:
            print("[Bot " + str(self.robot_id) + "] random action")
            # print self.state
            action = self.get_random_action()
        else:
            action = self.qlearning.predict(self.current_state)

        self.last_states[self.robot_id] = self.current_state
        self.last_action[self.robot_id] = action

        return State.map_action(action, self.location)

    def get_possible_actions(self):
        possible_moves = [s.ACTION_SUICIDE]
        possible_locs = rg.locs_around(self.location,
                                       filter_out=('invalid', 'obstacle'))
        for loc in possible_locs:
            possible_moves.append((s.ACTION_MOVE, loc))
            possible_moves.append((s.ACTION_ATTACK, loc))
        return possible_moves

    def get_random_action(self):
        possible_action = self.get_possible_actions()
        return random.choice(possible_action)

    # Each entry of `delta` (passed to delta_callback below, together with
    # the new GameState) is an AttrDict of the form:
    #    'loc': loc,
    #    'hp': hp,
    #    'player_id': player_id,
    #    'loc_end': loc_end,
    #    'hp_end': hp_end

    def learn(self):
        # Pick the single delta entry describing this robot's own location;
        # reward() expects one entry, not a list.
        my_delta = next(d for d in self.delta if d.loc == self.location)
        last_state = self.last_states[self.robot_id]
        action = self.last_action[self.robot_id]
        future_state = self.current_state
        reward = self.reward(my_delta)
        self.qlearning.learn(last_state, future_state, action, reward)

    @staticmethod
    def reward(my_delta):
        # Reward is the damage this robot caused minus the damage it took.
        damage_taken = my_delta.hp - my_delta.hp_end
        reward = my_delta.damage_caused - damage_taken
        return reward

    def delta_callback(self, delta, actions, new_gamestate):
        future_game = new_gamestate.get_game_info(self.player_id)
        print "delta_callback calle"
        print("Size of Q: " + str(len(self.qlearning.q.hash_matrix)))
        for (robot_loc, robot) in self.game.robots.items():
            if hasattr(robot, 'robot_id') and robot.robot_id in self.robot_ids:
                action = self.last_action[robot.robot_id]

                for delta_me in delta:
                    state_template = State.from_game(future_game,
                                                     self.player_id)
                    if delta_me['loc'] == robot_loc:
                        future_state = copy.deepcopy(state_template)
                        future_state.robot_loc = delta_me.loc_end
                        reward = self.reward(delta_me)
                        # Learn from this robot's own recorded state rather
                        # than whichever state act() computed last.
                        last_state = self.last_states[robot.robot_id]
                        self.qlearning.learn(last_state, future_state,
                                             action, reward)
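
The act() method above hard-codes a roughly 25% exploration rate via random.randint(0, 3) < 1. A standalone epsilon-greedy helper with an explicit epsilon parameter, sketched here purely as an illustration (it is not part of the bot's code), makes that trade-off easier to tune:

import random


def epsilon_greedy(state, possible_actions, predict, epsilon=0.25):
    # With probability epsilon explore a uniformly random action,
    # otherwise exploit the learner's current best prediction.
    if random.random() < epsilon:
        return random.choice(possible_actions)
    return predict(state)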
Example #4
from q_state import next_state, random_state, actions
from q_learning import QLearning
from q_table import QTable

if __name__ == "__main__":
    episode = 100              # number of training episodes
    model_save_interval = 10   # persist the Q-table every 10 episodes

    table = QTable(actions)
    learning = QLearning(table)

    for step in range(episode):
        init_state = random_state()
        i = 0
        reward = 0
        # Keep replaying episodes from the same initial state until one
        # of them ends with reward == 1 (the goal is reached).
        while reward != 1:
            state = init_state
            while True:
                i += 1
                action = learning.choose_action(state)
                state2, reward, done = next_state(state, action, table)
                learning.learn(state, action, reward, state2, done)
                if done:
                    break
                state = state2
        print(init_state, i, len(table.q_table))
        if (step + 1) % model_save_interval == 0:
            table.save()
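
The loop above assumes a learner whose learn() call also receives the terminal flag, so that no future value is bootstrapped past the end of an episode. A minimal update compatible with that call signature might look like the sketch below; the q_table layout (a dict keyed by (state, action)), the actions attribute, and the hyper-parameter values are assumptions rather than the q_learning module's actual code.

import random


class QLearning:
    def __init__(self, table, alpha=0.1, gamma=0.9, epsilon=0.1):
        self.table = table
        self.alpha = alpha      # learning rate
        self.gamma = gamma      # discount factor
        self.epsilon = epsilon  # exploration rate

    def choose_action(self, state):
        # Epsilon-greedy over the table's known actions.
        if random.random() < self.epsilon:
            return random.choice(self.table.actions)
        return max(self.table.actions,
                   key=lambda a: self.table.q_table.get((state, a), 0.0))

    def learn(self, state, action, reward, next_state, done):
        old_q = self.table.q_table.get((state, action), 0.0)
        if done:
            target = reward  # terminal step: nothing left to bootstrap
        else:
            target = reward + self.gamma * max(
                self.table.q_table.get((next_state, a), 0.0)
                for a in self.table.actions)
        self.table.q_table[(state, action)] = old_q + self.alpha * (target - old_q)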