Example #1
    def get_neighborlist(self):
        # Logic adapted from tutor code: https://gist.github.com/tttor/826be15b99bb4b33a50787d7eb7b5fda
        neighborlist = []
        for action in self.moves:
            # Copy the grid rows so applying the move does not mutate this state
            data = [x[:] for x in self.lasertank.grid_data]
            temp = LaserTankMap(self.lasertank.x_size, self.lasertank.y_size,
                                data, self.lasertank.player_x,
                                self.lasertank.player_y,
                                self.lasertank.player_heading)
            # Only keep moves that actually succeed
            if temp.apply_move(action) == LaserTankMap.SUCCESS:
                neighbor = LaserTankState(temp, 1, self.flag_pos)  # step cost of 1
                neighborlist.append((neighbor, action))
        return neighborlist
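
For context, a minimal sketch of a breadth-first search driver that consumes this neighbor list; the is_goal check and the hashability of LaserTankState are assumptions, not part of the original snippet:

import collections

def breadth_first_search(start):
    # Hypothetical driver: `start` is a LaserTankState; is_goal() is assumed
    frontier = collections.deque([(start, [])])
    visited = {start}
    while frontier:
        state, path = frontier.popleft()
        if state.is_goal():
            return path  # list of actions from start to the goal
        for neighbor, action in state.get_neighborlist():
            if neighbor not in visited:
                visited.add(neighbor)
                frontier.append((neighbor, path + [action]))
    return None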
Example #2
    def train_q_learning(self, simulator: LaserTankMap):
        """
        Train the agent using Q-learning, building up a table of Q-values.
        :param simulator: A simulator for collecting episode data (LaserTankMap instance)
        """
        print('q-learning')

        # Q(s, a) table
        # suggested format: key = hash(state), value = dict(mapping actions to values)
        q_values = {}

        # Train until the compute budget ([simulator.time_limit] seconds) is
        # exhausted. Requires `import time`; the optional episode log at the
        # end also needs `import pandas as pd`.
        start = time.time()
        reward_list = []
        episode_reward = []
        while time.time() - start < simulator.time_limit:
            s = hash(simulator)
            a = self.choose_action(simulator, q_values)
            if s not in q_values:
                q_values[s] = {}
            # Current estimate of Q(s, a); defaults to 0.0 for unseen pairs
            old_q = q_values[s].get(a, 0.0)

            # Take the action and observe the reward and episode termination
            r, episode_finished = simulator.apply_move(a)
            reward_list.append(r)
            next_s = hash(simulator)
            if next_s not in q_values:
                q_values[next_s] = {}
            # Q-values for every action from the next state (unseen actions -> 0.0)
            next_s_q = {action: q_values[next_s].get(action, 0.0)
                        for action in simulator.MOVES}
            best_next_q = next_s_q[dict_argmax(next_s_q)]

            # TD update: Q(s,a) <- Q(s,a) + alpha * (r + gamma * max_a' Q(s',a') - Q(s,a))
            td = r + (simulator.gamma * best_next_q) - old_q
            q_values[s][a] = old_q + (self.learning_rate * td)
            if episode_finished:
                episode_reward.append(sum(reward_list))
                reward_list = []
                simulator.reset_to_start()
        # Optional: persist per-episode returns for analysis (requires pandas)
        # df = pd.DataFrame(episode_reward)
        # df.to_csv('episode.csv', index=False)
        # store the computed Q-values
        self.q_values = q_values
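
The snippet calls two helpers that are not shown, dict_argmax and self.choose_action. A minimal sketch of both, assuming an epsilon-greedy exploration rule with a hypothetical self.epsilon attribute (not part of the original code) and assuming simulator.MOVES is a sequence of actions:

import random

def dict_argmax(d):
    # Return the key with the largest value (ties broken arbitrarily)
    return max(d, key=d.get)

# As a method on the same agent class:
def choose_action(self, simulator, q_values):
    # Epsilon-greedy: explore with probability self.epsilon (assumed attribute),
    # otherwise exploit the best known Q-value for this state
    s = hash(simulator)
    known = q_values.get(s, {})
    if not known or random.random() < self.epsilon:
        return random.choice(simulator.MOVES)
    return dict_argmax(known)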
Example #3
    def action_move(self, action):
        # Copy the grid so applying the move does not mutate this state
        new_grid = [row[:] for row in self.grid]
        new_player = LaserTankMap(self.x_size, self.y_size, new_grid,
                                  self.coord_x, self.coord_y,
                                  self.player_heading)
        # Move Forward
        if action == 'W':
            result = new_player.apply_move('f')
            path_to_take = 'f'

        # Turn Clockwise
        elif action == 'D':
            result = new_player.apply_move('r')
            path_to_take = 'r'

        # Turn Counter-Clockwise
        elif action == 'A':
            result = new_player.apply_move('l')
            path_to_take = 'l'

        # Shoot Laser
        elif action == 'S':
            result = new_player.apply_move('s')
            path_to_take = 's'
        else:
            # Unrecognised action: nothing was applied, so bail out early
            print("No/wrong action input")
            return 0

        if result == 0:
            new_state = PlayerTank(new_player.grid_data, self.cost + 1,
                                   new_player.player_x, new_player.player_y,
                                   action, self.path + [path_to_take],
                                   self.x_size, self.y_size,
                                   new_player.player_heading)
        else:
            # result 1 or 2: the move failed or ended the game, so no successor
            new_state = 0
        return new_state

    def get_successor(self):
        next_states = []
        for move in self.moves:
            new_data = [row[:] for row in self.game_map.grid_data]
            new_map = LaserTankMap(self.game_map.x_size,
                                   self.game_map.y_size,
                                   new_data,
                                   player_x=self.game_map.player_x,
                                   player_y=self.game_map.player_y,
                                   player_heading=self.game_map.player_heading)
            new_parents = list(self.parents)  # copy the move history so far
            if new_map.apply_move(move) == LaserTankMap.SUCCESS:
                new_parents.append(move)
                next_state = State(new_map, 1, new_parents)
                next_states.append((next_state, move))

        return next_states
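
For context, a minimal sketch of a uniform-cost search driver that consumes get_successor; the is_goal check and the hashability of State are assumptions, not part of the original snippet:

import heapq
import itertools

def uniform_cost_search(start):
    # Hypothetical driver: `start` is a State; is_goal() is assumed
    counter = itertools.count()  # tie-breaker so heapq never compares State objects
    frontier = [(0, next(counter), start)]
    best_cost = {start: 0}
    while frontier:
        cost, _, state = heapq.heappop(frontier)
        if state.is_goal():
            return state.parents  # accumulated list of moves to the goal
        for successor, move in state.get_successor():
            new_cost = cost + 1  # every move costs 1 in this problem
            if new_cost < best_cost.get(successor, float('inf')):
                best_cost[successor] = new_cost
                heapq.heappush(frontier, (new_cost, next(counter), successor))
    return None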