def get_value(self, state, action):
        """Return the value estimate for taking ``action`` in ``state``.

        The state is flattened and multiplied by ``self.scale_factor``, passed
        to ``tiles`` to obtain indices, and those indices are offset by
        ``action * self.num_tiles`` before being used to index ``self.theta``.

        Args:
            state: Array-like state; flattened before scaling.
            action: Action index used to offset the tile indices.
                NOTE(review): presumably an integer in
                ``range(self.num_actions)`` -- confirm against callers.

        Returns:
            The sum of the ``self.theta`` entries at the offset indices.
        """
        scaled_state = np.multiply(np.asarray(state).flatten(), self.scale_factor)

        tile_indices = np.asarray(
            tiles(self.iht, self.num_tilings, scaled_state),
            dtype=int) + (action * self.num_tiles)

        # np.sum (rather than the builtin sum) reduces in native code and uses
        # the same reduction as get_next_states_values, so both methods agree
        # on the estimate for a given (state, action).
        return np.sum(self.theta[tile_indices])
# Example no. 2
# 0
 def update(self, state, action, nstep_return):
     """Shift the weights used for ``(state, action)`` toward ``nstep_return``.

     The difference between ``nstep_return`` and the current estimate from
     ``self.get_value`` is scaled by ``self.alpha`` and added to every entry
     of ``self.theta`` selected by the tile indices for this pair.

     Args:
         state: Array-like state; flattened before scaling.
         action: Action index used to offset the tile indices.
         nstep_return: Target value the estimate is moved toward.
     """
     error = nstep_return - self.get_value(state, action)

     # NOTE(review): get_value in this file scales the state by
     # self.scale_factor, while this method uses self.tiling_side_length.
     # Confirm both refer to the same scaling; otherwise the indices updated
     # here differ from the ones the estimate above was read from.
     scaled_state = np.multiply(
         np.asarray(state).flatten(), self.tiling_side_length)
     tile_indices = np.asarray(
         tiles(self.iht, self.num_tilings, scaled_state),
         dtype=int) + (action * self.num_tiles)

     # `theta[idx] += x` applies the increment only once to an index that
     # appears several times in idx; np.add.at accumulates every occurrence,
     # matching how a summed estimate counts a repeated index.
     np.add.at(self.theta, tile_indices, self.alpha * error)
    def get_next_states_values(self, state):
        """Return the value estimate of every action for ``state``.

        Args:
            state: Array-like state; flattened and multiplied by
                ``self.scale_factor`` before the tile lookup.

        Returns:
            A NumPy array of length ``self.num_actions`` whose entry ``a`` is
            the sum of the ``self.theta`` entries at the tile indices offset
            by ``a * self.num_tiles``.
        """
        scaled_state = np.multiply(np.asarray(state).flatten(), self.scale_factor)

        # The tile lookup does not involve the action, so do it once instead
        # of once per action; only the per-action offset changes in the loop.
        # NOTE(review): assumes tiles() returns the same indices when called
        # repeatedly with identical arguments -- confirm for the tiles
        # implementation in use.
        base_indices = np.asarray(
            tiles(self.iht, self.num_tilings, scaled_state), dtype=int)

        values = np.zeros(self.num_actions)
        for action in range(self.num_actions):
            action_indices = base_indices + (action * self.num_tiles)
            values[action] = np.sum(self.theta[action_indices])
        return values