Пример #1
0
    def to_vector(self, board: PodBoard, pod: PodState) -> List[float]:
        # Velocity is already relative to the pod, so it just needs to be rotated
        vel = pod.vel.rotate(-pod.angle) / Constants.max_vel()

        check1 = (board.get_check(pod.nextCheckId) -
                  pod.pos).rotate(-pod.angle) / MAX_DIST
        check2 = (board.get_check(pod.nextCheckId + 1) -
                  pod.pos).rotate(-pod.angle) / MAX_DIST

        return [vel.x, vel.y, check1.x, check1.y, check2.x, check2.y]
Пример #2
0
    def test_get_best_action_works_behind_right(self):
        board = PodBoard([Vec2(5000, 5000), Vec2(1000, 1000)])
        # Pod is directly right of check, but facing away (slightly to the right)
        pod = PodState(Vec2(7000, 5000))
        pod.angle = -0.000001

        self.__do_get_best_action_assert(board, pod, 0, -Constants.max_turn())
Пример #3
0
    def test_get_best_action_works_right(self):
        board = PodBoard([Vec2(5000, 5000), Vec2(1000, 1000)])
        # Pod is directly below the check, but the check is behind and to its right
        pod = PodState(Vec2(5000, 0))
        pod.angle = math.pi * 1.25

        self.__do_get_best_action_assert(board, pod, 0, -Constants.max_turn())
Пример #4
0
    def test_get_best_action_works_straight(self):
        board = PodBoard([Vec2(5000, 5000), Vec2(1000, 1000)])
        # Pod is directly below the check, but looking straight at it
        pod = PodState(Vec2(5000, 0))
        pod.angle = math.pi / 2

        self.__do_get_best_action_assert(board, pod, Constants.max_thrust(), 0)
Пример #5
0
def _to_state(board: PodBoard, pod: PodState) -> Tuple[int, int, int, int]:
    vel = pod.vel.rotate(-pod.angle)

    check1 = (board.get_check(pod.nextCheckId) - pod.pos).rotate(-pod.angle)

    return (
        _discretize(vel.x / Constants.max_vel(), 10),
        _discretize(vel.y / Constants.max_vel(), 10),
        _discretize(check1.x / MAX_DIST, 30),
        _discretize(check1.y / MAX_DIST, 30),
    )
    def test_state_to_vector_works1(self):
        # A pod at (100, 100) pointing down -X, moving full speed +Y
        pod = PodState(Vec2(100, 100), Vec2(0, Constants.max_vel()), -math.pi)
        # The target checkpoint is directly behind it
        board = PodBoard([Vec2(100 + MAX_DIST, 100), ORIGIN])

        state = state_to_vector(pod, board)

        self.assertEqual(len(state), STATE_VECTOR_LEN)
        self.assertAlmostEqual(state[0], 0, msg="velocity x")
        self.assertAlmostEqual(state[1], -1, msg="velocity y")
        self.assertAlmostEqual(state[2], -1, msg="check1 x")
        self.assertAlmostEqual(state[3], 0, msg="check1 y")
Пример #7
0
def pgr(board: PodBoard, pod: PodState) -> float:
    """
    Pretty Good Reward
    Attempts to estimate the distance without using a SQRT calculation.
    """
    pod_to_check = board.checkpoints[pod.nextCheckId] - pod.pos
    prev_to_next_check = board.checkpoints[pod.nextCheckId] - board.get_check(pod.nextCheckId - 1)
    pod_dist_estimate = (math.fabs(pod_to_check.x) + math.fabs(pod_to_check.y)) / 2
    check_dist_estimate = (math.fabs(prev_to_next_check.x) + math.fabs(prev_to_next_check.y)) / 2
    dist_estimate = pod_dist_estimate / check_dist_estimate

    checks_hit = len(board.checkpoints) * pod.laps + pod.nextCheckId

    return 2*checks_hit - dist_estimate + 1
    def test_state_to_vector_works2(self):
        # A pod at (-100, -100) pointing up +Y, moving 45 degrees down-left
        pod = PodState(Vec2(-100, -100), Vec2(-3, -3), math.pi / 2)
        # The target checkpoint is directly in front
        board = PodBoard([Vec2(-100, 1000), ORIGIN])

        state = state_to_vector(pod, board)

        self.assertEqual(len(state), STATE_VECTOR_LEN)
        self.assertAlmostEqual(state[0],
                               -3 / Constants.max_vel(),
                               msg="velocity x")
        self.assertAlmostEqual(state[1],
                               3 / Constants.max_vel(),
                               msg="velocity y")
        self.assertAlmostEqual(state[2], 1100 / MAX_DIST, msg="check1 x")
        self.assertAlmostEqual(state[3], 0, msg="check1 y")
Пример #9
0
    def get_best_action(self, board: PodBoard, pod: PodState,
                        reward_func: RewardFunc) -> int:
        """
        Get the action that will result in the highest reward for the given state
        """
        best_action = 0
        best_reward = -999

        for action in range(self.num_actions):
            play = self.action_to_output(action, pod.angle, pod.pos)
            next_pod = board.step(pod, play)
            reward = reward_func(board, next_pod)
            if reward > best_reward:
                best_reward = reward
                best_action = action

        return best_action
Пример #10
0
 def test_pod_to_state_works(self):
     board = PodBoard([Vec2(5000, 5000)])
     pod = PodState()
     state = pod_to_state(pod, board)
     self.assertLess(state, TOTAL_STATES)
Пример #11
0
 def r_func(board: PodBoard, pod) -> float:
     for x in range(depth):
         pod = board.step(pod, controller.play(pod))
     return orig_rfunc(board, pod)