def to_vector(self, board: PodBoard, pod: PodState) -> List[float]:
    # Velocity is already relative to the pod, so it just needs to be rotated
    vel = pod.vel.rotate(-pod.angle) / Constants.max_vel()
    check1 = (board.get_check(pod.nextCheckId) - pod.pos).rotate(-pod.angle) / MAX_DIST
    check2 = (board.get_check(pod.nextCheckId + 1) - pod.pos).rotate(-pod.angle) / MAX_DIST
    return [vel.x, vel.y, check1.x, check1.y, check2.x, check2.y]
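# Illustrative sketch (not part of the project code): rotating everything by -angle puts
# the state in the pod's frame of reference, so a checkpoint dead ahead always maps to the
# +x axis regardless of the pod's absolute heading. Plain-math demo using only the stdlib:
import math

def rotate(x: float, y: float, angle: float):
    # Rotate the point (x, y) by `angle` radians around the origin
    c, s = math.cos(angle), math.sin(angle)
    return x * c - y * s, x * s + y * c

# A checkpoint 1000 units ahead of a pod facing 90 degrees (+Y)...
rel_x, rel_y = rotate(0, 1000, -math.pi / 2)
# ...lands on the +x axis once rotated by -angle: (1000.0, ~0.0)
assert abs(rel_x - 1000) < 1e-6 and abs(rel_y) < 1e-6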
def test_get_best_action_works_behind_right(self):
    board = PodBoard([Vec2(5000, 5000), Vec2(1000, 1000)])
    # Pod is directly right of check, but facing away (slightly to the right)
    pod = PodState(Vec2(7000, 5000))
    pod.angle = -0.000001
    self.__do_get_best_action_assert(board, pod, 0, -Constants.max_turn())
def test_get_best_action_works_right(self):
    board = PodBoard([Vec2(5000, 5000), Vec2(1000, 1000)])
    # Pod is directly below the check, but the check is behind and to its right
    pod = PodState(Vec2(5000, 0))
    pod.angle = math.pi * 1.25
    self.__do_get_best_action_assert(board, pod, 0, -Constants.max_turn())
def test_get_best_action_works_straight(self):
    board = PodBoard([Vec2(5000, 5000), Vec2(1000, 1000)])
    # Pod is directly below the check and looking straight at it
    pod = PodState(Vec2(5000, 0))
    pod.angle = math.pi / 2
    self.__do_get_best_action_assert(board, pod, Constants.max_thrust(), 0)
def _to_state(board: PodBoard, pod: PodState) -> Tuple[int, int, int, int]:
    # Rotate velocity and the next-checkpoint offset into the pod's frame of reference,
    # normalize, and bucket each component into a discrete state dimension
    vel = pod.vel.rotate(-pod.angle)
    check1 = (board.get_check(pod.nextCheckId) - pod.pos).rotate(-pod.angle)
    return (
        _discretize(vel.x / Constants.max_vel(), 10),
        _discretize(vel.y / Constants.max_vel(), 10),
        _discretize(check1.x / MAX_DIST, 30),
        _discretize(check1.y / MAX_DIST, 30),
    )
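# The _discretize helper is referenced above but not shown here. A minimal sketch of what
# it might look like, assuming it maps a normalized value in [-1, 1] to an integer bucket
# in [0, buckets - 1] (hypothetical implementation, not the project's actual code):
def _discretize(value: float, buckets: int) -> int:
    # Clamp to [-1, 1], then scale into a bucket index
    clamped = max(-1.0, min(1.0, value))
    return min(buckets - 1, int((clamped + 1) / 2 * buckets))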
def test_state_to_vector_works1(self):
    # A pod at (100, 100) pointing down -X, moving full speed +Y
    pod = PodState(Vec2(100, 100), Vec2(0, Constants.max_vel()), -math.pi)
    # The target checkpoint is directly behind it
    board = PodBoard([Vec2(100 + MAX_DIST, 100), ORIGIN])
    state = state_to_vector(pod, board)
    self.assertEqual(len(state), STATE_VECTOR_LEN)
    self.assertAlmostEqual(state[0], 0, msg="velocity x")
    self.assertAlmostEqual(state[1], -1, msg="velocity y")
    self.assertAlmostEqual(state[2], -1, msg="check1 x")
    self.assertAlmostEqual(state[3], 0, msg="check1 y")
def pgr(board: PodBoard, pod: PodState) -> float:
    """
    Pretty Good Reward

    Attempts to estimate the distance without using a SQRT calculation.
    """
    pod_to_check = board.checkpoints[pod.nextCheckId] - pod.pos
    prev_to_next_check = board.checkpoints[pod.nextCheckId] - board.get_check(pod.nextCheckId - 1)

    pod_dist_estimate = (math.fabs(pod_to_check.x) + math.fabs(pod_to_check.y)) / 2
    check_dist_estimate = (math.fabs(prev_to_next_check.x) + math.fabs(prev_to_next_check.y)) / 2
    dist_estimate = pod_dist_estimate / check_dist_estimate

    checks_hit = len(board.checkpoints) * pod.laps + pod.nextCheckId

    return 2 * checks_hit - dist_estimate + 1
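# Worked example of the formula above (illustrative numbers, not taken from the project's
# tests): with one checkpoint already passed (checks_hit = 1) and the pod sitting halfway
# along the straight line from the previous check to the next, the two Manhattan-style
# estimates give dist_estimate = 0.5, so
#     pgr = 2 * checks_hit - dist_estimate + 1 = 2 * 1 - 0.5 + 1 = 2.5
# Passing a checkpoint adds 2 to the reward, which (for legs of similar length) outweighs
# the roughly +1 jump in dist_estimate at the start of the next leg.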
def test_state_to_vector_works2(self):
    # A pod at (-100, -100) pointing up +Y, moving 45 degrees down-left
    pod = PodState(Vec2(-100, -100), Vec2(-3, -3), math.pi / 2)
    # The target checkpoint is directly in front
    board = PodBoard([Vec2(-100, 1000), ORIGIN])
    state = state_to_vector(pod, board)
    self.assertEqual(len(state), STATE_VECTOR_LEN)
    self.assertAlmostEqual(state[0], -3 / Constants.max_vel(), msg="velocity x")
    self.assertAlmostEqual(state[1], 3 / Constants.max_vel(), msg="velocity y")
    self.assertAlmostEqual(state[2], 1100 / MAX_DIST, msg="check1 x")
    self.assertAlmostEqual(state[3], 0, msg="check1 y")
def get_best_action(self, board: PodBoard, pod: PodState, reward_func: RewardFunc) -> int:
    """
    Get the action that will result in the highest reward for the given state
    """
    best_action = 0
    # Start below any achievable reward so the first evaluated action always replaces it
    best_reward = -math.inf

    for action in range(self.num_actions):
        play = self.action_to_output(action, pod.angle, pod.pos)
        next_pod = board.step(pod, play)
        reward = reward_func(board, next_pod)
        if reward > best_reward:
            best_reward = reward
            best_action = action

    return best_action
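# Illustrative usage sketch (the controller instance and its action space are assumptions
# based on the snippets above, not confirmed project API): evaluate every discrete action
# for one simulated turn and greedily take the best one.
#
#   board = PodBoard([Vec2(5000, 5000), Vec2(1000, 1000)])
#   pod = PodState(Vec2(5000, 0))
#   action = controller.get_best_action(board, pod, pgr)
#   play = controller.action_to_output(action, pod.angle, pod.pos)
#   pod = board.step(pod, play)
#
# Because each candidate action is scored after a single simulated step, the choice is
# greedy; the r_func wrapper below appears to extend the lookahead over several steps.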
def test_pod_to_state_works(self):
    board = PodBoard([Vec2(5000, 5000)])
    pod = PodState()
    state = pod_to_state(pod, board)
    self.assertLess(state, TOTAL_STATES)
def r_func(board: PodBoard, pod: PodState) -> float:
    # Roll the pod forward `depth` turns with the controller, then score the resulting state
    for _ in range(depth):
        pod = board.step(pod, controller.play(pod))
    return orig_rfunc(board, pod)
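# r_func closes over depth, controller, and orig_rfunc from an enclosing scope, so it is
# presumably built by a wrapper that takes those three values. Hypothetical usage (the
# wrapper name is an assumption, not confirmed project API):
#
#   lookahead_pgr = make_lookahead_reward(pgr, controller, depth=3)
#   action = controller.get_best_action(board, pod, lookahead_pgr)
#
# The effect is to reward states from which the controller can reach a good position
# `depth` turns later, rather than only the immediate next state.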