def test_get_best_action_works_behind_right(self):
    board = PodBoard([Vec2(5000, 5000), Vec2(1000, 1000)])
    # Pod is directly to the right of the check, but facing away (turned slightly to the right)
    pod = PodState(Vec2(7000, 5000))
    pod.angle = -0.000001
    self.__do_get_best_action_assert(board, pod, 0, -Constants.max_turn())
def test_get_best_action_works_right(self):
    board = PodBoard([Vec2(5000, 5000), Vec2(1000, 1000)])
    # Pod is directly below the check, but the check is behind it and to its right
    pod = PodState(Vec2(5000, 0))
    pod.angle = math.pi * 1.25
    self.__do_get_best_action_assert(board, pod, 0, -Constants.max_turn())
def test_get_best_action_works_straight(self):
    board = PodBoard([Vec2(5000, 5000), Vec2(1000, 1000)])
    # Pod is directly below the check, looking straight at it
    pod = PodState(Vec2(5000, 0))
    pod.angle = math.pi / 2
    self.__do_get_best_action_assert(board, pod, Constants.max_thrust(), 0)
def test_PodState_equals_not_initial(self):
    p1 = PodState(pos=Vec2(1, 2), angle=1.23, vel=Vec2(3, 4), next_check_id=5)
    p2 = PodState(pos=Vec2(1, 2), angle=1.23, vel=Vec2(3, 4), next_check_id=5)
    self.assertEqual(p1, p2)
def __init__(self, controller: Controller, pod: PodState = None):
    """
    :param controller: Handles movement
    :param pod: Initial state of the pod. If omitted, a new one is created at the board's last checkpoint
    """
    self.controller = controller
    if pod is None:
        self.pod = PodState(controller.board.checkpoints[-1])
    else:
        self.pod = pod
    self.initial_state = self.pod.clone()
def train(self,
          num_episodes: int = 10,
          prob_rand_action: float = 0.5,
          max_turns: int = 50,
          learning_rate: float = 1.0,
          future_discount: float = 0.8) -> List[float]:
    """
    Train, starting each episode at a random point
    """
    max_reward_per_ep = []

    for episode in range(num_episodes):
        # The pod starts at a random bearing and a random (possibly far) distance
        # from the check -- between 1 and 17 checkpoint radii -- pointing in a
        # random direction
        pos_offset = UNIT.rotate(random.random() * 2 * math.pi) * \
            Constants.check_radius() * (16 * random.random() + 1)
        pod = PodState(pos=self.board.checkpoints[0] + pos_offset,
                       angle=2 * math.pi * random.random() - math.pi)
        max_reward_per_ep.append(
            self.__do_train(pod, max_turns, prob_rand_action, learning_rate,
                            future_discount))

    return max_reward_per_ep
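# A self-contained sketch of the episode start distribution used by train() above,
# for cross-checking the geometry. CHECK_RADIUS = 600 is an assumed stand-in for
# Constants.check_radius(); the rest mirrors the code directly.
import math
import random

CHECK_RADIUS = 600  # assumption: the value returned by Constants.check_radius()

bearing = random.random() * 2 * math.pi                 # direction from check to pod
dist = CHECK_RADIUS * (16 * random.random() + 1)        # anywhere in [1, 17) radii
offset = (math.cos(bearing) * dist, math.sin(bearing) * dist)
heading = 2 * math.pi * random.random() - math.pi       # uniform in [-pi, pi)
print("offset from check:", offset, "initial heading:", heading)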
def gen_initial_state(self) -> PodState:
    """
    Generate a state at which to start a training episode
    """
    # The pod starts in a random position at a random distance from the check,
    # pointing in a random direction
    pos_offset = UNIT.rotate(random() * 2 * math.pi) * \
        Constants.check_radius() * (15 * random() + 1)
    return PodState(pos=self.target.board.get_check(0) + pos_offset,
                    angle=2 * math.pi * random() - math.pi)
def test_gen_pods(self):
    check = Vec2(10, 20)
    pods = gen_pods(
        [check],
        # One to the left, one to the right of the check
        [0, math.pi],
        [1.0],
        # Always pointing at the check
        [0.0],
        # One to the left, one to the right of the pod's heading (so +/- y)
        [math.pi / 2, 3 * math.pi / 2],
        [2.0])

    for pod in [
            PodState(pos=check + Vec2(1, 0), vel=Vec2(0, 2), angle=math.pi, next_check_id=0),
            PodState(pos=check + Vec2(1, 0), vel=Vec2(0, -2), angle=math.pi, next_check_id=0),
            PodState(pos=check - Vec2(1, 0), vel=Vec2(0, 2), angle=0, next_check_id=0),
            PodState(pos=check - Vec2(1, 0), vel=Vec2(0, -2), angle=0, next_check_id=0)
    ]:
        self.assertIn(pod, pods,
                      "{} not found in {}".format(pod, [str(p) for p in pods]))

    self.assertEqual(len(pods), 4)
def test_state_to_vector_works1(self):
    # A pod at (100, 100) facing the -X direction, moving at full speed in +Y
    pod = PodState(Vec2(100, 100), Vec2(0, Constants.max_vel()), -math.pi)
    # The target checkpoint is directly behind it
    board = PodBoard([Vec2(100 + MAX_DIST, 100), ORIGIN])

    state = state_to_vector(pod, board)

    self.assertEqual(len(state), STATE_VECTOR_LEN)
    self.assertAlmostEqual(state[0], 0, msg="velocity x")
    self.assertAlmostEqual(state[1], -1, msg="velocity y")
    self.assertAlmostEqual(state[2], -1, msg="check1 x")
    self.assertAlmostEqual(state[3], 0, msg="check1 y")
class Player:
    """
    A Player encapsulates both a Controller and its associated PodState
    """
    def __init__(self, controller: Controller, pod: PodState = None):
        """
        :param controller: Handles movement
        :param pod: Initial state of the pod. If omitted, a new one is created at the board's last checkpoint
        """
        self.controller = controller
        if pod is None:
            self.pod = PodState(controller.board.checkpoints[-1])
        else:
            self.pod = pod
        self.initial_state = self.pod.clone()

    def __str__(self):
        return "Player[controller=%s pod=%s]" % (type(self.controller), self.pod)

    def step(self):
        """
        Have the Controller play once, and update the pod with the output
        """
        self.controller.step(self.pod)

    def reset(self):
        """
        Reset: put the pod at the start position with 0 turns/laps
        """
        self.pod = self.initial_state.clone()
        self.controller.reset()

    def record(self) -> Dict:
        """
        Snapshot the current pod state plus any controller-specific data
        """
        log = {'pod': self.pod.clone()}
        self.controller.record(log)
        return log
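# A minimal usage sketch for Player. The import paths and SimpleController are
# assumptions, not the repository's actual names: substitute the real modules
# and any Controller subclass whose .board attribute is set.
from pod.board import PodBoard                 # assumed module path
from pod.controller import SimpleController    # assumed module path and class
from pod.player import Player                  # assumed module path
from vec2 import Vec2                          # assumed module path

board = PodBoard([Vec2(5000, 5000), Vec2(12000, 3000)])
player = Player(SimpleController(board))       # pod defaults to the last checkpoint

for _ in range(100):
    player.step()                              # controller acts, pod state is updated

snapshot = player.record()                     # {'pod': <PodState>, plus controller data}
player.reset()                                 # back to the saved initial state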
def test_state_to_vector_works2(self):
    # A pod at (-100, -100) facing +Y, moving at 45 degrees down-left
    pod = PodState(Vec2(-100, -100), Vec2(-3, -3), math.pi / 2)
    # The target checkpoint is directly in front of it
    board = PodBoard([Vec2(-100, 1000), ORIGIN])

    state = state_to_vector(pod, board)

    self.assertEqual(len(state), STATE_VECTOR_LEN)
    self.assertAlmostEqual(state[0], -3 / Constants.max_vel(), msg="velocity x")
    self.assertAlmostEqual(state[1], 3 / Constants.max_vel(), msg="velocity y")
    self.assertAlmostEqual(state[2], 1100 / MAX_DIST, msg="check1 x")
    self.assertAlmostEqual(state[3], 0, msg="check1 y")
def gen_pods(checks: List[Vec2],
             pos_angles: List[float],
             pos_dists: List[float],
             angles: List[float],
             vel_angles: List[float],
             vel_mags: List[float]):
    """
    Generate pods in various states
    :param checks: Checkpoints around which to generate
    :param pos_angles: Angles from check to pod
    :param pos_dists: Distances from check to pod
    :param angles: Orientations of the pods. These are rotated so that 0 points toward the check!
    :param vel_angles: Angles of velocity. Also rotated so that 0 points toward the check.
    :param vel_mags: Magnitudes of velocity
    :return: One pod for each combination of parameters
    """
    relative_poss = [
        UNIT.rotate(ang) * dist for ang in pos_angles for dist in pos_dists
    ]
    relative_vels = [
        UNIT.rotate(ang) * mag for ang in vel_angles for mag in vel_mags
    ]

    print("Generating pods: checks={} positions={} angles={} vels={}".format(
        len(checks), len(relative_poss), len(angles), len(relative_vels)))

    pods = []
    for (c_idx, checkpoint) in enumerate(checks):
        for rel_pos in relative_poss:
            # Angle from the pod back toward its checkpoint
            ang_to_check = rel_pos.angle() + math.pi
            pos = checkpoint + rel_pos
            for rel_vel in relative_vels:
                vel = rel_vel.rotate(ang_to_check)
                for angle in angles:
                    pods.append(
                        PodState(pos=pos,
                                 vel=vel,
                                 angle=clean_angle(angle + ang_to_check),
                                 next_check_id=c_idx))

    np.random.shuffle(pods)
    print("{} pods generated".format(len(pods)))
    return pods
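# The "0 points toward the check" convention above is the subtle part of gen_pods.
# The following self-contained sketch expands a single parameter combination with
# plain tuples instead of Vec2; all values are arbitrary illustration, and the
# inline normalization is a stand-in for clean_angle.
import math

check = (10.0, 20.0)
pos_angle, pos_dist = math.pi / 2, 100.0   # pod sits 100 units "above" the check
vel_angle, vel_mag = 0.0, 50.0             # 0 means: velocity points at the check
orientation = 0.0                          # 0 means: facing the check

rel_pos = (math.cos(pos_angle) * pos_dist, math.sin(pos_angle) * pos_dist)
pos = (check[0] + rel_pos[0], check[1] + rel_pos[1])
ang_to_check = math.atan2(rel_pos[1], rel_pos[0]) + math.pi   # from the pod back to the check
vel = (math.cos(vel_angle + ang_to_check) * vel_mag,
       math.sin(vel_angle + ang_to_check) * vel_mag)
# Normalize to [-pi, pi), as clean_angle does
angle = (orientation + ang_to_check + math.pi) % (2 * math.pi) - math.pi
print("pos:", pos, "vel:", vel, "angle:", angle)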
def train_progressively(self,
                        dist_increment: int,
                        ep_per_dist: int,
                        num_incr: int,
                        prob_rand_action: float = 0.5,
                        learning_rate: float = 0.5,
                        future_discount: float = 0.8) -> List[float]:
    """
    Train by randomly generating pods close to the checkpoint, then gradually backing away
    :param dist_increment: Increment by which to increase the distance to the check
    :param ep_per_dist: Number of episodes to run at each increment
    :param num_incr: Number of distance increments to run
    :param prob_rand_action: Probability of taking a random action instead of the current best
    :param learning_rate: Q-learning learning rate
    :param future_discount: Discount factor applied to future rewards
    :return: List of max rewards for each episode
    """
    old_rew = self.reward_func
    self.reward_func = check_reward

    max_reward_per_ep = []
    for incr in range(1, num_incr + 1):
        for ep_inc in range(ep_per_dist):
            # Position is (check radius + incr * increment) away from the check, at a random bearing
            pos_offset = UNIT.rotate(random.random() * 2 * math.pi) * \
                (Constants.check_radius() + dist_increment * incr)
            pod = PodState(pos=self.board.checkpoints[0] + pos_offset,
                           angle=2 * math.pi * random.random() - math.pi)
            max_reward_per_ep.append(
                self.__do_train(pod, 5 * incr, prob_rand_action, learning_rate,
                                future_discount))

    self.reward_func = old_rew
    return max_reward_per_ep
def test_PodState_equals_fails_initial(self):
    p1 = PodState(pos=Vec2(0, 1))
    p2 = PodState(pos=Vec2(1, 1))
    self.assertNotEqual(p1, p2)
def test_PodState_equals_initial(self):
    p1 = PodState()
    p2 = PodState()
    self.assertEqual(p1, p2)
def test_PodState_deserializes_equal_to_serialized(self):
    pod = PodState()
    pod.angle = 1.23
    pod.pos = Vec2(34, 56)
    pod.laps = 69
    pod.vel = Vec2(88, 77)
    pod.nextCheckId = 37

    ser = pod.serialize()
    copy = PodState()
    copy.deserialize(ser)

    self.assertEqual(pod.angle, copy.angle)
    self.assertEqual(pod.pos, copy.pos)
    self.assertEqual(pod.laps, copy.laps)
    self.assertEqual(pod.vel, copy.vel)
    self.assertEqual(pod.nextCheckId, copy.nextCheckId)
def test_pod_to_state_works(self):
    board = PodBoard([Vec2(5000, 5000)])
    pod = PodState()
    state = pod_to_state(pod, board)
    self.assertLess(state, TOTAL_STATES)
def step(self,
         pod: PodState,
         play: PlayOutput,
         output: PodState = None) -> PodState:
    """
    For the given pod, implement the given play.

    On each turn the pod's movement is computed this way:
        Rotation: the pod rotates to face the target point, by a maximum of
            18 degrees (except on the first turn).
        Acceleration: the pod's facing vector is multiplied by the given thrust
            value. The result is added to the current speed vector.
        Movement: the speed vector is added to the position of the pod. If a
            collision would occur at this point, the pods rebound off each other.
        Friction: the current speed vector of each pod is multiplied by 0.85.
        The speed's values are truncated and the position's values are rounded
        to the nearest integer.

    Collisions are elastic. The minimum impulse of a collision is 120.
    A boost is in fact an acceleration of 650. A shield multiplies the pod's mass by 10.
    The provided angle is absolute: 0° means facing EAST, while 90° means facing SOUTH.

    :param pod: Initial state
    :param play: Action to play
    :param output: Output state to update (may be the same object as the input pod).
        If not given, a new one will be created.
    :return: The new pod state (same object as output, if given)
    """
    if output is None:
        output = PodState()

    # 1. Rotation
    requested_angle = (play.target - pod.pos).angle()
    angle = legal_angle(requested_angle, pod.angle)
    output.angle = angle

    # 2. Acceleration
    direction = UNIT.rotate(angle)
    thrust = int(within(play.thrust, 0, Constants.max_thrust()))
    output.vel = pod.vel + (direction * thrust)

    # 3. Movement
    output.pos = pod.pos + output.vel

    # 4. Friction
    output.vel = output.vel * Constants.friction()

    # 5. Rounding
    output.pos = output.pos.round()
    output.vel = output.vel.truncate()

    # Update progress
    output.turns = pod.turns + 1
    output.nextCheckId = pod.nextCheckId
    output.laps = pod.laps
    check = self.checkpoints[pod.nextCheckId]
    if (check - output.pos).square_length() < Constants.check_radius_sq():
        output.nextCheckId += 1
        if output.nextCheckId >= len(self.checkpoints):
            output.nextCheckId = 0
            output.laps += 1

    return output
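# A minimal, self-contained sketch of the per-turn physics described in the step()
# docstring above, using plain floats instead of Vec2/PodState. The 18-degree turn
# limit and 0.85 friction come from that docstring; MAX_THRUST = 100 is an assumed
# stand-in for Constants.max_thrust().
import math

MAX_TURN = math.radians(18)
MAX_THRUST = 100     # assumption: the value returned by Constants.max_thrust()
FRICTION = 0.85

def step_once(x, y, vx, vy, angle, target_x, target_y, thrust):
    # 1. Rotation: clamp the requested heading change to +/- MAX_TURN
    requested = math.atan2(target_y - y, target_x - x)
    delta = (requested - angle + math.pi) % (2 * math.pi) - math.pi
    angle += max(-MAX_TURN, min(MAX_TURN, delta))
    # 2. Acceleration: add thrust along the facing vector
    thrust = max(0, min(MAX_THRUST, int(thrust)))
    vx += math.cos(angle) * thrust
    vy += math.sin(angle) * thrust
    # 3. Movement: add velocity to position
    x += vx
    y += vy
    # 4. Friction, then 5. truncate velocity and round position
    vx, vy = int(vx * FRICTION), int(vy * FRICTION)
    return round(x), round(y), vx, vy, angle

# One turn at full thrust toward a target straight ahead
print(step_once(0, 0, 0, 0, 0.0, 1000, 0, 100))   # (100, 0, 85, 0, 0.0)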