def grid(rows: int = 3, cols: int = 3, x_spacing: int = 4000, y_spacing: int = 3000) -> 'PodBoard':
    """
    Build a board whose checkpoints form a lattice centred on the world,
    numbered row by row:

        1 2 3
        4 5 6
        7 8 9

    :param rows: Number of checkpoint rows
    :param cols: Number of checkpoint columns
    :param x_spacing: Horizontal distance between neighbouring checkpoints
    :param y_spacing: Vertical distance between neighbouring checkpoints
    :return: A PodBoard holding rows * cols checkpoints
    """
    mid_x = Constants.world_x() / 2
    mid_y = Constants.world_y() / 2

    # Index offsets are symmetric around zero, e.g.
    #   5 lines: -2, -1, 0, 1, 2
    #   4 lines: -1.5, -0.5, 0.5, 1.5
    # so the first one is always -(n - 1) / 2.
    first_row = (1 - rows) / 2
    first_col = (1 - cols) / 2

    return PodBoard([
        Vec2(mid_x + (first_col + c) * x_spacing,
             mid_y + (first_row + r) * y_spacing)
        for r in range(rows)
        for c in range(cols)
    ])
def _prepare_size():
    """
    Configure matplotlib defaults for drawing the board: the figure size is
    the world size divided by 1000, the dpi is 100, and the animation embed
    limit is raised to 2**27.
    """
    fig_width = Constants.world_x() / 1000
    fig_height = Constants.world_y() / 1000

    plt.rcParams['figure.figsize'] = [fig_width, fig_height]
    plt.rcParams['figure.dpi'] = 100
    matplotlib.rcParams['animation.embed_limit'] = 2**27
def tester() -> 'PodBoard':
    """
    Build a board whose layout exercises as many manoeuvres as possible:

        (start) -> 0 -> 1:   straight line
        1 -> 2:              180° turn
        2 -> 3:              90° turn
        3 -> 4 -> 5 -> 6:    curve around to the right
        6 -> 7 (start):      curve to the left

    :return: A PodBoard with eight checkpoints, the last one being the start point
    """
    start = Vec2(Constants.world_x() / 10, Constants.world_y() / 2)

    # Each checkpoint is the previous one (initially the start) plus a delta
    deltas = [
        Vec2(5000, 0),       # straight ahead
        Vec2(6000, 0),       # straight ahead
        Vec2(-3000, 0),      # straight back
        Vec2(0, 2500),       # turn 90°
        Vec2(-3000, 1500),   # curve around
        Vec2(-3000, -1500),  # curve around
        Vec2(0, -5500),      # curve around
    ]

    checks = []
    cursor = start
    for delta in deltas:
        cursor = cursor + delta
        checks.append(cursor)

    # Close the loop by turning the other way, back to the start
    checks.append(start)
    return PodBoard(checks)
def _get_field_artist() -> Rectangle:
    """
    Create the artist for the playing field: a white rectangle with a black
    edge, anchored at (0, 0) and spanning the whole world.
    """
    width = Constants.world_x()
    height = Constants.world_y()
    return Rectangle((0, 0), width, height, ec="black", fc="white")
def __init__(self, num_thrust: int = 2, num_angle: int = 3):
    """
    Set up the table of discrete actions.

    :param num_thrust: Number of distinct thrust values
    :param num_angle: Number of distinct turn angles
    """
    self.num_thrust = num_thrust
    self.num_angle = num_angle
    self.num_actions = num_thrust * num_angle

    # presumably _arange(lo, hi, n) yields n values spanning [lo, hi] - confirm
    thrust_levels = _arange(0, Constants.max_thrust(), num_thrust)
    turn_levels = _arange(-Constants.max_turn(), Constants.max_turn(), num_angle)

    # Thrust-major ordering: action index = thrust_index * num_angle + angle_index
    table = []
    for level in thrust_levels:
        for turn in turn_levels:
            table.append((int(level), turn))
    self._action_table = table
def play_to_action(self, thrust: int, angle: float) -> int:
    """
    Given a legal play (angle/thrust), find the nearest discrete action.

    Thrust is mapped from [0, max_thrust] and angle from [-max_turn, max_turn]
    onto their respective index ranges; each is rounded to the closest index
    and then clamped, so a value slightly outside the legal range maps to the
    first/last index rather than to an action that does not exist.

    :param thrust: Requested thrust
    :param angle: Requested turn angle
    :return: Index of the closest action (thrust-major ordering:
        thrust_index * num_angle + angle_index)
    """
    def nearest_index(fraction: float, count: int) -> int:
        # floor(x + 0.5) rounds to the nearest index (halves go up). A plain
        # floor would always snap downward, and would pick the wrong index
        # whenever float error turns an exact grid value k into k - epsilon.
        idx = math.floor(fraction * (count - 1) + 0.5)
        return min(max(idx, 0), count - 1)

    thrust_pct = thrust / Constants.max_thrust()
    angle_pct = (angle + Constants.max_turn()) / (2 * Constants.max_turn())

    thrust_idx = nearest_index(thrust_pct, self.num_thrust)
    angle_idx = nearest_index(angle_pct, self.num_angle)
    return thrust_idx * self.num_angle + angle_idx
def step(self, pod: PodState, play: PlayOutput, output: PodState = None) -> PodState:
    """
    Advance one pod by a single turn using the supplied play.

    Game rules, for reference. Each turn a pod's movement is computed as:

    - Rotation: the pod rotates to face the target point, by at most
      18 degrees (the game exempts the first round; this method always
      applies legal_angle).
    - Acceleration: the pod's facing vector is multiplied by the thrust and
      added to the current speed vector.
    - Movement: the speed vector is added to the pod's position. If a
      collision would occur at this point, the pods rebound off each other
      (no collision handling is done in this method).
    - Friction: the speed vector is multiplied by 0.85.
    - The speed's values are truncated and the position's values are rounded
      to the nearest integer.

    Collisions are elastic, with a minimum impulse of 120. A boost is an
    acceleration of 650. A shield multiplies the pod mass by 10.
    The provided angle is absolute: 0° faces EAST, 90° faces SOUTH.

    :param pod: Initial state
    :param play: Action to play
    :param output: State object to write the result into (may be the same
        object as pod). A new PodState is created when omitted.
    :return: The new pod state (the output object, if one was given)
    """
    if output is None:
        output = PodState()

    # Everything that depends on the incoming state is computed up front, so
    # that writing into output is safe even when output and pod are one object.

    # 1. Rotation
    new_angle = legal_angle((play.target - pod.pos).angle(), pod.angle)

    # 2. Acceleration
    heading = UNIT.rotate(new_angle)
    power = int(within(play.thrust, 0, Constants.max_thrust()))
    boosted_vel = pod.vel + (heading * power)

    # 3. Movement
    moved_pos = pod.pos + boosted_vel

    # 4. Friction
    slowed_vel = boosted_vel * Constants.friction()

    check_id = pod.nextCheckId
    laps = pod.laps
    turns = pod.turns + 1

    # 5. Rounding
    output.angle = new_angle
    output.pos = moved_pos.round()
    output.vel = slowed_vel.truncate()

    # Update progress: the pod scores when its new position is inside the
    # radius of the checkpoint it was heading for
    output.turns = turns
    target_check = self.checkpoints[check_id]
    if (target_check - output.pos).square_length() < Constants.check_radius_sq():
        check_id += 1
        if check_id >= len(self.checkpoints):
            # Wrapped around the last checkpoint: that completes a lap
            check_id = 0
            laps += 1
    output.nextCheckId = check_id
    output.laps = laps

    return output
def circle(num_points: int = 3, radius: float = 4000) -> 'PodBoard':
    """
    Build a PodBoard whose checkpoints are evenly spaced on a circle around
    the center of the board.

    :param num_points: Number of checkpoints to place
    :param radius: Distance of every checkpoint from the board center
    :return: The generated PodBoard
    """
    center = Vec2(Constants.world_x() / 2, Constants.world_y() / 2)
    step_angle = 2 * math.pi / num_points
    spoke = UNIT * radius

    checks = []
    for idx in range(num_points):
        checks.append(center + spoke.rotate(idx * step_angle))
    return PodBoard(checks)
def legal_angle(req_angle: float, pod_angle: float) -> float:
    """
    Work out the angle the pod will actually face, given the player's request.

    The requested change in heading is limited to at most max_turn in either
    direction before being applied to the current heading.

    :param req_angle: Angle that the player requested
    :param pod_angle: Angle in which the pod is currently facing
    :return: Angle to use for calculations (normalised by clean_angle,
        i.e. within [-pi, pi])
    """
    wanted_turn = clean_angle(req_angle - pod_angle)
    allowed_turn = within(wanted_turn, -Constants.max_turn(), Constants.max_turn())
    return clean_angle(pod_angle + allowed_turn)
def _to_state(board: PodBoard, pod: PodState) -> Tuple[int, int, int, int]:
    """
    Reduce a pod's situation to a discrete 4-tuple, expressed in the pod's own
    frame of reference (everything is rotated by -pod.angle):

        (velocity x, velocity y, next-check x, next-check y)

    Velocity components are scaled by max_vel and discretized with 10;
    check components are scaled by MAX_DIST and discretized with 30.
    """
    unrotate = -pod.angle
    rel_vel = pod.vel.rotate(unrotate)
    rel_check = (board.get_check(pod.nextCheckId) - pod.pos).rotate(unrotate)

    vel_x = _discretize(rel_vel.x / Constants.max_vel(), 10)
    vel_y = _discretize(rel_vel.y / Constants.max_vel(), 10)
    check_x = _discretize(rel_check.x / MAX_DIST, 30)
    check_y = _discretize(rel_check.y / MAX_DIST, 30)
    return (vel_x, vel_y, check_x, check_y)
def test_action_to_output_turn_right(self):
    """A max-turn play keeps its thrust and targets a point rotated by max_turn."""
    origin = Vec2(100, 100)
    action = self.ad.play_to_action(50, Constants.max_turn())
    output = self.ad.action_to_output(action, 1.23, origin)

    # The thrust should come back unchanged
    self.assertEqual(output.thrust, 50)

    # The pod sits at (100, 100) facing 1.23 and asked for a turn of max_turn.
    # Undoing both the translation and the rotation should leave a vector
    # lying along the +X axis (i.e. angle 0).
    undone = (output.target - origin).rotate(-1.23 - Constants.max_turn())
    self.assertAlmostEqual(undone.y, 0)
    self.assertGreater(undone.x, 1)
def __init__(self, board: PodBoard):
    """
    Create the environment for the given board and define its action,
    observation and time-step specs.

    Actions are a dict with a scalar 'angle' and a scalar 'thrust'.
    Observations are a dict with three 'angles' and three 'distances'.

    :param board: The board on which the agent's pod will race
    """
    super().__init__()

    # Allow the agent to go beyond the bounds - due to the nature of
    # the rounding functions, it's unlikely the agent will ever give
    # us the actual min or max
    scaled_max_turn = Constants.max_turn() * 1.1
    scaled_max_thrust = Constants.max_thrust() + 2 * THRUST_PADDING

    # np.float64 is used instead of the np.float alias: the alias was
    # deprecated in NumPy 1.20 and removed in 1.24 (AttributeError), and it
    # only ever meant the builtin float, i.e. the same float64 dtype.
    angle_spec = array_spec.BoundedArraySpec(
        (), np.float64, minimum=-scaled_max_turn, maximum=scaled_max_turn)
    thrust_spec = array_spec.BoundedArraySpec(
        (), np.int32, minimum=0, maximum=scaled_max_thrust)
    self._action_spec = {
        'angle': angle_spec,
        'thrust': thrust_spec
    }

    angles_spec = array_spec.BoundedArraySpec(
        (3,), np.float64, minimum=-math.pi, maximum=math.pi)
    dist_spec = array_spec.BoundedArraySpec(
        (3,), np.float64, minimum=0, maximum=Constants.world_x() * 10)
    self._observation_spec = {
        'angles': angles_spec,
        'distances': dist_spec
    }

    self._time_step_spec = ts.TimeStep(
        step_type=array_spec.ArraySpec(shape=(), dtype=np.int32, name='step_type'),
        reward=array_spec.ArraySpec(shape=(), dtype=np.float32, name='reward'),
        discount=array_spec.ArraySpec(shape=(), dtype=np.float32, name='discount'),
        observation=self._observation_spec
    )

    self._board = board
    self._player = Player(AgentController())
    # get_state() is called only after the board and player are in place
    self._initial_state = self.get_state()
    self._episode_ended = False
def play(self, pod: PodState) -> PlayOutput:
    """
    Choose a target point and thrust for the pod.

    While the pod is far from its next checkpoint, it aims at a point offset
    from the checkpoint so that it arrives already turning toward the
    checkpoint after that; once close, it aims straight at the checkpoint.
    Thrust is full when the pod roughly faces the target, ramps linearly down
    to zero as the angular error grows from 2 to 4 times max_turn, and is zero
    beyond that.

    :param pod: Current state of the pod
    :return: The play: a target point (compensated by twice the pod's
        velocity) and a thrust
    """
    check1 = self.board.checkpoints[pod.nextCheckId]
    check2 = self.board.get_check(pod.nextCheckId + 1)

    c1_to_p = pod.pos - check1
    c1_to_p_len = c1_to_p.length()
    c1_to_c2 = check2 - check1
    c1_to_c2_len = c1_to_c2.length()

    # NOTE(review): both vectors are divided by c1_to_c2_len, so this is just
    # the direction from check2 to the pod. If a bisector of the two unit
    # vectors was intended, c1_to_p should be divided by c1_to_p_len instead.
    # Left unchanged - confirm the intent before altering the steering.
    midpoint = ((c1_to_p / c1_to_c2_len) - (c1_to_c2 / c1_to_c2_len)).normalize()

    target = check1
    if c1_to_p_len > Constants.max_vel() * 6:
        # Still far away. Aim for a point that will help us turn toward the next check
        target = target + (midpoint * Constants.check_radius() * 2)
    # else: We're getting close to the check. Stop fooling around and go to it.

    # OK, now we've got a target point. Do whatever it takes to get there.
    pod_to_target = target - pod.pos
    ang_diff_to_target = math.fabs(
        clean_angle(math.fabs(pod.angle - pod_to_target.angle())))

    max_turn = Constants.max_turn()
    max_thrust = Constants.max_thrust()
    if ang_diff_to_target < 2 * max_turn:
        thrust = max_thrust
    elif ang_diff_to_target < 4 * max_turn:
        # Linear ramp: max_thrust at 2 * max_turn, falling to 0 at 4 * max_turn.
        # (The operands used to be the other way round, which made the result
        # negative throughout this branch.)
        thrust = (4 * max_turn - ang_diff_to_target) / (2 * max_turn) * max_thrust
    else:
        thrust = 0

    return PlayOutput(target - (2 * pod.vel), thrust)
def test_get_best_action_works_right(self):
    """Check behind and to the right: expect zero thrust and a full negative turn."""
    checkpoints = [Vec2(5000, 5000), Vec2(1000, 1000)]
    board = PodBoard(checkpoints)

    # The pod is directly below the check, but the check is behind it and to its right
    pod = PodState(Vec2(5000, 0))
    pod.angle = math.pi * 1.25

    self.__do_get_best_action_assert(board, pod, 0, -Constants.max_turn())
def trainer(num_checks: int = 3) -> 'PodBoard':
    """
    Build a board with the given number of checks, all on one horizontal line
    at half the world height but at varying distances from each other
    (check i is placed at check_radius * (i + 1)**2 before centering).

    It is meant to be used with gen_pods to generate test data with varying
    distances to the next check.

    :param num_checks: Number of checkpoints to generate
    :return: The generated PodBoard, centered horizontally in the world
    """
    checks = []
    for i in range(num_checks):
        checks.append(
            Vec2(Constants.check_radius() * ((i + 1)**2), Constants.world_y() / 2))

    # Shift everything sideways so the row of checks is centered
    span = checks[-1].x - checks[0].x
    x_shift = (Constants.world_x() - span) / 2 - checks[0].x

    shifted = []
    for check in checks:
        shifted.append(check + Vec2(x_shift, 0))
    return PodBoard(shifted)
def test_get_best_action_works_behind_right(self):
    """Pod facing away from the check: expect zero thrust and a full negative turn."""
    checkpoints = [Vec2(5000, 5000), Vec2(1000, 1000)]
    board = PodBoard(checkpoints)

    # The pod is directly right of the check, but facing away (slightly to the right)
    pod = PodState(Vec2(7000, 5000))
    pod.angle = -0.000001

    self.__do_get_best_action_assert(board, pod, 0, -Constants.max_turn())
def train(self, num_episodes: int = 10, prob_rand_action: float = 0.5, max_turns: int = 50, learning_rate: float = 1.0, future_discount: float = 0.8) -> List[float]:
    """
    Train for a number of episodes, each starting from a random point.

    :param num_episodes: Number of episodes to run
    :param prob_rand_action: Passed through to the per-episode training
    :param max_turns: Passed through to the per-episode training
    :param learning_rate: Passed through to the per-episode training
    :param future_discount: Passed through to the per-episode training
    :return: One value per episode, as returned by the per-episode training
    """
    rewards = []
    for _ in range(num_episodes):
        # Random direction from the first check, at a random distance of
        # 1 to 17 check radii, with a random heading in [-pi, pi).
        # The random draws happen in this order: direction, distance, heading.
        direction = UNIT.rotate(random.random() * 2 * math.pi)
        radii = 16 * random.random() + 1
        start_pos = self.board.checkpoints[0] + direction * Constants.check_radius() * radii
        start_angle = 2 * math.pi * random.random() - math.pi

        pod = PodState(pos=start_pos, angle=start_angle)
        rewards.append(
            self.__do_train(pod, max_turns, prob_rand_action,
                            learning_rate, future_discount))
    return rewards
def test_get_best_action_works_straight(self):
    """Pod looking straight at the check: expect full thrust and no turn."""
    checkpoints = [Vec2(5000, 5000), Vec2(1000, 1000)]
    board = PodBoard(checkpoints)

    # The pod is directly below the check and looking straight at it
    pod = PodState(Vec2(5000, 0))
    pod.angle = math.pi / 2

    self.__do_get_best_action_assert(board, pod, Constants.max_thrust(), 0)
def re_dcat(board: PodBoard, pod: PodState) -> float:
    """
    Reward built from Distance, Checks hit, Angle and Turns:

        3 * (checks_hit + 1) - dist_penalty - a_penalty - turn_penalty

    :param board: Board the pod is racing on
    :param pod: State to evaluate
    :return: The reward for that state
    """
    to_check = board.checkpoints[pod.nextCheckId] - pod.pos

    # Scaled distance to the next check
    dist_penalty = to_check.length() / DIST_BASE

    # Total checks passed so far. Each one is worth 3 in the final sum; the
    # margin is there so that the reward still goes up after hitting a check,
    # even when the distance penalty to the following check exceeds 1.
    checks_hit = len(board.checkpoints) * pod.laps + pod.nextCheckId

    # A deliberately tiny angle term, applied only when the pod is off by more
    # than one turn's worth. It acts as a tie-breaker (to keep the pod from
    # going into orbit around a check).
    off_angle = math.fabs(clean_angle(to_check.angle() - pod.angle))
    if off_angle > Constants.max_turn():
        a_penalty = (off_angle / math.pi) / 10
    else:
        a_penalty = 0

    # Penalise elapsed turns so that doing nothing is never attractive. The
    # divisor is slightly more than the number of turns it takes (on average)
    # to get from one check to another.
    turn_penalty = pod.turns / 20

    return 3 * (checks_hit + 1) - dist_penalty - a_penalty - turn_penalty
def to_vector(self, board: PodBoard, pod: PodState) -> List[float]:
    """
    Describe the pod's situation as four numbers in the pod's own frame:
    [velocity x, velocity y, next-check x, next-check y].

    Velocity is scaled by max_vel and the check offset by MAX_DIST.
    """
    unrotate = -pod.angle

    # Velocity is already relative to the pod, so it only needs rotating
    rel_vel = pod.vel.rotate(unrotate) / Constants.max_vel()

    # The check must first be made relative to the pod's position
    offset = board.get_check(pod.nextCheckId) - pod.pos
    rel_check = offset.rotate(unrotate) / MAX_DIST

    return [rel_vel.x, rel_vel.y, rel_check.x, rel_check.y]
def test_state_to_vector_works2(self):
    """Pod facing +Y with the check straight ahead and a diagonal velocity."""
    # A pod at (-100, -100) pointing up +Y, moving 45 degrees down-left
    pod = PodState(Vec2(-100, -100), Vec2(-3, -3), math.pi / 2)
    # The target checkpoint is directly in front of it
    board = PodBoard([Vec2(-100, 1000), ORIGIN])

    vector = state_to_vector(pod, board)

    self.assertEqual(len(vector), STATE_VECTOR_LEN)
    vel_x, vel_y, check_x, check_y = vector[0], vector[1], vector[2], vector[3]
    self.assertAlmostEqual(vel_x, -3 / Constants.max_vel(), msg="velocity x")
    self.assertAlmostEqual(vel_y, 3 / Constants.max_vel(), msg="velocity y")
    self.assertAlmostEqual(check_x, 1100 / MAX_DIST, msg="check1 x")
    self.assertAlmostEqual(check_y, 0, msg="check1 y")
def __generate_random_checks(self):
    """
    Replace self.checkpoints with a random set of checkpoints.

    Every checkpoint keeps at least border_padding away from the world edges,
    and any two checkpoints are at least check_spacing apart. Candidates that
    land too close to an existing checkpoint are discarded and redrawn.
    """
    padding = Constants.border_padding()
    min_x, min_y = padding, padding
    max_x = Constants.world_x() - padding
    max_y = Constants.world_y() - padding
    min_dist_sq = Constants.check_spacing() * Constants.check_spacing()

    self.checkpoints = []
    # NOTE(review): randrange excludes its upper bound, so max_checks itself
    # is never chosen - confirm that is intended.
    wanted = random.randrange(Constants.min_checks(), Constants.max_checks())

    while len(self.checkpoints) < wanted:
        candidate = Vec2(random.randrange(min_x, max_x, 1),
                         random.randrange(min_y, max_y, 1))
        crowded = any(
            (placed - candidate).square_length() < min_dist_sq
            for placed in self.checkpoints)
        if crowded:
            continue
        self.checkpoints.append(candidate)
def speed_reward(board: PodBoard, next_pod: PodState) -> float:
    """
    Measure how much the pod's velocity carries it toward its next check,
    scaled by max_vel.

    :param board: Board the pod is racing on
    :param next_pod: State to evaluate
    :return: (velocity * pod-to-check) / (distance to check * max_vel)
    """
    to_check = board.checkpoints[next_pod.nextCheckId] - next_pod.pos
    distance = to_check.length()

    # a*b = |a|*|b|*cos, so dividing the product by the distance leaves the
    # part of the velocity that points at the check
    # (assumes Vec2 * Vec2 is the dot product - confirm)
    toward = next_pod.vel * to_check
    scale = distance * Constants.max_vel()
    return toward / scale
def _pod_wedge_info(pod: PodState) -> Tuple[float, float, Vec2]:
    """
    Compute what is needed to draw a pod as a wedge.

    The wedge opens 20 degrees either side of the direction opposite to the
    pod's heading, and its center is moved half a pod radius away from the
    pod's position, against that direction.

    :param pod: The pod to draw
    :return: (start angle in degrees, end angle in degrees, wedge center)
    """
    reverse_deg = math.degrees(pod.angle) + 180.0
    half_radius = Vec2(Constants.pod_radius() / 2, 0)
    shift = half_radius.rotate(math.radians(reverse_deg))
    return reverse_deg - 20, reverse_deg + 20, pod.pos - shift
def _get_pod_artist(pod: PodState, color: Tuple[float, float, float]) -> Wedge:
    """
    Create the wedge artist that represents a pod.

    :param pod: The pod to draw
    :param color: Color to give the wedge
    :return: A Wedge of pod_radius size with z-order 10
    """
    start_deg, end_deg, tip = _pod_wedge_info(pod)
    artist = Wedge(
        (tip.x, tip.y),
        Constants.pod_radius(),
        start_deg,
        end_deg,
        color=color)
    artist.set_zorder(10)
    return artist
def gen_initial_state(self) -> PodState:
    """
    Generate a state at which to start a training episode.

    The pod is placed in a random direction from the first check, at a random
    distance of 1 to 16 check radii, facing a random heading in [-pi, pi).
    """
    # Random draws happen in this order: direction, distance, heading
    direction = UNIT.rotate(random() * 2 * math.pi)
    radii = 15 * random() + 1
    start_pos = self.target.board.get_check(0) + direction * Constants.check_radius() * radii
    heading = 2 * math.pi * random() - math.pi
    return PodState(pos=start_pos, angle=heading)
def re_dca(board: PodBoard, pod: PodState) -> float:
    """
    Reward built from Distance, Checks hit and Angle:

        3 * (checks_hit + 1) - dist_penalty - a_penalty

    :param board: Board the pod is racing on
    :param pod: State to evaluate
    :return: The reward for that state
    """
    to_check = board.checkpoints[pod.nextCheckId] - pod.pos

    # Scaled distance to the next check
    dist_penalty = to_check.length() / DIST_BASE

    # Small penalty, applied only when the pod is more than max_turn off the check
    off_angle = math.fabs(clean_angle(to_check.angle() - pod.angle))
    if off_angle > Constants.max_turn():
        a_penalty = (off_angle / math.pi) / 10
    else:
        a_penalty = 0

    # Checks passed over all laps so far
    checks_hit = len(board.checkpoints) * pod.laps + pod.nextCheckId

    return 3 * (checks_hit + 1) - dist_penalty - a_penalty
def test_state_to_vector_works1(self):
    """Pod facing -X at full speed along +Y, with the check directly behind it."""
    # A pod at (100, 100) pointing down -X, moving full speed +Y
    pod = PodState(Vec2(100, 100), Vec2(0, Constants.max_vel()), -math.pi)
    # The target checkpoint is directly behind it
    board = PodBoard([Vec2(100 + MAX_DIST, 100), ORIGIN])

    vector = state_to_vector(pod, board)

    self.assertEqual(len(vector), STATE_VECTOR_LEN)
    vel_x, vel_y, check_x, check_y = vector[0], vector[1], vector[2], vector[3]
    self.assertAlmostEqual(vel_x, 0, msg="velocity x")
    self.assertAlmostEqual(vel_y, -1, msg="velocity y")
    self.assertAlmostEqual(check_x, -1, msg="check1 x")
    self.assertAlmostEqual(check_y, 0, msg="check1 y")
def test_actions_produce_all_possible_combinations(self):
    """Every thrust/angle combination appears exactly once among the actions."""
    # First, decode every action and collect the distinct values
    plays = [self.ad.action_to_play(action)
             for action in range(0, self.ad.num_actions)]
    outputs = {(thrust, angle) for thrust, angle in plays}
    thrusts = {thrust for thrust, _ in plays}
    angles = {angle for _, angle in plays}

    # There must be the right number of each
    self.assertEqual(len(outputs), self.ad.num_actions)
    self.assertEqual(len(angles), self.ad.num_angle)
    self.assertEqual(len(thrusts), self.ad.num_thrust)

    # Every evenly spaced thrust from 0 to max_thrust must be present
    thrust_inc = Constants.max_thrust() / (self.ad.num_thrust - 1)
    for step in range(0, self.ad.num_thrust):
        self.assertIn(step * thrust_inc, thrusts)

    # Every evenly spaced angle from -max_turn to max_turn must be present
    ang_inc = (Constants.max_turn() * 2) / (self.ad.num_angle - 1)
    for step in range(0, self.ad.num_angle):
        self.assertIn(step * ang_inc - Constants.max_turn(), angles)
def train_progressively(self, dist_increment: int, ep_per_dist: int, num_incr: int, prob_rand_action: float = 0.5, learning_rate: float = 0.5, future_discount: float = 0.8) -> List[float]:
    """
    Train by randomly generating pods close to the checkpoint, and gradually
    backing away.

    While this runs, self.reward_func is temporarily replaced by check_reward;
    the previous reward function is always put back, even if training raises.

    :param dist_increment: Increment by which to increase the distance to the check
    :param ep_per_dist: Number of episodes to run at each increment
    :param num_incr: Number of distance increments to run
    :param prob_rand_action: Passed through to the per-episode training
    :param learning_rate: Passed through to the per-episode training
    :param future_discount: Passed through to the per-episode training
    :return: List of rewards for each episode
    """
    old_rew = self.reward_func
    self.reward_func = check_reward

    max_reward_per_ep = []
    try:
        for incr in range(1, num_incr + 1):
            # Position is (radius + increment) distance from check
            distance = Constants.check_radius() + dist_increment * incr
            # Episodes that start further away are given more turns
            max_turns = 5 * incr

            for _ in range(ep_per_dist):
                pos_offset = UNIT.rotate(random.random() * 2 * math.pi) * distance
                pod = PodState(pos=self.board.checkpoints[0] + pos_offset,
                               angle=2 * math.pi * random.random() - math.pi)
                max_reward_per_ep.append(
                    self.__do_train(pod, max_turns, prob_rand_action,
                                    learning_rate, future_discount))
    finally:
        # Restore the caller's reward function no matter how training ended,
        # so a failure cannot leave the trainer stuck on check_reward
        self.reward_func = old_rew

    return max_reward_per_ep