def tester() -> 'PodBoard':
    """
    Build a fixed board laid out to exercise as many situations as possible.

    Legs, in checkpoint order:
    (start) -> 0 -> 1: straight line
    1 -> 2: 180° turn
    2 -> 3: 90° turn
    3 -> 4 -> 5 -> 6: curve around to the right
    6 -> 7 (start): curve to the left

    :return: A PodBoard with eight checkpoints, the last one being the start point
    """
    start = Vec2(Constants.world_x() / 10, Constants.world_y() / 2)

    # Offset of every checkpoint relative to the one before it
    # (the first offset is relative to the start point)
    legs = [
        Vec2(5000, 0),       # straight ahead
        Vec2(6000, 0),       # straight ahead
        Vec2(-3000, 0),      # straight back
        Vec2(0, 2500),       # turn 90°
        Vec2(-3000, 1500),   # curve around
        Vec2(-3000, -1500),  # curve around
        Vec2(0, -5500),      # curve around
    ]

    checks = []
    position = start
    for leg in legs:
        position = position + leg
        checks.append(position)

    # Close the loop: the final checkpoint is the start itself (turn other way)
    checks.append(start)
    return PodBoard(checks)
def _prepare_size():
    """
    Set matplotlib's global defaults so that figures match the world's
    proportions: the figure size is the world size divided by 1000 in each
    dimension, the dpi is 100, and the animation embed limit is 2**27.
    """
    fig_width = Constants.world_x() / 1000
    fig_height = Constants.world_y() / 1000

    plt.rcParams['figure.figsize'] = [fig_width, fig_height]
    plt.rcParams['figure.dpi'] = 100
    matplotlib.rcParams['animation.embed_limit'] = 2**27
def grid(rows: int = 3, cols: int = 3, x_spacing: int = 4000, y_spacing: int = 3000) -> 'PodBoard':
    """
    Generate a board whose checkpoints form a grid centered on the world,
    numbered row by row:
    1 2 3
    4 5 6
    7 8 9

    :param rows: Number of rows in the grid
    :param cols: Number of columns in the grid
    :param x_spacing: Horizontal distance between neighboring checkpoints
    :param y_spacing: Vertical distance between neighboring checkpoints
    :return: A PodBoard with rows * cols checkpoints
    """
    mid_x = Constants.world_x() / 2
    mid_y = Constants.world_y() / 2

    # Index offsets are symmetric around zero, so the first one is -(n-1)/2:
    #   5 entries: -2, -1, 0, 1, 2
    #   4 entries: -1.5, -0.5, 0.5, 1.5
    #   3 entries: -1, 0, 1
    first_row = (1 - rows) / 2
    first_col = (1 - cols) / 2

    return PodBoard([
        Vec2(mid_x + (first_col + c) * x_spacing,
             mid_y + (first_row + r) * y_spacing)
        for r in range(rows)
        for c in range(cols)
    ])
def _get_field_artist() -> Rectangle:
    """
    Create the artist that draws the board: a white rectangle with a black
    edge, anchored at (0, 0) and spanning the whole world.
    """
    origin = (0, 0)
    width = Constants.world_x()
    height = Constants.world_y()
    return Rectangle(origin, width, height, ec="black", fc="white")
def circle(num_points: int = 3, radius: float = 4000) -> 'PodBoard':
    """
    Generate a PodBoard whose checkpoints are evenly spaced on a circle
    around the center of the board.

    :param num_points: Number of checkpoints to place on the circle
    :param radius: Distance from the board's center to each checkpoint
    :return: A PodBoard with num_points checkpoints
    """
    middle = Vec2(Constants.world_x() / 2, Constants.world_y() / 2)
    step = 2 * math.pi / num_points
    spoke = UNIT * radius

    checks = []
    for index in range(num_points):
        # Rotate the spoke by a whole number of steps to reach the next point
        checks.append(middle + spoke.rotate(index * step))
    return PodBoard(checks)
def __init__(self, board: PodBoard):
    """
    Set up the action, observation and time-step specs, then create the
    player and record the initial state.

    Actions are a dict with a scalar float 'angle' and a scalar int32
    'thrust'. Observations are a dict with three 'angles' (each within
    [-pi, pi]) and three 'distances' (each within [0, 10 * world width]).

    :param board: The board this environment plays on
    """
    super().__init__()

    # Allow the agent to go beyond the bounds - due to the nature of
    # the rounding functions, it's unlikely the agent will ever give
    # us the actual min or max
    scaled_max_turn = Constants.max_turn() * 1.1
    scaled_max_thrust = Constants.max_thrust() + 2 * THRUST_PADDING

    # np.float64 is used instead of the np.float alias: the alias was
    # deprecated in NumPy 1.20 and removed in 1.24, and it always meant
    # the builtin float (i.e. a 64-bit float dtype).
    angle_spec = array_spec.BoundedArraySpec(
        (), np.float64, minimum=-scaled_max_turn, maximum=scaled_max_turn)
    thrust_spec = array_spec.BoundedArraySpec(
        (), np.int32, minimum=0, maximum=scaled_max_thrust)
    self._action_spec = {
        'angle': angle_spec,
        'thrust': thrust_spec
    }

    angles_spec = array_spec.BoundedArraySpec(
        (3,), np.float64, minimum=-math.pi, maximum=math.pi)
    dist_spec = array_spec.BoundedArraySpec(
        (3,), np.float64, minimum=0, maximum=Constants.world_x() * 10)
    self._observation_spec = {
        'angles': angles_spec,
        'distances': dist_spec
    }

    self._time_step_spec = ts.TimeStep(
        step_type=array_spec.ArraySpec(shape=(), dtype=np.int32, name='step_type'),
        reward=array_spec.ArraySpec(shape=(), dtype=np.float32, name='reward'),
        discount=array_spec.ArraySpec(shape=(), dtype=np.float32, name='discount'),
        observation=self._observation_spec
    )

    self._board = board
    self._player = Player(AgentController())
    # Captured after the player exists, since get_state() is called on
    # the freshly built environment
    self._initial_state = self.get_state()
    self._episode_ended = False
def trainer(num_checks: int = 3) -> 'PodBoard':
    """
    Generate a board with the given number of checks, all on one horizontal
    line at half the world height but at varying distances from each other:
    the n-th check (counting from 1) starts at x = check_radius * n^2, and
    the whole row is then shifted so that it is horizontally centered.

    The goal is to use it with gen_pods to generate test data with varying
    distances to the next check.

    :param num_checks: Number of checkpoints to generate
    :return: A PodBoard with num_checks checkpoints
    """
    row_y = Constants.world_y() / 2

    raw_checks = []
    for ordinal in range(1, num_checks + 1):
        raw_checks.append(Vec2(Constants.check_radius() * (ordinal**2), row_y))

    # Shift the checks to center them
    span = raw_checks[-1].x - raw_checks[0].x
    shift = Vec2((Constants.world_x() - span) / 2 - raw_checks[0].x, 0)
    return PodBoard([check + shift for check in raw_checks])
def __generate_random_checks(self):
    """
    Replace ``self.checkpoints`` with a random set of checkpoints.

    Each checkpoint has integer coordinates at least ``border_padding`` away
    from every edge of the world, and no two checkpoints are closer to each
    other than ``check_spacing``. Candidates that land too close to an
    already accepted checkpoint are discarded and redrawn.
    """
    padding = Constants.border_padding()
    min_x = padding
    min_y = padding
    max_x = Constants.world_x() - padding
    max_y = Constants.world_y() - padding

    # Compare squared distances to avoid taking a square root per pair
    min_dist_sq = Constants.check_spacing() * Constants.check_spacing()

    self.checkpoints = []

    # NOTE(review): randrange excludes its upper bound, so max_checks()
    # itself is never chosen - confirm that this is intended.
    num_checks = random.randrange(Constants.min_checks(), Constants.max_checks())

    # NOTE(review): this loop retries until enough checks fit; it will not
    # terminate if the spacing/padding constants leave no room for them.
    while len(self.checkpoints) < num_checks:
        candidate = Vec2(random.randrange(min_x, max_x, 1),
                         random.randrange(min_y, max_y, 1))
        too_close = any(
            (existing - candidate).square_length() < min_dist_sq
            for existing in self.checkpoints
        )
        if not too_close:
            self.checkpoints.append(candidate)
def play(self, pod: PodState) -> PlayOutput:
    """
    Produce a completely random move; the pod's state is ignored.

    :param pod: Current state of the pod (unused)
    :return: A PlayOutput built from a point with random coordinates inside
        the world and a random integer thrust of at most max_thrust
    """
    point_x = random() * Constants.world_x()
    point_y = random() * Constants.world_y()
    thrust = math.ceil(random() * Constants.max_thrust())
    return PlayOutput(Vec2(point_x, point_y), thrust)
import math
from typing import Tuple, Callable, List

from pod.ai.ai_utils import MAX_DIST
from pod.board import PodBoard
from pod.constants import Constants
from pod.util import PodState, clean_angle

#################################################
# Reward functions: signature is
# func(board, state) -> float
#################################################

RewardFunc = Callable[[PodBoard, PodState], float]

DIST_BASE = math.sqrt(Constants.world_x() * Constants.world_y())


def pgr(board: PodBoard, pod: PodState) -> float:
    """
    Pretty Good Reward

    Scores a pod by the number of checkpoints it has hit so far (two points
    each), minus its remaining distance to the next checkpoint expressed as
    a fraction of the distance between the previous and the next checkpoint.
    Distances are estimated as half the sum of the absolute x and y
    differences, which avoids a SQRT calculation.

    :param board: The board the pod is racing on
    :param pod: The state of the pod to evaluate
    :return: 2 * (checks hit) - (estimated relative distance to next check) + 1
    """
    def _estimate(delta) -> float:
        # Cheap stand-in for the vector's length
        return (math.fabs(delta.x) + math.fabs(delta.y)) / 2

    target = board.checkpoints[pod.nextCheckId]
    remaining = _estimate(target - pod.pos)
    leg_length = _estimate(target - board.get_check(pod.nextCheckId - 1))

    # Fraction of the current leg that is still ahead of the pod
    dist_estimate = remaining / leg_length

    checks_hit = len(board.checkpoints) * pod.laps + pod.nextCheckId
    return 2*checks_hit - dist_estimate + 1
def __prepare_for_world(self):
    """
    Create the figure and axes used to draw the world. The axes cover the
    world plus PADDING on every side, and the y axis is inverted.
    """
    self.fig = plt.figure()

    x_limits = (-PADDING, Constants.world_x() + PADDING)
    y_limits = (-PADDING, Constants.world_y() + PADDING)
    self.ax = plt.axes(xlim=x_limits, ylim=y_limits)

    self.ax.invert_yaxis()
def random() -> 'PodState':
    """
    Build a PodState with a random position inside the world, a velocity
    with random direction and a random magnitude below max_vel, and a random
    angle in [0, 2*pi).

    NOTE(review): the random() calls in the body are presumably meant to
    resolve to a module-level random-number function rather than to this
    function itself - confirm against the enclosing scope.
    """
    position = Vec2.random(Constants.world_x(), Constants.world_y())

    # Draw order matters for reproducibility: direction, speed, then angle
    direction = UNIT.rotate(2 * math.pi * random())
    speed = random() * Constants.max_vel()
    heading = 2 * math.pi * random()

    return PodState(pos=position, vel=direction * speed, angle=heading)
def _get_reward(self) -> np.ndarray:
    """
    Compute the reward for the player's current state.

    The reward starts at world_x * world_y, gains 10000 for every checkpoint
    index the pod has reached, and loses the squared distance between the
    pod and its next checkpoint.

    :return: The reward as a 0-dimensional float32 numpy array
    """
    pod = self._player.pod

    reward = Constants.world_x() * Constants.world_y()
    reward += pod.nextCheckId * 10000

    # Squared distance is used as-is; no square root is taken
    to_next_check = self._world.checkpoints[pod.nextCheckId] - pod.pos
    reward -= to_next_check.square_length()

    return np.asarray(reward, dtype=np.float32)
import math from typing import List import numpy as np from pod.constants import Constants from pod.controller import Controller from pod.util import PodState, clean_angle from vec2 import Vec2, UNIT # Distance to use for scaling inputs MAX_DIST = Vec2(Constants.world_x(), Constants.world_y()).length() def gen_pods(checks: List[Vec2], pos_angles: List[float], pos_dists: List[float], angles: List[float], vel_angles: List[float], vel_mags: List[float]): """ Generate pods in various states :param checks: Checkpoints around which to generate :param pos_angles: Angles from check to pod :param pos_dists: Distances from check to pod :param angles: Orientations of pods. This will be rotated so that 0 points toward the check! :param vel_angles: Angles of velocity. Also rotated so that 0 points toward the check. :param vel_mags: Magnitudes of velocity :return: One pod for each combination of parameters """ relative_poss = [ UNIT.rotate(ang) * dist for ang in pos_angles for dist in pos_dists ] relative_vels = [ UNIT.rotate(ang) * mag for ang in vel_angles for mag in vel_mags