def _to_observation(self):
    # All values here are in the game frame of reference. We do the rotation at the end.
    vel_length = self._player.pod.vel.length()
    vel_angle = math.acos(self._player.pod.vel.x / vel_length) if vel_length > EPSILON else 0.0

    check1 = self._board.checkpoints[self._player.pod.nextCheckId]
    pod_to_check1 = check1 - self._player.pod.pos
    dist_to_check1 = pod_to_check1.length()
    ang_to_check1 = math.acos(pod_to_check1.x / dist_to_check1)

    check2 = self._board.checkpoints[(self._player.pod.nextCheckId + 1) % len(self._board.checkpoints)]
    check1_to_check2 = check2 - check1
    dist_check1_to_check2 = check1_to_check2.length()
    ang_check1_to_check2 = math.acos(check1_to_check2.x / dist_check1_to_check2)

    # Re-orient so the pod is at (0, 0) with angle 0.0
    return {
        'angles': np.array([
            clean_angle(vel_angle - self._player.pod.angle),
            clean_angle(ang_to_check1 - self._player.pod.angle),
            clean_angle(ang_check1_to_check2 - ang_to_check1 - self._player.pod.angle)
        ]),
        'distances': np.array([
            vel_length,
            dist_to_check1,
            dist_check1_to_check2,
        ])
    }
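# A minimal sketch of an observation space matching _to_observation(), assuming the
# environment exposes OpenAI Gym's spaces API (the framework choice, bounds and dtype
# here are assumptions, not taken from the original code):
import gym

_OBSERVATION_SPACE = gym.spaces.Dict({
    # Three angles, each wrapped into [-pi, pi) by clean_angle
    'angles': gym.spaces.Box(low=-math.pi, high=math.pi, shape=(3,), dtype=np.float64),
    # Velocity magnitude and the two checkpoint distances, all non-negative
    'distances': gym.spaces.Box(low=0.0, high=np.inf, shape=(3,), dtype=np.float64),
})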
def re_dcat(board: PodBoard, pod: PodState) -> float:
    pod_to_check = board.checkpoints[pod.nextCheckId] - pod.pos

    # Scaled distance to next check
    dist_penalty = pod_to_check.length() / DIST_BASE

    # Bonus for each check hit. By weighting each check more than the maximum possible
    # dist_penalty, we ensure that the reward is always higher after hitting a check.
    # (If left at 1, the dist_penalty could be slightly greater than 1, leading to a
    # DECREASE in reward for hitting a check.)
    checks_hit = len(board.checkpoints) * pod.laps + pod.nextCheckId

    # A tiny bit for the angle. This should really be tiny - its purpose is to serve as a
    # tie-breaker (to prevent the pod from going into orbit around a check).
    angle = math.fabs(clean_angle(pod_to_check.angle() - pod.angle))
    a_penalty = (angle / math.pi) / 10 if angle > Constants.max_turn() else 0

    # And finally: this can be important to prevent agents from doing nothing.
    # The reduction factor is slightly more than the number of turns it takes
    # (on average) to get from one check to another.
    turn_penalty = pod.turns / 20

    return 3 * (checks_hit + 1) \
        - dist_penalty \
        - a_penalty \
        - turn_penalty
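# Worked illustration of re_dcat with hypothetical numbers (not the real game constants):
# say DIST_BASE = 5000, the pod is 4000 units from its next check, it is facing the check
# (no angle penalty), has hit 2 checks, and has played 30 turns.
checks_hit, dist_penalty, a_penalty, turn_penalty = 2, 4000 / 5000, 0.0, 30 / 20
reward = 3 * (checks_hit + 1) - dist_penalty - a_penalty - turn_penalty  # 9 - 0.8 - 0 - 1.5 = 6.7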
def play(self, pod: PodState) -> PlayOutput:
    check1 = self.board.checkpoints[pod.nextCheckId]
    check2 = self.board.get_check(pod.nextCheckId + 1)
    c1_to_p = (pod.pos - check1)
    c1_to_p_len = c1_to_p.length()
    c1_to_c2 = (check2 - check1)
    c1_to_c2_len = c1_to_c2.length()

    # Combine the unit vectors toward the pod and toward the next check to get an offset direction
    midpoint = ((c1_to_p / c1_to_p_len) - (c1_to_c2 / c1_to_c2_len)).normalize()
    target = check1

    if c1_to_p_len > Constants.max_vel() * 6:
        # Still far away. Aim for a point that will help us turn toward the next check
        target = target + (midpoint * Constants.check_radius() * 2)
    # else: We're getting close to the check. Stop fooling around and go to it.

    # OK, now we've got a target point. Do whatever it takes to get there.
    pod_to_target = target - pod.pos
    ang_diff_to_target = math.fabs(clean_angle(pod.angle - pod_to_target.angle()))

    if ang_diff_to_target < 2 * Constants.max_turn():
        thrust = Constants.max_thrust()
    elif ang_diff_to_target < 4 * Constants.max_turn():
        # Scale thrust linearly from full power down to zero as the angle difference grows
        thrust = (4 * Constants.max_turn() - ang_diff_to_target) / (2 * Constants.max_turn()) * Constants.max_thrust()
    else:
        thrust = 0

    return PlayOutput(target - (2 * pod.vel), thrust)
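# Illustration of the thrust schedule above with hypothetical constants (not the real
# game values): max_turn = 0.3 rad, max_thrust = 100. An angle error of 0.9 rad lands in
# the middle band (between 0.6 and 1.2), so thrust interpolates between full power and zero:
example_thrust = (4 * 0.3 - 0.9) / (2 * 0.3) * 100  # = 50.0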
def ang_reward(board: PodBoard, pod: PodState) -> float:
    """
    Reward based on the angle between the pod's orientation and the direction to its next
    checkpoint, scaled to (0, 1): 1 when facing the check directly, 0 when facing directly away
    """
    pod_to_check = board.checkpoints[pod.nextCheckId] - pod.pos
    angle = clean_angle(pod_to_check.angle() - pod.angle)
    return 1 - math.fabs(angle / math.pi)
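# Worked example (pure arithmetic, no game objects needed): a heading error of pi/2
# scales to a reward of 0.5, while a perfect heading gives 1.0.
assert abs((1 - math.fabs((math.pi / 2) / math.pi)) - 0.5) < 1e-9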
def re_dca(board: PodBoard, pod: PodState) -> float:
    # Bonus for every checkpoint already hit
    checks_hit = len(board.checkpoints) * pod.laps + pod.nextCheckId

    # Small penalty for facing away from the next check (tie-breaker only)
    pod_to_check = board.checkpoints[pod.nextCheckId] - pod.pos
    angle = math.fabs(clean_angle(pod_to_check.angle() - pod.angle))
    a_penalty = (angle / math.pi) / 10 if angle > Constants.max_turn() else 0

    # Scaled distance to the next check
    dist_penalty = pod_to_check.length() / DIST_BASE

    return 3 * (checks_hit + 1) - dist_penalty - a_penalty
def play(self, pod: PodState) -> PlayOutput:
    out = PlayOutput()
    check = self.board.checkpoints[pod.nextCheckId]

    # Aim at the next check, compensating for the current velocity
    out.target = check - pod.vel

    # Full thrust if the check is in front of us, otherwise coast
    ang_to_check = (check - pod.pos).angle()
    if math.fabs(clean_angle(pod.angle - ang_to_check)) < math.pi / 2:
        out.thrust = 100
    else:
        out.thrust = 0

    return out
def gen_pods(checks: List[Vec2],
             pos_angles: List[float],
             pos_dists: List[float],
             angles: List[float],
             vel_angles: List[float],
             vel_mags: List[float]):
    """
    Generate pods in various states
    :param checks: Checkpoints around which to generate
    :param pos_angles: Angles from check to pod
    :param pos_dists: Distances from check to pod
    :param angles: Orientations of pods. This will be rotated so that 0 points toward the check!
    :param vel_angles: Angles of velocity. Also rotated so that 0 points toward the check.
    :param vel_mags: Magnitudes of velocity
    :return: One pod for each combination of parameters
    """
    relative_poss = [
        UNIT.rotate(ang) * dist
        for ang in pos_angles
        for dist in pos_dists
    ]
    relative_vels = [
        UNIT.rotate(ang) * mag
        for ang in vel_angles
        for mag in vel_mags
    ]

    print("Generating pods: checks={} positions={} angles={} vels={}".format(
        len(checks), len(relative_poss), len(angles), len(relative_vels)))

    pods = []
    for (c_idx, checkpoint) in enumerate(checks):
        for rel_pos in relative_poss:
            ang_to_check = rel_pos.angle() + math.pi
            pos = checkpoint + rel_pos
            for rel_vel in relative_vels:
                vel = rel_vel.rotate(ang_to_check)
                for angle in angles:
                    pods.append(PodState(
                        pos=pos,
                        vel=vel,
                        angle=clean_angle(angle + ang_to_check),
                        next_check_id=c_idx))

    np.random.shuffle(pods)
    print("{} pods generated".format(len(pods)))
    return pods
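# Hypothetical usage sketch: generate a small training set around two checkpoints.
# Vec2(x, y) is assumed to be the constructor of the 2D vector type used above.
example_checks = [Vec2(3000, 3000), Vec2(10000, 5000)]
example_pods = gen_pods(
    example_checks,
    pos_angles=[0.0, math.pi / 2, math.pi],
    pos_dists=[2000, 5000],
    angles=[0.0, math.pi / 4],
    vel_angles=[0.0],
    vel_mags=[0, 200],
)
# 2 checks * (3 * 2) positions * 2 orientations * (1 * 2) velocities = 48 pods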
def test_clean_angle_rotates_if_too_big(self):
    self.assertEqual(0.5 * math.pi, clean_angle(4.5 * math.pi))

def test_clean_angle_rotates_if_too_small(self):
    self.assertEqual(-0.5 * math.pi, clean_angle(-4.5 * math.pi))

def test_clean_angle_doesnt_change_if_ok(self):
    self.assertEqual(0, clean_angle(0))
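# clean_angle itself isn't shown in this section; a minimal sketch consistent with the
# tests above (an assumption, not necessarily the original implementation) wraps any
# angle into [-pi, pi):
def clean_angle_sketch(angle: float) -> float:
    # Shift by full turns until the angle lies in [-pi, pi)
    while angle >= math.pi:
        angle -= 2 * math.pi
    while angle < -math.pi:
        angle += 2 * math.pi
    return angle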