示例#1
0
    def _to_observation(self):
        """
        Build the observation for the player's pod, with angles taken relative to the pod's heading
        :return: A dict with two 3-element numpy arrays:
            'angles': velocity direction, direction to the next checkpoint, and the
                      direction of the leg from the next checkpoint to the one after it
            'distances': speed, distance to the next checkpoint, and the length of
                         the leg from the next checkpoint to the one after it
        """
        pod = self._player.pod

        # All values here are in the game frame of reference. We do the rotation at the end.
        #
        # Directions are measured with atan2(y, x): acos(x / length) can only produce
        # values in [0, pi], so it cannot tell a vector from its mirror image across the
        # x axis. atan2 also copes with a zero-length vector (it yields 0.0) instead of
        # dividing by zero.
        vel_length = pod.vel.length()
        vel_angle = math.atan2(pod.vel.y, pod.vel.x) if vel_length > EPSILON else 0.0

        check1 = self._board.checkpoints[pod.nextCheckId]
        pod_to_check1 = check1 - pod.pos
        dist_to_check1 = pod_to_check1.length()
        ang_to_check1 = math.atan2(pod_to_check1.y, pod_to_check1.x)

        # NOTE(review): check1 is read from self._board but check2 from self._world.
        # Presumably both refer to the same board - confirm and use a single attribute.
        check2 = self._world.checkpoints[(pod.nextCheckId + 1) % len(self._world.checkpoints)]
        check1_to_check2 = check2 - check1
        dist_check1_to_check2 = check1_to_check2.length()
        ang_check1_to_check2 = math.atan2(check1_to_check2.y, check1_to_check2.x)

        # Re-orient so pod is at (0,0) angle 0.0
        return {
            'angles': np.array([
                clean_angle(vel_angle - pod.angle),
                clean_angle(ang_to_check1 - pod.angle),
                clean_angle(ang_check1_to_check2 - ang_to_check1 - pod.angle)
            ]),
            'distances': np.array([
                vel_length,
                dist_to_check1,
                dist_check1_to_check2,
            ])
        }
示例#2
0
def re_dcat(board: PodBoard, pod: PodState) -> float:
    """
    Reward built from four parts: checkpoints hit so far (positive), and penalties for
    distance to the next checkpoint, for facing away from it, and for turns used
    """
    to_next_check = board.checkpoints[pod.nextCheckId] - pod.pos

    # Every checkpoint passed (over all laps) is worth 3. The weight is larger than the
    # scaled distance penalty is expected to get, so that hitting a checkpoint never
    # makes the reward go DOWN.
    checks_hit = pod.nextCheckId + pod.laps * len(board.checkpoints)
    reward = 3 * (checks_hit + 1)

    # Scaled distance to the next checkpoint
    reward -= to_next_check.length() / DIST_BASE

    # The angle term is deliberately tiny: it is only a tie-breaker, meant to stop the
    # pod from orbiting a checkpoint. It is ignored while the pod is off by no more
    # than Constants.max_turn().
    off_course = math.fabs(clean_angle(to_next_check.angle() - pod.angle))
    if off_course > Constants.max_turn():
        reward -= (off_course / math.pi) / 10
    else:
        reward -= 0

    # Charging for turns discourages agents from doing nothing. The divisor is slightly
    # more than the number of turns it takes (on average) to get from one checkpoint
    # to the next.
    reward -= pod.turns / 20

    return reward
示例#3
0
    def play(self, pod: PodState) -> PlayOutput:
        """
        Head for the next checkpoint, approaching it from a side that sets up the turn
        toward the checkpoint after it, and ease off the thrust while badly misaligned
        :param pod: Current state of the pod to control
        :return: The target point (compensated for current velocity) and the thrust
        """
        check1 = self.board.checkpoints[pod.nextCheckId]
        check2 = self.board.get_check(pod.nextCheckId + 1)
        c1_to_p = (pod.pos - check1)
        c1_to_p_len = c1_to_p.length()
        c1_to_c2 = (check2 - check1)
        c1_to_c2_len = c1_to_c2.length()

        # NOTE(review): both terms are divided by c1_to_c2_len, so this is the direction
        # from check2 to the pod. If the intent was to combine two UNIT vectors, the
        # first divisor should presumably be c1_to_p_len - confirm before changing.
        midpoint = ((c1_to_p / c1_to_c2_len) -
                    (c1_to_c2 / c1_to_c2_len)).normalize()
        target = check1

        if c1_to_p_len > Constants.max_vel() * 6:
            # Still far away. Aim for a point that will help us turn toward the next check
            target = target + (midpoint * Constants.check_radius() * 2)
        # else: We're getting close to the check. Stop fooling around and go to it.

        # OK, now we've got a target point. Do whatever it takes to get there.
        pod_to_target = target - pod.pos
        ang_diff_to_target = math.fabs(
            clean_angle(pod.angle - pod_to_target.angle()))

        max_turn = Constants.max_turn()
        if ang_diff_to_target < 2 * max_turn:
            thrust = Constants.max_thrust()
        elif ang_diff_to_target < 4 * max_turn:
            # Ramp down linearly: full thrust at 2 * max_turn, none at 4 * max_turn.
            # (Subtracting the other way round yields a NEGATIVE thrust here.)
            thrust = ((4 * max_turn) - ang_diff_to_target) / (
                2 * max_turn) * Constants.max_thrust()
        else:
            thrust = 0

        # Aim short of the target by twice the velocity to counter the current drift
        return PlayOutput(target - (2 * pod.vel), thrust)
示例#4
0
def ang_reward(board: PodBoard, pod: PodState) -> float:
    """
    Scores how well the pod is facing its next checkpoint: the absolute angle between the
    pod's heading and the direction to the checkpoint, as a fraction of pi, subtracted from 1
    (so a pod pointing straight at the checkpoint scores 1)
    """
    direction_to_check = (board.checkpoints[pod.nextCheckId] - pod.pos).angle()
    offset = clean_angle(direction_to_check - pod.angle)
    misalignment = math.fabs(offset / math.pi)

    return 1 - misalignment
示例#5
0
def re_dca(board: PodBoard, pod: PodState) -> float:
    """
    Reward built from three parts: checkpoints hit so far (positive), and penalties for
    distance to the next checkpoint and for facing away from it
    """
    to_next_check = board.checkpoints[pod.nextCheckId] - pod.pos

    # Checkpoints passed over all laps, each worth 3
    checks_hit = pod.nextCheckId + pod.laps * len(board.checkpoints)
    base = 3 * (checks_hit + 1)

    # Small penalty for pointing away from the checkpoint; ignored while the pod is
    # off by no more than Constants.max_turn()
    off_course = math.fabs(clean_angle(to_next_check.angle() - pod.angle))
    if off_course > Constants.max_turn():
        a_penalty = (off_course / math.pi) / 10
    else:
        a_penalty = 0

    # Scaled distance to the next checkpoint
    dist_penalty = to_next_check.length() / DIST_BASE

    return base - dist_penalty - a_penalty
示例#6
0
    def play(self, pod: PodState) -> PlayOutput:
        """
        Aim at the next checkpoint (offset by the current velocity) and thrust at 100
        whenever the pod is facing within a quarter turn of it; otherwise coast
        :param pod: Current state of the pod to control
        :return: The chosen target and thrust
        """
        next_check = self.board.checkpoints[pod.nextCheckId]
        heading_error = clean_angle(pod.angle - (next_check - pod.pos).angle())

        decision = PlayOutput()
        # Subtracting the velocity compensates for the pod's current drift
        decision.target = next_check - pod.vel
        decision.thrust = 100 if math.fabs(heading_error) < math.pi / 2 else 0
        return decision
示例#7
0
def gen_pods(checks: List[Vec2], pos_angles: List[float],
             pos_dists: List[float], angles: List[float],
             vel_angles: List[float], vel_mags: List[float]):
    """
    Build a shuffled list of pods covering every combination of the given parameters
    :param checks: Checkpoints to place pods around
    :param pos_angles: Angles of the pod's position as seen from the checkpoint
    :param pos_dists: Distances of the pod from the checkpoint
    :param angles: Pod orientations, measured so that 0 means "facing the checkpoint"
    :param vel_angles: Velocity directions, also measured so that 0 means "toward the checkpoint"
    :param vel_mags: Speeds
    :return: A list with one pod per combination, in random order
    """
    offsets = [
        UNIT.rotate(pos_angle) * pos_dist
        for pos_angle in pos_angles
        for pos_dist in pos_dists
    ]
    velocities = [
        UNIT.rotate(vel_angle) * vel_mag
        for vel_angle in vel_angles
        for vel_mag in vel_mags
    ]

    print("Generating pods: checks={} positions={} angles={} vels={}".format(
        len(checks), len(offsets), len(angles), len(velocities)))

    # The direction from the pod back to its checkpoint is the offset's own angle
    # turned half a circle. It does not depend on which checkpoint is used.
    placements = [(offset, offset.angle() + math.pi) for offset in offsets]

    pods = []
    for c_idx, checkpoint in enumerate(checks):
        for offset, toward_check in placements:
            pos = checkpoint + offset
            for base_vel in velocities:
                # Rotate so that a velocity angle of 0 points at the checkpoint
                vel = base_vel.rotate(toward_check)
                pods.extend(
                    PodState(pos=pos,
                             vel=vel,
                             angle=clean_angle(angle + toward_check),
                             next_check_id=c_idx)
                    for angle in angles)

    np.random.shuffle(pods)
    print("{} pods generated".format(len(pods)))
    return pods
示例#8
0
 def test_clean_angle_rotates_if_too_big(self):
     # An angle two full turns above the normal range must be wrapped back down.
     # Compared with a tolerance: the wrapped value comes out of floating-point
     # arithmetic, so exact equality would depend on how clean_angle is implemented.
     self.assertAlmostEqual(0.5 * math.pi, clean_angle(4.5 * math.pi))
示例#9
0
 def test_clean_angle_rotates_if_too_small(self):
     # An angle two full turns below the normal range must be wrapped back up.
     # Compared with a tolerance: the wrapped value comes out of floating-point
     # arithmetic, so exact equality would depend on how clean_angle is implemented.
     self.assertAlmostEqual(-0.5 * math.pi, clean_angle(-4.5 * math.pi))
示例#10
0
 def test_clean_angle_doesnt_change_if_ok(self):
     # Zero is given as the input and expected back as the result.
     cleaned = clean_angle(0)
     self.assertEqual(0, cleaned)