Пример #1
0
    def test_get_best_action_works_behind_right(self):
        """
        Pod at (7000, 5000) with a heading just below zero, while the first
        check sits at (5000, 5000): the pod is directly right of the check
        and facing away from it, very slightly off to the right.
        """
        checkpoints = [Vec2(5000, 5000), Vec2(1000, 1000)]
        board = PodBoard(checkpoints)

        start = PodState(Vec2(7000, 5000))
        start.angle = -0.000001

        # The last two arguments are the expected values handed to the shared
        # assertion helper (presumably thrust, then turn -- confirm against
        # the helper's signature).
        self.__do_get_best_action_assert(
            board, start, 0, -Constants.max_turn())
Пример #2
0
    def test_get_best_action_works_right(self):
        """
        Pod at (5000, 0) with heading 1.25 * pi, aiming for the check at
        (5000, 5000): the pod is directly below the check, which lies behind
        the pod and to its right.
        """
        checkpoints = [Vec2(5000, 5000), Vec2(1000, 1000)]
        board = PodBoard(checkpoints)

        start = PodState(Vec2(5000, 0))
        start.angle = math.pi * 1.25

        # The last two arguments are the expected values handed to the shared
        # assertion helper (presumably thrust, then turn -- confirm against
        # the helper's signature).
        self.__do_get_best_action_assert(
            board, start, 0, -Constants.max_turn())
Пример #3
0
    def test_get_best_action_works_straight(self):
        """
        Pod at (5000, 0) with heading pi / 2, aiming for the check at
        (5000, 5000): the pod is directly below the check and looking
        straight at it.
        """
        checkpoints = [Vec2(5000, 5000), Vec2(1000, 1000)]
        board = PodBoard(checkpoints)

        start = PodState(Vec2(5000, 0))
        start.angle = math.pi / 2

        # The last two arguments are the expected values handed to the shared
        # assertion helper (presumably thrust, then turn -- confirm against
        # the helper's signature).
        self.__do_get_best_action_assert(
            board, start, Constants.max_thrust(), 0)
Пример #4
0
 def test_PodState_equals_not_initial(self):
     """Two pods built from the same explicitly given field values compare equal."""
     def build():
         # Fresh Vec2 instances on every call, so the two pods share no objects
         return PodState(pos=Vec2(1, 2),
                         angle=1.23,
                         vel=Vec2(3, 4),
                         next_check_id=5)

     first = build()
     second = build()
     self.assertEqual(first, second)
Пример #5
0
    def __init__(self, controller: Controller, pod: PodState = None):
        """
        Pair a controller with the pod that it drives.

        :param controller: Handles movement
        :param pod: Initial state of the pod. If omitted, a new PodState is
            built from the last checkpoint of the controller's board
        """
        self.controller = controller
        # Fall back to a fresh pod created from the board's final checkpoint
        self.pod = (PodState(controller.board.checkpoints[-1])
                    if pod is None else pod)

        # Keep a clone of the starting state alongside the live pod
        self.initial_state = self.pod.clone()
Пример #6
0
    def train(self,
              num_episodes: int = 10,
              prob_rand_action: float = 0.5,
              max_turns: int = 50,
              learning_rate: float = 1.0,
              future_discount: float = 0.8) -> List[float]:
        """
        Run training episodes, each one starting from a randomly placed pod.

        :param num_episodes: Number of episodes to run
        :param prob_rand_action: Forwarded unchanged to the per-episode routine
        :param max_turns: Forwarded unchanged to the per-episode routine
        :param learning_rate: Forwarded unchanged to the per-episode routine
        :param future_discount: Forwarded unchanged to the per-episode routine
        :return: One entry per episode: whatever the per-episode routine returned
        """
        def random_start():
            # Random direction away from the first checkpoint...
            direction = UNIT.rotate(random.random() * 2 * math.pi)
            # ...at a random distance of 1x up to (but excluding) 17x the check radius
            scale = 16 * random.random() + 1
            offset = direction * Constants.check_radius() * scale
            # Heading is drawn from [-pi, pi)
            return PodState(pos=self.board.checkpoints[0] + offset,
                            angle=2 * math.pi * random.random() - math.pi)

        return [
            self.__do_train(random_start(), max_turns, prob_rand_action,
                            learning_rate, future_discount)
            for _ in range(num_episodes)
        ]
Пример #7
0
    def gen_initial_state(self) -> PodState:
        """
        Build a randomized pod state from which a training episode can start
        """
        # Random direction away from the first checkpoint
        bearing = random() * 2 * math.pi
        # Random distance: 1x up to (but excluding) 16x the check radius
        distance_factor = 15 * random() + 1
        displacement = UNIT.rotate(bearing) * \
                       Constants.check_radius() * distance_factor

        # Random heading in [-pi, pi)
        first_check = self.target.board.get_check(0)
        return PodState(pos=first_check + displacement,
                        angle=2 * math.pi * random() - math.pi)
Пример #8
0
    def test_gen_pods(self):
        """
        gen_pods with one check, two position angles, one distance, one
        orientation, two velocity angles and one speed should produce exactly
        the four expected pods.
        """
        check = Vec2(10, 20)

        position_angles = [0, math.pi]                   # either side of the check
        position_dists = [1.0]
        orientations = [0.0]                             # always pointing at the check
        velocity_angles = [math.pi / 2, 3 * math.pi / 2]  # +/- y relative to heading
        velocity_mags = [2.0]

        generated = gen_pods([check], position_angles, position_dists,
                             orientations, velocity_angles, velocity_mags)

        expected = [
            PodState(pos=check + Vec2(1, 0), vel=Vec2(0, 2),
                     angle=math.pi, next_check_id=0),
            PodState(pos=check + Vec2(1, 0), vel=Vec2(0, -2),
                     angle=math.pi, next_check_id=0),
            PodState(pos=check - Vec2(1, 0), vel=Vec2(0, 2),
                     angle=0, next_check_id=0),
            PodState(pos=check - Vec2(1, 0), vel=Vec2(0, -2),
                     angle=0, next_check_id=0),
        ]
        for wanted in expected:
            self.assertIn(
                wanted, generated,
                "{} not found in {}".format(wanted, [str(p) for p in generated]))

        self.assertEqual(len(generated), 4)
    def test_state_to_vector_works1(self):
        """
        Check the first four entries of the state vector for a pod whose
        target checkpoint is directly behind it.
        """
        # A pod at (100, 100) pointing down -X, moving full speed +Y
        pod = PodState(Vec2(100, 100), Vec2(0, Constants.max_vel()), -math.pi)
        # The target checkpoint is MAX_DIST away along +X, i.e. behind the pod
        board = PodBoard([Vec2(100 + MAX_DIST, 100), ORIGIN])

        vec = state_to_vector(pod, board)
        self.assertEqual(len(vec), STATE_VECTOR_LEN)

        # (index into the state vector, expected value, failure label)
        expectations = (
            (0, 0, "velocity x"),
            (1, -1, "velocity y"),
            (2, -1, "check1 x"),
            (3, 0, "check1 y"),
        )
        for index, wanted, label in expectations:
            self.assertAlmostEqual(vec[index], wanted, msg=label)
Пример #10
0
class Player:
    """
    Bundles a Controller together with the PodState that it drives
    """
    def __init__(self, controller: Controller, pod: PodState = None):
        """
        :param controller: Handles movement
        :param pod: Initial state of the pod. If omitted, a new PodState is
            built from the last checkpoint of the controller's board
        """
        self.controller = controller
        self.pod = (PodState(controller.board.checkpoints[-1])
                    if pod is None else pod)

        # Clone of the starting state; reset() clones it again to restore the pod
        self.initial_state = self.pod.clone()

    def __str__(self):
        controller_type = type(self.controller)
        return "Player[controller=%s pod=%s]" % (controller_type, self.pod)

    def step(self):
        """
        Let the Controller play one step on this player's pod
        """
        self.controller.step(self.pod)

    def reset(self):
        """
        Replace the pod with a fresh clone of the initial state, then reset
        the controller
        """
        self.pod = self.initial_state.clone()
        self.controller.reset()

    def record(self) -> Dict:
        """
        Build a log entry holding a clone of the current pod under 'pod',
        hand it to the controller's record() and return it
        """
        entry = {}
        entry['pod'] = self.pod.clone()
        self.controller.record(entry)
        return entry
Пример #11
0
    def test_state_to_vector_works2(self):
        """
        Check the first four entries of the state vector for a pod whose
        target checkpoint is directly in front of it.
        """
        # A pod at (-100, -100) pointing up +Y, moving 45 degrees down-left
        pod = PodState(Vec2(-100, -100), Vec2(-3, -3), math.pi / 2)
        # The target checkpoint shares the pod's x and sits 1100 further along +Y
        board = PodBoard([Vec2(-100, 1000), ORIGIN])

        vec = state_to_vector(pod, board)
        self.assertEqual(len(vec), STATE_VECTOR_LEN)

        # (index into the state vector, expected value, failure label)
        expectations = (
            (0, -3 / Constants.max_vel(), "velocity x"),
            (1, 3 / Constants.max_vel(), "velocity y"),
            (2, 1100 / MAX_DIST, "check1 x"),
            (3, 0, "check1 y"),
        )
        for index, wanted, label in expectations:
            self.assertAlmostEqual(vec[index], wanted, msg=label)
Пример #12
0
def gen_pods(checks: List[Vec2], pos_angles: List[float],
             pos_dists: List[float], angles: List[float],
             vel_angles: List[float], vel_mags: List[float]):
    """
    Build pods for every combination of the given parameters, in shuffled order
    :param checks: Checkpoints around which to generate
    :param pos_angles: Angles from check to pod
    :param pos_dists: Distances from check to pod
    :param angles: Orientations of pods, relative to the direction toward the check (0 = facing it)
    :param vel_angles: Angles of velocity, also relative to the direction toward the check
    :param vel_mags: Magnitudes of velocity
    :return: List with one pod per combination, shuffled in place via np.random.shuffle
    """
    # Pod positions relative to a check: every angle paired with every distance
    offsets = [
        UNIT.rotate(theta) * radius
        for theta in pos_angles
        for radius in pos_dists
    ]
    # Velocities before being rotated toward the check
    base_velocities = [
        UNIT.rotate(theta) * speed
        for theta in vel_angles
        for speed in vel_mags
    ]

    print("Generating pods: checks={} positions={} angles={} vels={}".format(
        len(checks), len(offsets), len(angles), len(base_velocities)))

    generated = []

    for check_id, check in enumerate(checks):
        for offset in offsets:
            # The offset points from check to pod; add pi to point back at the check
            toward_check = offset.angle() + math.pi
            pod_pos = check + offset
            for base_vel in base_velocities:
                pod_vel = base_vel.rotate(toward_check)
                generated.extend(
                    PodState(pos=pod_pos,
                             vel=pod_vel,
                             angle=clean_angle(heading + toward_check),
                             next_check_id=check_id)
                    for heading in angles)

    np.random.shuffle(generated)
    print("{} pods generated".format(len(generated)))
    return generated
Пример #13
0
    def train_progressively(self,
                            dist_increment: int,
                            ep_per_dist: int,
                            num_incr: int,
                            prob_rand_action: float = 0.5,
                            learning_rate: float = 0.5,
                            future_discount: float = 0.8) -> List[float]:
        """
        Train by randomly generating pods close to the checkpoint, and gradually backing away.

        While this runs, self.reward_func is temporarily replaced by check_reward.
        The previous reward function is always put back on exit, including when
        a training episode raises.

        :param dist_increment: Increment by which to increase the distance to the check
        :param ep_per_dist: Number of episodes to run at each increment
        :param num_incr: Number of distance increments to run
        :param prob_rand_action: Forwarded unchanged to the per-episode training routine
            (presumably the probability of taking a random action -- confirm there)
        :param learning_rate: Forwarded unchanged to the per-episode training routine
        :param future_discount: Forwarded unchanged to the per-episode training routine
        :return: List of rewards for each episode
        """
        old_rew = self.reward_func
        self.reward_func = check_reward

        max_reward_per_ep = []

        try:
            for incr in range(1, num_incr + 1):
                # Position is (radius + increment * stage) distance from check
                start_dist = Constants.check_radius() + dist_increment * incr
                for _ in range(ep_per_dist):
                    pos_offset = UNIT.rotate(
                        random.random() * 2 * math.pi) * start_dist
                    pod = PodState(pos=self.board.checkpoints[0] + pos_offset,
                                   angle=2 * math.pi * random.random() - math.pi)

                    # The value passed as the turn budget grows with the stage: 5 * incr
                    max_reward_per_ep.append(
                        self.__do_train(pod, 5 * incr, prob_rand_action,
                                        learning_rate, future_discount))
        finally:
            # Restore the caller's reward function even if an episode failed
            self.reward_func = old_rew

        return max_reward_per_ep
Пример #14
0
 def test_PodState_equals_fails_initial(self):
     """Pods constructed with different pos values (and nothing else) are not equal."""
     at_0_1 = PodState(pos=Vec2(0, 1))
     at_1_1 = PodState(pos=Vec2(1, 1))
     self.assertNotEqual(at_0_1, at_1_1)
Пример #15
0
 def test_PodState_equals_initial(self):
     """Two PodStates constructed without arguments compare equal."""
     self.assertEqual(PodState(), PodState())
Пример #16
0
    def test_PodState_deserializes_equal_to_serialized(self):
        """
        Serializing a pod and deserializing the result into a second pod
        should reproduce angle, pos, laps, vel and nextCheckId.
        """
        original = PodState()

        # Give every compared field a distinctive value
        original.angle = 1.23
        original.pos = Vec2(34, 56)
        original.laps = 69
        original.vel = Vec2(88, 77)
        original.nextCheckId = 37

        restored = PodState()
        restored.deserialize(original.serialize())

        for field in ("angle", "pos", "laps", "vel", "nextCheckId"):
            self.assertEqual(getattr(original, field),
                             getattr(restored, field))
Пример #17
0
 def test_pod_to_state_works(self):
     """pod_to_state for a default pod on a one-checkpoint board is below TOTAL_STATES."""
     single_check_board = PodBoard([Vec2(5000, 5000)])
     encoded = pod_to_state(PodState(), single_check_board)
     self.assertLess(encoded, TOTAL_STATES)
Пример #18
0
    def step(self,
             pod: PodState,
             play: PlayOutput,
             output: PodState = None) -> PodState:
        """
        Apply one play to a pod and produce the resulting state.

        Game rules this method follows, as described for each turn:
            Rotation: the pod rotates to face the target point, with a maximum of 18 degrees (except for the first round).
            Acceleration: the pod's facing vector is multiplied by the given thrust value. The result is added to the current speed vector.
            Movement: The speed vector is added to the position of the pod. If a collision would occur at this point, the pods rebound off each other.
            Friction: the current speed vector of each pod is multiplied by 0.85
            The speed's values are truncated and the position's values are rounded to the nearest integer.
        Collisions are elastic. The minimum impulse of a collision is 120.
        A boost is in fact an acceleration of 650.
        A shield multiplies the Pod mass by 10.
        The provided angle is absolute. 0° means facing EAST while 90° means facing SOUTH.

        Only rotation, acceleration, movement, friction, rounding and
        checkpoint/lap progress are carried out in this method; the turn limit
        is delegated to legal_angle and the friction factor is read from
        Constants.friction().

        :param pod: Initial state
        :param play: Action to play
        :param output: Output state to update (may be the same as input pod). If not given, a new one will be created.
        :return: The new pod state (same object as output if given)
        """
        result = PodState() if output is None else output

        # 1. Rotation: head for the play's target, as far as legal_angle allows
        heading = legal_angle((play.target - pod.pos).angle(), pod.angle)
        result.angle = heading

        # 2. Acceleration: clamp thrust into [0, max_thrust], push along the heading
        facing = UNIT.rotate(heading)
        power = int(within(play.thrust, 0, Constants.max_thrust()))
        result.vel = pod.vel + (facing * power)

        # 3. Movement: advance by the new (pre-friction) velocity
        result.pos = pod.pos + result.vel

        # 4. Friction
        result.vel = result.vel * Constants.friction()

        # 5. Rounding: position is rounded, velocity truncated
        result.pos = result.pos.round()
        result.vel = result.vel.truncate()

        # Progress bookkeeping. Every value is read from `pod` before the
        # matching field of `result` is changed, so pod and output may alias.
        result.turns = pod.turns + 1
        result.nextCheckId = pod.nextCheckId
        result.laps = pod.laps

        target_check = self.checkpoints[pod.nextCheckId]
        gap_sq = (target_check - result.pos).square_length()
        if gap_sq < Constants.check_radius_sq():
            # Checkpoint reached: move to the next one, wrapping into a new lap
            result.nextCheckId += 1
            if result.nextCheckId >= len(self.checkpoints):
                result.nextCheckId = 0
                result.laps += 1

        return result