Exemplo n.º 1
0
    def reset(self):
        self._destroy()
        self.reward = 0.0
        self.prev_reward = 0.0
        self.tile_visited_count = 0
        self.t = 0.0
        self.road_poly = []
        self.state = np.zeros(self.observation_space.shape)
        self._last_rewards = []

        while True:
            success = self._create_track()
            if success:
                break
            if self.verbose == 1:
                print(
                    "retry to generate track (normal if there are not many instances of this message)"
                )
        self.car = Car(self.world, *self.track[0][1:4])

        # there are 20 frames of noise at the begining (+ 4 frames per state)
        for _ in range(24):
            obs = self.step(None)[0]

        return obs
Exemplo n.º 2
0
    def reset(
        self,
        *,
        seed: Optional[int] = None,
        return_info: bool = False,
        options: Optional[dict] = None,
    ):
        super().reset(seed=seed)
        self._destroy()
        self.reward = 0.0
        self.prev_reward = 0.0
        self.tile_visited_count = 0
        self.t = 0.0
        self.new_lap = False
        self.road_poly = []

        while True:
            success = self._create_track()
            if success:
                break
            if self.verbose == 1:
                print("retry to generate track (normal if there are not many"
                      "instances of this message)")
        self.car = Car(self.world, *self.track[0][1:4])

        if not return_info:
            return self.step(None)[0]
        else:
            return self.step(None)[0], {}
Exemplo n.º 3
0
    def reset(self):
        self._destroy()
        self.reward = 0.0
        self.prev_reward = 0.0
        self.tile_visited_count = 0
        self.t = 0.0
        self.road_poly = []
        self.obstacle_poly = []
        self.steps = 0

        while True:
            success_track = self._create_track()

            if self.obstacles:
                if success_track:
                    success_obstacles = self._create_obstacles()
                else:
                    success_obstacles = False
            else:
                success_obstacles = True  # just so it goes through to next stage

            if success_track and success_obstacles:
                break
            if self.verbose == 1:
                print("retry to generate track (normal if there are not many"
                      "instances of this message)")
        self.car = Car(self.world, *self.track[0][1:4])

        return self.step(None)[0]
Exemplo n.º 4
0
    def reset(self):
        self._destroy()
        self.reward = 0.0
        self.prev_reward = 0.0
        self.tile_visited_count = 0
        self.t = 0.0
        self.road_poly = []

        while True:
            success = self._create_track()
            if success:
                break
            if self.verbose == 1:
                print(
                    "retry to generate track (normal if there are not many of this messages)"
                )
        if self.random:
            startpos = randint(110, 250)
            ind = 20
        else:
            startpos = 5
            ind = 4
        self.car = Car(self.world, *self.track[startpos][1:ind])

        return self.step(None)[0]
Exemplo n.º 5
0
    def reset(self):
        self.ep_return = 0.0
        self.newtile = False
        self.tile_visited_count = 0
        self.last_touch_with_track = 0
        self.last_new_tile = 0
        self.obst_contact = False
        self.obst_contact_count = 0
        self.obst_contact_list = []
        self.t = 0.0
        self.steps_in_episode = 0
        self.state = np.zeros(self.observation_space.shape)
        self.internal_frames = self.skip_frames * (self.frames_per_state -
                                                   1) + 1
        self.int_state = np.zeros([STATE_H, STATE_W, self.internal_frames])

        if self.track_use >= self.repeat_track * self.episodes_per_track:
            intento = 0
            while intento < 21:
                success = self._create_track()
                intento += 1
                if success:
                    self.track_use = 0
                    self.episode_start = range(
                        0, len(self.track),
                        int(len(self.track) / self.episodes_per_track))
                    #print(self.episode_start)
                    break
                if self.verbose > 0:
                    print(
                        intento,
                        " retry to generate new track (normal below 10, limit 20)"
                    )
        else:
            self._create_tiles(self.track, self.border)

        start_tile = self.episode_start[self.track_use %
                                        self.episodes_per_track]
        #print(start_tile, self.track_use, self.episodes_per_track)

        if self.car is not None:
            self.car.destroy()
        if self.episodes_per_track > 1:
            self.car = Car(self.world, *self.track[start_tile][1:4])
        else:
            self.car = Car(self.world, *self.track[0][1:4])

        #trying to detect two very close reset()
        if self.action_taken > 2:
            self.track_use += 1
            self.action_taken = 0
        #self.track_use += 1

        return self.step(None)[0]
    def run(self, env, model, img_resize=None, random_start=False):

        obs = []
        actions = []
        rewards = []

        ob = env.reset()

        if random_start:  #CarRacing random track tile start
            position = np.random.randint(len(env.track))
            env.env.car = Car(env.env.world, *env.env.track[position][1:4])

        done = False
        while not done:

            if img_resize:
                ob = ob[0:84, :, :]
                ob = cv2.resize(ob,
                                dsize=img_resize,
                                interpolation=cv2.INTER_CUBIC)
                ob_model = torch.tensor(ob / 255).view(
                    1, img_resize[0], img_resize[1],
                    3).permute(0, 3, 1, 2).type('torch.FloatTensor')

            action = model(ob_model.to(self.device)).detach().cpu().numpy()[0]

            obs.append(ob)
            actions.append(action)

            ob, r, done, _ = env.step(action)

            rewards.append(r)

        return obs, actions, rewards
Exemplo n.º 7
0
def simulate_batch(batch_num):
    env = CarRacing()

    obs_data = []
    action_data = []
    action = env.action_space.sample()
    for i_episode in range(_BATCH_SIZE):
        observation = env.reset()
        # Little hack to make the Car start at random positions in the race-track
        position = np.random.randint(len(env.track))
        env.car = Car(env.world, *env.track[position][1:4])
        observation = normalize_observation(observation)

        obs_sequence = []

        for _ in range(_TIME_STEPS):
            if _RENDER:
                env.render()

            action = generate_action(action)

            observation, reward, done, info = env.step(action)
            observation = normalize_observation(observation)

            obs_data.append(observation)

    print("Saving dataset for batch {}".format(batch_num))
    np.save('../data/obs_data_VAE_{}'.format(batch_num), obs_data)

    env.close()
Exemplo n.º 8
0
    def reset(self):
        self._destroy()
        self.reward = 0.0
        self.prev_reward = 0.0
        self.tile_visited_count = 0
        self.t = 0.0
        self.road_poly = []
        self.human_render = False

        while True:
            success = self._create_track()
            if success: break
            #print("retry to generate track (normal if there are not many of this messages)")
        self.car = Car(self.world, *self.track[0][1:4])

        return self.step(None)[0]
def play(params, render=True, verbose=False):
    _NUM_TRIALS = 12
    agent_reward = 0
    for trial in range(_NUM_TRIALS):
        observation = env.reset()
        # Little hack to make the Car start at random positions in the race-track
        np.random.seed(int(str(time.time()*1000000)[10:13]))
        position = np.random.randint(len(env.track))
        env.car = Car(env.world, *env.track[position][1:4])

        total_reward = 0.0
        steps = 0
        while True:
            if render:
                env.render()
            action = decide_action(observation, params)
            observation, r, done, info = env.step(action)
            total_reward += r
            # NB: done is not True after 1000 steps when using the hack above for
            # 	  random init of position
            if verbose and (steps % 200 == 0 or steps == 999):
                print("\naction " + str(["{:+0.2f}".format(x) for x in action]))
                print("step {} total_reward {:+0.2f}".format(steps, total_reward))

            steps += 1
            if steps == 999:
                break

        agent_reward += total_reward

    # If reward is out of scale, clip it
    agent_reward = np.maximum(-(100*_NUM_TRIALS), agent_reward)
    return - (agent_reward / _NUM_TRIALS)
Exemplo n.º 10
0
def simulate_batch(batch_num):
    car_env = CarRacing()

    obs_data = []
    action_data = []
    action = car_env.action_space.sample()
    for item in range(batch_size):
        en_observ = car_env.reset()
        # this make car to start in random positions 
        position = np.random.randint(len(car_env.track))
        car_env.car = Car(car_env.world, *car_env.track[position][1:4])
        en_observ = norm_obse(en_observ)

        obs_sequence = []

        # time steps
        for i in range(steps):
            if render:
                car_env.render()

            action = create_action(action)

            en_observ, reward, done, info = car_env.step(action)
            en_observ = norm_obse(en_observ)

            obs_data.append(en_observ)

    print("Saving dataset for batch {}".format(batch_num))
    np.save('data/TR_data_{}'.format(batch_num), obs_data)
    
    car_env.close()
def multiple_runs(on):
    env = CarRacing()

    states = []
    actions = []
    for run in range(MAX_RUNS):
        state = env.reset()
        # done = False
        counter = 0
        for game_time in range(MAX_GAME_TIME):
            # env.render()
            action = generate_action()
            state = _process_frame(state)
            states.append(state)
            actions.append(action)
            state, r, done, _ = env.step(action)

            # print(r)

            if counter == REST_NUM:
                print('RUN:{},GT:{},DATA:{}'.format(run, game_time,
                                                    len(states)))
                position = np.random.randint(len(env.track))
                env.car = Car(env.world, *env.track[position][1:4])
                counter = 0
            counter += 1
    states = np.array(states, dtype=np.uint8)
    actions = np.array(actions, dtype=np.float16)
    save_name = 'rollout_v2_{}.npz'.format(on)
    # np.save(dst + '/' + save_name, frame_and_action)

    np.savez_compressed(dst + '/' + save_name, action=actions, state=states)
Exemplo n.º 12
0
 def _randomize_car_pos(self):
     random_car_position = np.random.randint(len(
         self.environment.env.track))
     self.environment.car = Car(
         self.environment.world,
         *self.environment.track[random_car_position][1:4])
     obs, _, _, _ = self.step([0, 0, 0])
     return obs
Exemplo n.º 13
0
    def reset(self):
        self._destroy()
        self.reward = 0.0
        self.prev_reward = 0.0
        self.tile_visited_count = 0
        self.t = 0.0
        self.road_poly = []

        while True:
            success = self._create_track()
            if success:
                break
            # if self.verbose == 1:
                # print("retry to generate track (normal if there are not many of this messages)")
        self.car = Car(self.world, *self.track[0][1:4], WHEEL_COLOR=self.WHEEL_COLOR, WHEEL_WHITE=self.WHEEL_WHITE, MUD_COLOR=self.MUD_COLOR, HULL_COLOR=self.HULL_COLOR)

        return self.step(None)[0]
Exemplo n.º 14
0
def play(params):
    with torch.no_grad():
        block_print()
        device = torch.device("cpu")
        vae_model = vae.ConvVAE(VAE_Z_SIZE, VAE_KL_TOLERANCE)
        if os.path.exists("checkpoints/vae_checkpoint.pth"):
            vae_model.load_state_dict(
                torch.load("checkpoints/vae_checkpoint.pth",
                           map_location=device))
        vae_model = vae_model.eval()
        vae_model.to(device)

        rnn_model = rnn.MDMRNN(MDN_NUM_MIXTURES, MDN_HIDDEN_SIZE,
                               MDN_INPUT_SIZE, MDN_NUM_LAYERS, MDN_BATCH_SIZE,
                               1, MDN_OUTPUT_SIZE)
        if os.path.exists("checkpoints/rnn_checkpoint.pth"):
            rnn_model.load_state_dict(
                torch.load("checkpoints/rnn_checkpoint.pth",
                           map_location=device))
        rnn_model.to(device)
        rnn_model = rnn_model.eval()

        controller_model = controller.Controller(CMA_EMBEDDING_SIZE,
                                                 CMA_NUM_ACTIONS, params)

        env = CarRacing()
        _NUM_TRIALS = 16
        agent_reward = 0
        for trial in range(_NUM_TRIALS):
            observation = env.reset()
            # Little hack to make the Car start at random positions in the race-track
            np.random.seed(int(str(time.time() * 1000000)[10:13]))
            position = np.random.randint(len(env.track))
            env.car = Car(env.world, *env.track[position][1:4])

            hidden_state, cell_state = train_rnn.init_hidden(
                MDN_NUM_LAYERS, MDN_BATCH_SIZE, MDN_HIDDEN_SIZE, device)

            total_reward = 0.0
            steps = 0
            while True:
                action, hidden_state, cell_state = decide_action(
                    vae_model, rnn_model, controller_model, observation,
                    hidden_state, cell_state, device)
                observation, r, done, info = env.step(action)
                total_reward += r
                # NB: done is not True after 1000 steps when using the hack above for
                # 	  random init of position

                steps += 1
                if steps == 999:
                    break

            # If reward is out of scale, clip it
            total_reward = np.maximum(-100, total_reward)
            agent_reward += total_reward
        env.close()
        return -(agent_reward / _NUM_TRIALS)
Exemplo n.º 15
0
    def reset(self):
        print('the time played(total):')
        print('***********************count is ', self.count)
        print(self.time_count)
        print('long term reward for this episode is ', self.long_term_reward)
        self.long_term_reward = 0
        self.time_count += 1
        self._destroy()
        self.reward = 0.0
        self.prev_reward = 0.0
        self.tile_visited_count = 0
        self.t = 0.0
        self.road_poly = []
        self.count = 0
        self.life_count = 0.0
        while True:
            success = self._create_track()
            if success:
                break
            if self.verbose == 1:
                print(
                    "retry to generate track (normal if there are not many instances of this message)"
                )
        ####################################
        agent_cars = []
        for i in range(number_agent):
            num_1 = i * degree_d
            if i == 1:
                car = Car(self.world, *(0, 225.0, initial_distance_apart))
                print('##################################################',
                      self.track[num_1][1:4])
            else:
                car = Car(self.world, *self.track[num_1][1:4])
            print
            if i == 1:  ######################################set first car or not

                car.lead_car = True
                print('*************************************')
            agent_cars.append(car)
        self.car = agent_cars
        self.car1 = agent_cars
        #self.car = agent_cars[1]
        #self.car = Car(self.world, *self.track[70][1:4])#original

        return self.step(None)[0]
    def fast_reset(self):
        self.car2 = None
        self.laps = 0
        self.on_road = True
        self.next_road_tile = 0

        self.reward = 0.0
        self.prev_reward = 0.0
        self.tile_visited_count = 0
        self.t = 0.0
        self.human_render = False
        for tile in self.road:
            tile.road_visited = False
        self.road_poly = copy.deepcopy(self.original_road_poly)
        self.car.destroy()
        self.car = Car(self.world, *self.track[0][1:4])

        return self.step(None)
Exemplo n.º 17
0
    def reset(self):
        self._destroy()
        self.reward = 0.0
        self.prev_reward = 0.0
        self.tile_visited_count = 0
        self.t = 0.0
        self.road_poly = []

        while True:
            success = self._create_track()
            if success:
                break
            if self.verbose == 1:
                print("retry to generate track (normal if there are not many"
                      "instances of this message)")
        self.car = Car(self.world, *self.track[0][1:4])

        return self.step(None)[0]
Exemplo n.º 18
0
    def reset(self):
        self.num_step = 1

        self._destroy()
        self.reward = 0.0
        self.prev_reward = 0.0
        self.tile_visited_count = 0
        self.t = 0.0
        self.road_poly = []

        while True:
            success = self._create_track()
            if success:
                break
            # if self.verbose == 1:
            #     print("retry to generate track (normal if there are not many of this messages)")
        self.car = Car(self.world, *self.track[0][1:4])

        self.state_temp = np.zeros((STATE_W, STATE_H, 3), dtype=np.uint8)
        return self.old_step((0, 0, 0))[0]
Exemplo n.º 19
0
    def reset(self):
        self._destroy()
        self.time = -1.0
        self.tile_visited_count = 0
        self.state = None
        self.done = False
        self.reward = 0.0
        self.prev_reward = 0.0

        # Build ground
        self.ground = Ground(self.world, PLAYFIELD, PLAYFIELD)

        # Build track tiles
        self.track_tiles_coordinates = TrackCoordinatesBuilder.load_track(self)
        self.track_tiles = [
            TrackTile(self.world, self.track_tiles_coordinates[i],
                      self.track_tiles_coordinates[i - 1])
            for i, element in enumerate(self.track_tiles_coordinates)
        ]
        # Build cones
        cones_coordinates = []
        for i in range(0, len(self.track_tiles)):
            sensor_vertices = self.track_tiles[i].b2Data.fixtures[
                0].shape.vertices
            for j in range(0, len(sensor_vertices)):
                cones_coordinates.append(sensor_vertices[j])
        self.cones = [
            Cone(world=self.world,
                 position=(cone_coordinate[0], cone_coordinate[1]))
            for cone_coordinate in cones_coordinates
        ]

        init_angle = 0
        init_x, init_y = self.track_tiles[0].position

        self.car = Car(self.world,
                       init_angle=init_angle,
                       init_x=init_x,
                       init_y=init_y)

        return self.step(None)[0]
Exemplo n.º 20
0
    def reset(self):
        self._destroy()
        self.reward = 0.0
        self.prev_reward = 0.0
        self.tile_visited_count = 0
        self.t = 0.0
        self.road_poly = []
        self.track_direction = random.choice([-1, 1])
        if self.viewer:
            self.viewer.geoms = []

        while True:
            success = self._create_track()
            if success:
                break
            if self.verbose == 1:
                print(
                    "retry to generate track (normal if there are not many of this messages)"
                )
        self.car = Car(self.world, *self.track[0][1:4], draw_car=True)

        return self.step(None)[0]
Exemplo n.º 21
0
def main():
    print("Generating data for env CarRacing-v0")

    env = CarRacing()

    for obs_idx in range(1, 10):

        env.reset()

        observations = []

        for i in range(1000):
            position = np.random.randint(len(env.track))
            angle = np.random.randint(-20, 20)
            x_off = np.random.randint(-20, 20)
            init_data = list(env.track[position][1:4])
            init_data[0] += angle
            init_data[1] += x_off
            env.car = Car(env.world, *init_data)

            observation = env.step(None)[0]

            cropped_obs = normalize_observation(
                observation[:CROP_SIZE,
                            CROP_W_OFFSET:CROP_SIZE + CROP_W_OFFSET, :])

            cropped_obs = cv2.resize(cropped_obs,
                                     dsize=(64, 64),
                                     interpolation=cv2.INTER_CUBIC).astype(
                                         np.float32)

            np.clip(cropped_obs, 0.0, 1.0, cropped_obs)

            if i % 10 == 0:
                print(i)

            if i % 100 == 0:
                plt.imshow(cropped_obs)
                plt.show()

            observations.append(cropped_obs)

        observations = np.array(observations, dtype=np.float32)

        if not os.path.exists("data"):
            os.mkdir("data")

        np.save("data/observations_%d.npy" % obs_idx, observations)
Exemplo n.º 22
0
    def reset(self):
        self._destroy()
        self.reward = 0.0
        self.prev_reward = 0.0
        self.tile_visited_count = 0
        self.t = 0.0
        self.road_poly = []

        while True:
            success = self._create_track()
            if success: break
            if self.verbose == 1:
                print("retry to generate track (normal if there are not many of this messages)")
        self.car = Car(self.world, *self.track[0][1:4])

        return self.step(None)[0]
Exemplo n.º 23
0
def multiple_runs(on):
    env = CarRacing()
    frame_and_action = []
    for run in range(MAX_RUNS):
        env.reset()
        # done = False
        counter = 0
        for game_time in range(MAX_GAME_TIME):
            # env.render()
            action = generate_action()
            state, r, done, _ = env.step(action)
            frame_and_action.append({'state': state, 'action': action})
            # print(r)
            counter += 1
            if counter > REST_NUM:
                print('RUN:{},GT:{},DATA:{}'.format(run, game_time,
                                                    len(frame_and_action)))
                position = np.random.randint(len(env.track))
                env.car = Car(env.world, *env.track[position][1:4])
                counter = 0
    save_name = 'rollout_{}.npy'.format(on)
    np.save(dst + '/' + save_name, frame_and_action)
def simulate_batch(batch_num, save=True, time_steps=None, reduce_size=True):
    env = CarRacing()

    if time_steps is None:
        time_steps = _TIME_STEPS

    obs_data = []
    action_data = []
    action = env.action_space.sample()
    for i_episode in range(_BATCH_SIZE):
        observation = env.reset()
        # Little hack to make the Car start at random positions in the race-track
        position = np.random.randint(len(env.track))
        env.car = Car(env.world, *env.track[position][1:4])
        observation = normalize_observation(observation,
                                            output_4d=False,
                                            reduce_size=reduce_size)
        obs_data.append(observation)

        for _ in range(time_steps):
            if _RENDER:
                env.render()

            action = generate_action(action)

            observation, reward, done, info = env.step(action)
            observation = normalize_observation(observation,
                                                output_4d=False,
                                                reduce_size=reduce_size)

            obs_data.append(observation)

    if save:
        print("Saving dataset for batch {:03d}".format(batch_num))
        np.save('../data/obs_data_VAE_{:03d}'.format(batch_num), obs_data)

    env.close()
    return obs_data
Exemplo n.º 25
0
class CarRacing(gym.Env):
    metadata = {
        'render.modes': ['human', 'rgb_array', 'state_pixels'],
        'video.frames_per_second': FPS
    }

    def __init__(self):
        self.seed()
        self.contactListener_keepref = FrictionDetector(self)
        self.world = Box2D.b2World(
            (0, 0), contactListener=self.contactListener_keepref)
        self.viewer = None
        self.invisible_state_window = None
        self.invisible_video_window = None
        self.road = None
        self.car = None
        self.reward = 0.0
        self.prev_reward = 0.0

        self.action_space = spaces.Box(np.array([-1, 0, 0]),
                                       np.array([+1, +1, +1]),
                                       dtype=np.float32)  # steer, gas, brake
        self.observation_space = spaces.Box(low=0,
                                            high=255,
                                            shape=(STATE_H, STATE_W, 3),
                                            dtype=np.uint8)

    def seed(self, seed=None):
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    def _destroy(self):
        if not self.road: return
        for t in self.road:
            self.world.DestroyBody(t)
        self.road = []
        self.car.destroy()

    def _create_track(self):
        CHECKPOINTS = 12

        # Create checkpoints
        checkpoints = []
        for c in range(CHECKPOINTS):
            alpha = 2 * math.pi * c / CHECKPOINTS + self.np_random.uniform(
                0, 2 * math.pi * 1 / CHECKPOINTS)
            rad = self.np_random.uniform(TRACK_RAD / 3, TRACK_RAD)
            if c == 0:
                alpha = 0
                rad = 1.5 * TRACK_RAD
            if c == CHECKPOINTS - 1:
                alpha = 2 * math.pi * c / CHECKPOINTS
                self.start_alpha = 2 * math.pi * (-0.5) / CHECKPOINTS
                rad = 1.5 * TRACK_RAD
            checkpoints.append(
                (alpha, rad * math.cos(alpha), rad * math.sin(alpha)))

        #print "\n".join(str(h) for h in checkpoints)
        #self.road_poly = [ (    # uncomment this to see checkpoints
        #    [ (tx,ty) for a,tx,ty in checkpoints ],
        #    (0.7,0.7,0.9) ) ]
        self.road = []

        # Go from one checkpoint to another to create track
        x, y, beta = 1.5 * TRACK_RAD, 0, 0
        dest_i = 0
        laps = 0
        track = []
        no_freeze = 2500
        visited_other_side = False
        while 1:
            alpha = math.atan2(y, x)
            if visited_other_side and alpha > 0:
                laps += 1
                visited_other_side = False
            if alpha < 0:
                visited_other_side = True
                alpha += 2 * math.pi
            while True:  # Find destination from checkpoints
                failed = True
                while True:
                    dest_alpha, dest_x, dest_y = checkpoints[dest_i %
                                                             len(checkpoints)]
                    if alpha <= dest_alpha:
                        failed = False
                        break
                    dest_i += 1
                    if dest_i % len(checkpoints) == 0: break
                if not failed: break
                alpha -= 2 * math.pi
                continue
            r1x = math.cos(beta)
            r1y = math.sin(beta)
            p1x = -r1y
            p1y = r1x
            dest_dx = dest_x - x  # vector towards destination
            dest_dy = dest_y - y
            proj = r1x * dest_dx + r1y * dest_dy  # destination vector projected on rad
            while beta - alpha > 1.5 * math.pi:
                beta -= 2 * math.pi
            while beta - alpha < -1.5 * math.pi:
                beta += 2 * math.pi
            prev_beta = beta
            proj *= SCALE
            if proj > 0.3: beta -= min(TRACK_TURN_RATE, abs(0.001 * proj))
            if proj < -0.3: beta += min(TRACK_TURN_RATE, abs(0.001 * proj))
            x += p1x * TRACK_DETAIL_STEP
            y += p1y * TRACK_DETAIL_STEP
            track.append((alpha, prev_beta * 0.5 + beta * 0.5, x, y))
            if laps > 4: break
            no_freeze -= 1
            if no_freeze == 0: break
        #print "\n".join([str(t) for t in enumerate(track)])

        # Find closed loop range i1..i2, first loop should be ignored, second is OK
        i1, i2 = -1, -1
        i = len(track)
        while True:
            i -= 1
            if i == 0: return False  # Failed
            pass_through_start = track[i][0] > self.start_alpha and track[
                i - 1][0] <= self.start_alpha
            if pass_through_start and i2 == -1:
                i2 = i
            elif pass_through_start and i1 == -1:
                i1 = i
                break
        #print("Track generation: %i..%i -> %i-tiles track" % (i1, i2, i2-i1))
        assert i1 != -1
        assert i2 != -1

        track = track[i1:i2 - 1]

        first_beta = track[0][1]
        first_perp_x = math.cos(first_beta)
        first_perp_y = math.sin(first_beta)
        # Length of perpendicular jump to put together head and tail
        well_glued_together = np.sqrt(
            np.square(first_perp_x * (track[0][2] - track[-1][2])) +
            np.square(first_perp_y * (track[0][3] - track[-1][3])))
        if well_glued_together > TRACK_DETAIL_STEP:
            return False

        # Red-white border on hard turns
        border = [False] * len(track)
        for i in range(len(track)):
            good = True
            oneside = 0
            for neg in range(BORDER_MIN_COUNT):
                beta1 = track[i - neg - 0][1]
                beta2 = track[i - neg - 1][1]
                good &= abs(beta1 - beta2) > TRACK_TURN_RATE * 0.2
                oneside += np.sign(beta1 - beta2)
            good &= abs(oneside) == BORDER_MIN_COUNT
            border[i] = good
        for i in range(len(track)):
            for neg in range(BORDER_MIN_COUNT):
                border[i - neg] |= border[i]

        # Create tiles
        for i in range(len(track)):
            alpha1, beta1, x1, y1 = track[i]
            alpha2, beta2, x2, y2 = track[i - 1]
            road1_l = (x1 - TRACK_WIDTH * math.cos(beta1),
                       y1 - TRACK_WIDTH * math.sin(beta1))
            road1_r = (x1 + TRACK_WIDTH * math.cos(beta1),
                       y1 + TRACK_WIDTH * math.sin(beta1))
            road2_l = (x2 - TRACK_WIDTH * math.cos(beta2),
                       y2 - TRACK_WIDTH * math.sin(beta2))
            road2_r = (x2 + TRACK_WIDTH * math.cos(beta2),
                       y2 + TRACK_WIDTH * math.sin(beta2))
            t = self.world.CreateStaticBody(fixtures=fixtureDef(
                shape=polygonShape(
                    vertices=[road1_l, road1_r, road2_r, road2_l])))
            t.userData = t
            c = 0.01 * (i % 3)
            t.color = [ROAD_COLOR[0] + c, ROAD_COLOR[1] + c, ROAD_COLOR[2] + c]
            t.road_visited = False
            t.road_friction = 1.0
            t.fixtures[0].sensor = True
            self.road_poly.append(([road1_l, road1_r, road2_r,
                                    road2_l], t.color))
            self.road.append(t)
            if border[i]:
                side = np.sign(beta2 - beta1)
                b1_l = (x1 + side * TRACK_WIDTH * math.cos(beta1),
                        y1 + side * TRACK_WIDTH * math.sin(beta1))
                b1_r = (x1 + side * (TRACK_WIDTH + BORDER) * math.cos(beta1),
                        y1 + side * (TRACK_WIDTH + BORDER) * math.sin(beta1))
                b2_l = (x2 + side * TRACK_WIDTH * math.cos(beta2),
                        y2 + side * TRACK_WIDTH * math.sin(beta2))
                b2_r = (x2 + side * (TRACK_WIDTH + BORDER) * math.cos(beta2),
                        y2 + side * (TRACK_WIDTH + BORDER) * math.sin(beta2))
                self.road_poly.append(([b1_l, b1_r, b2_r,
                                        b2_l], (1, 1, 1) if i % 2 == 0 else
                                       (1, 0, 0)))
        self.track = track
        return True

    def reset(self):
        self._destroy()
        self.reward = 0.0
        self.prev_reward = 0.0
        self.tile_visited_count = 0
        self.t = 0.0
        self.road_poly = []
        self.human_render = False

        while True:
            success = self._create_track()
            if success: break
            #print("retry to generate track (normal if there are not many of this messages)")
        self.car = Car(self.world, *self.track[0][1:4])

        return self.step(None)[0]

    def step(self, action):
        # print(self.t * FPS)
        if action is not None:
            self.car.steer(-action[0])
            self.car.gas(action[1])
            self.car.brake(action[2])

        self.car.step(1.0 / FPS)
        self.world.Step(1.0 / FPS, 6 * 30, 2 * 30)
        self.t += 1.0 / FPS

        self.state = self.render("state_pixels")

        step_reward = 0
        done = False
        if action is not None:  # First step without action, called from reset()
            self.reward -= 0.1
            # We actually don't want to count fuel spent, we want car to be faster.
            #self.reward -=  10 * self.car.fuel_spent / ENGINE_POWER
            self.car.fuel_spent = 0.0
            step_reward = self.reward - self.prev_reward
            self.prev_reward = self.reward
            if self.tile_visited_count == len(self.track):
                done = True
            x, y = self.car.hull.position
            if abs(x) > PLAYFIELD or abs(y) > PLAYFIELD:
                done = True
                step_reward = -100

        return self.state, step_reward, done, {}

    def render(self, mode='human'):
        if self.viewer is None:
            from gym.envs.classic_control import rendering
            self.viewer = rendering.Viewer(WINDOW_W, WINDOW_H)
            self.score_label = pyglet.text.Label('0000',
                                                 font_size=36,
                                                 x=20,
                                                 y=WINDOW_H * 2.5 / 40.00,
                                                 anchor_x='left',
                                                 anchor_y='center',
                                                 color=(255, 255, 255, 255))
            self.transform = rendering.Transform()

        if "t" not in self.__dict__: return  # reset() not called yet

        zoom = ZOOM * SCALE
        zoom_state = ZOOM * SCALE * STATE_W / WINDOW_W
        zoom_video = ZOOM * SCALE * VIDEO_W / WINDOW_W
        scroll_x = self.car.hull.position[0]
        scroll_y = self.car.hull.position[1]
        angle = -self.car.hull.angle
        vel = self.car.hull.linearVelocity
        if np.linalg.norm(vel) > 0.5:
            angle = math.atan2(vel[0], vel[1])
        self.transform.set_scale(zoom, zoom)
        self.transform.set_translation(
            WINDOW_W / 2 - (scroll_x * zoom * math.cos(angle) -
                            scroll_y * zoom * math.sin(angle)),
            WINDOW_H / 4 - (scroll_x * zoom * math.sin(angle) +
                            scroll_y * zoom * math.cos(angle)))
        self.transform.set_rotation(angle)

        self.car.draw(self.viewer, mode != "state_pixels")

        arr = None
        win = self.viewer.window
        if mode != 'state_pixels':
            win.switch_to()
            win.dispatch_events()
        if mode == "rgb_array" or mode == "state_pixels":
            win.clear()
            t = self.transform
            if mode == 'rgb_array':
                VP_W = VIDEO_W
                VP_H = VIDEO_H
            else:
                VP_W = STATE_W
                VP_H = STATE_H
            gl.glViewport(0, 0, VP_W, VP_H)
            t.enable()
            self.render_road()
            for geom in self.viewer.onetime_geoms:
                geom.render()
            t.disable()
            self.render_indicators(WINDOW_W,
                                   WINDOW_H)  # TODO: find why 2x needed, wtf
            image_data = pyglet.image.get_buffer_manager().get_color_buffer(
            ).get_image_data()
            arr = np.fromstring(image_data.data, dtype=np.uint8, sep='')
            arr = arr.reshape(VP_H, VP_W, 4)
            arr = arr[::-1, :, 0:3]

        if mode == "rgb_array" and not self.human_render:  # agent can call or not call env.render() itself when recording video.
            win.flip()

        if mode == 'human':
            self.human_render = True
            win.clear()
            t = self.transform
            gl.glViewport(0, 0, WINDOW_W, WINDOW_H)
            t.enable()
            self.render_road()
            for geom in self.viewer.onetime_geoms:
                geom.render()
            t.disable()
            self.render_indicators(WINDOW_W, WINDOW_H)
            win.flip()

        self.viewer.onetime_geoms = []
        return arr

    def close(self):
        if self.viewer is not None:
            self.viewer.close()
            self.viewer = None

    def render_road(self):
        gl.glBegin(gl.GL_QUADS)
        gl.glColor4f(0.4, 0.8, 0.4, 1.0)
        gl.glVertex3f(-PLAYFIELD, +PLAYFIELD, 0)
        gl.glVertex3f(+PLAYFIELD, +PLAYFIELD, 0)
        gl.glVertex3f(+PLAYFIELD, -PLAYFIELD, 0)
        gl.glVertex3f(-PLAYFIELD, -PLAYFIELD, 0)
        gl.glColor4f(0.4, 0.9, 0.4, 1.0)
        k = PLAYFIELD / 20.0
        for x in range(-20, 20, 2):
            for y in range(-20, 20, 2):
                gl.glVertex3f(k * x + k, k * y + 0, 0)
                gl.glVertex3f(k * x + 0, k * y + 0, 0)
                gl.glVertex3f(k * x + 0, k * y + k, 0)
                gl.glVertex3f(k * x + k, k * y + k, 0)
        for poly, color in self.road_poly:
            gl.glColor4f(color[0], color[1], color[2], 1)
            for p in poly:
                gl.glVertex3f(p[0], p[1], 0)
        gl.glEnd()

    def render_indicators(self, W, H):
        gl.glBegin(gl.GL_QUADS)
        s = W / 40.0
        h = H / 40.0
        gl.glColor4f(0, 0, 0, 1)
        gl.glVertex3f(W, 0, 0)
        gl.glVertex3f(W, 5 * h, 0)
        gl.glVertex3f(0, 5 * h, 0)
        gl.glVertex3f(0, 0, 0)

        def vertical_ind(place, val, color):
            gl.glColor4f(color[0], color[1], color[2], 1)
            gl.glVertex3f((place + 0) * s, h + h * val, 0)
            gl.glVertex3f((place + 1) * s, h + h * val, 0)
            gl.glVertex3f((place + 1) * s, h, 0)
            gl.glVertex3f((place + 0) * s, h, 0)

        def horiz_ind(place, val, color):
            gl.glColor4f(color[0], color[1], color[2], 1)
            gl.glVertex3f((place + 0) * s, 4 * h, 0)
            gl.glVertex3f((place + val) * s, 4 * h, 0)
            gl.glVertex3f((place + val) * s, 2 * h, 0)
            gl.glVertex3f((place + 0) * s, 2 * h, 0)

        true_speed = np.sqrt(
            np.square(self.car.hull.linearVelocity[0]) +
            np.square(self.car.hull.linearVelocity[1]))
        vertical_ind(5, 0.02 * true_speed, (1, 1, 1))
        vertical_ind(7, 0.01 * self.car.wheels[0].omega,
                     (0.0, 0, 1))  # ABS sensors
        vertical_ind(8, 0.01 * self.car.wheels[1].omega, (0.0, 0, 1))
        vertical_ind(9, 0.01 * self.car.wheels[2].omega, (0.2, 0, 1))
        vertical_ind(10, 0.01 * self.car.wheels[3].omega, (0.2, 0, 1))
        horiz_ind(20, -10.0 * self.car.wheels[0].joint.angle, (0, 1, 0))
        horiz_ind(30, -0.8 * self.car.hull.angularVelocity, (1, 0, 0))
        gl.glEnd()
        self.score_label.text = "%04i" % self.reward
        self.score_label.draw()
Exemplo n.º 26
0
class CarRacing(gym.Env, EzPickle):
    metadata = {
        'render.modes': ['human', 'rgb_array', 'state_pixels'],
        'video.frames_per_second' : FPS
    }

    def __init__(self, verbose=1):
        EzPickle.__init__(self)
        self.seed()
        self.contactListener_keepref = FrictionDetector(self)
        self.world = Box2D.b2World((0,0), contactListener=self.contactListener_keepref)
        self.viewer = None
        self.invisible_state_window = None
        self.invisible_video_window = None
        self.road = None
        self.car = None
        self.reward = 0.0
        self.prev_reward = 0.0
        self.verbose = verbose

        self.action_space = spaces.Box( np.array([-1,0,0]), np.array([+1,+1,+1]), dtype=np.float32)  # steer, gas, brake
        self.observation_space = spaces.Box(low=0, high=255, shape=(STATE_H, STATE_W, 3), dtype=np.uint8)

    def seed(self, seed=None):
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    def _destroy(self):
        if not self.road: return
        for t in self.road:
            self.world.DestroyBody(t)
        self.road = []
        self.car.destroy()

    def _create_track(self):
        CHECKPOINTS = 12

        # Create checkpoints
        checkpoints = []
        for c in range(CHECKPOINTS):
            alpha = 2*math.pi*c/CHECKPOINTS + self.np_random.uniform(0, 2*math.pi*1/CHECKPOINTS)
            rad = self.np_random.uniform(TRACK_RAD/3, TRACK_RAD)
            if c==0:
                alpha = 0
                rad = 1.5*TRACK_RAD
            if c==CHECKPOINTS-1:
                alpha = 2*math.pi*c/CHECKPOINTS
                self.start_alpha = 2*math.pi*(-0.5)/CHECKPOINTS
                rad = 1.5*TRACK_RAD
            checkpoints.append( (alpha, rad*math.cos(alpha), rad*math.sin(alpha)) )

        #print "\n".join(str(h) for h in checkpoints)
        #self.road_poly = [ (    # uncomment this to see checkpoints
        #    [ (tx,ty) for a,tx,ty in checkpoints ],
        #    (0.7,0.7,0.9) ) ]
        self.road = []

        # Go from one checkpoint to another to create track
        x, y, beta = 1.5*TRACK_RAD, 0, 0
        dest_i = 0
        laps = 0
        track = []
        no_freeze = 2500
        visited_other_side = False
        while 1:
            alpha = math.atan2(y, x)
            if visited_other_side and alpha > 0:
                laps += 1
                visited_other_side = False
            if alpha < 0:
                visited_other_side = True
                alpha += 2*math.pi
            while True: # Find destination from checkpoints
                failed = True
                while True:
                    dest_alpha, dest_x, dest_y = checkpoints[dest_i % len(checkpoints)]
                    if alpha <= dest_alpha:
                        failed = False
                        break
                    dest_i += 1
                    if dest_i % len(checkpoints) == 0: break
                if not failed: break
                alpha -= 2*math.pi
                continue
            r1x = math.cos(beta)
            r1y = math.sin(beta)
            p1x = -r1y
            p1y = r1x
            dest_dx = dest_x - x  # vector towards destination
            dest_dy = dest_y - y
            proj = r1x*dest_dx + r1y*dest_dy  # destination vector projected on rad
            while beta - alpha >  1.5*math.pi: beta -= 2*math.pi
            while beta - alpha < -1.5*math.pi: beta += 2*math.pi
            prev_beta = beta
            proj *= SCALE
            if proj >  0.3: beta -= min(TRACK_TURN_RATE, abs(0.001*proj))
            if proj < -0.3: beta += min(TRACK_TURN_RATE, abs(0.001*proj))
            x += p1x*TRACK_DETAIL_STEP
            y += p1y*TRACK_DETAIL_STEP
            track.append( (alpha,prev_beta*0.5 + beta*0.5,x,y) )
            if laps > 4: break
            no_freeze -= 1
            if no_freeze==0: break
        #print "\n".join([str(t) for t in enumerate(track)])

        # Find closed loop range i1..i2, first loop should be ignored, second is OK
        i1, i2 = -1, -1
        i = len(track)
        while True:
            i -= 1
            if i==0: return False  # Failed
            pass_through_start = track[i][0] > self.start_alpha and track[i-1][0] <= self.start_alpha
            if pass_through_start and i2==-1:
                i2 = i
            elif pass_through_start and i1==-1:
                i1 = i
                break
        if self.verbose == 1:
            print("Track generation: %i..%i -> %i-tiles track" % (i1, i2, i2-i1))
        assert i1!=-1
        assert i2!=-1

        track = track[i1:i2-1]

        first_beta = track[0][1]
        first_perp_x = math.cos(first_beta)
        first_perp_y = math.sin(first_beta)
        # Length of perpendicular jump to put together head and tail
        well_glued_together = np.sqrt(
            np.square( first_perp_x*(track[0][2] - track[-1][2]) ) +
            np.square( first_perp_y*(track[0][3] - track[-1][3]) ))
        if well_glued_together > TRACK_DETAIL_STEP:
            return False

        # Red-white border on hard turns
        border = [False]*len(track)
        for i in range(len(track)):
            good = True
            oneside = 0
            for neg in range(BORDER_MIN_COUNT):
                beta1 = track[i-neg-0][1]
                beta2 = track[i-neg-1][1]
                good &= abs(beta1 - beta2) > TRACK_TURN_RATE*0.2
                oneside += np.sign(beta1 - beta2)
            good &= abs(oneside) == BORDER_MIN_COUNT
            border[i] = good
        for i in range(len(track)):
            for neg in range(BORDER_MIN_COUNT):
                border[i-neg] |= border[i]

        # Create tiles
        for i in range(len(track)):
            alpha1, beta1, x1, y1 = track[i]
            alpha2, beta2, x2, y2 = track[i-1]
            road1_l = (x1 - TRACK_WIDTH*math.cos(beta1), y1 - TRACK_WIDTH*math.sin(beta1))
            road1_r = (x1 + TRACK_WIDTH*math.cos(beta1), y1 + TRACK_WIDTH*math.sin(beta1))
            road2_l = (x2 - TRACK_WIDTH*math.cos(beta2), y2 - TRACK_WIDTH*math.sin(beta2))
            road2_r = (x2 + TRACK_WIDTH*math.cos(beta2), y2 + TRACK_WIDTH*math.sin(beta2))
            t = self.world.CreateStaticBody( fixtures = fixtureDef(
                shape=polygonShape(vertices=[road1_l, road1_r, road2_r, road2_l])
                ))
            t.userData = t
            c = 0.01*(i%3)
            t.color = [ROAD_COLOR[0] + c, ROAD_COLOR[1] + c, ROAD_COLOR[2] + c]
            t.road_visited = False
            t.road_friction = 1.0
            t.fixtures[0].sensor = True
            self.road_poly.append(( [road1_l, road1_r, road2_r, road2_l], t.color ))
            self.road.append(t)
            if border[i]:
                side = np.sign(beta2 - beta1)
                b1_l = (x1 + side* TRACK_WIDTH        *math.cos(beta1), y1 + side* TRACK_WIDTH        *math.sin(beta1))
                b1_r = (x1 + side*(TRACK_WIDTH+BORDER)*math.cos(beta1), y1 + side*(TRACK_WIDTH+BORDER)*math.sin(beta1))
                b2_l = (x2 + side* TRACK_WIDTH        *math.cos(beta2), y2 + side* TRACK_WIDTH        *math.sin(beta2))
                b2_r = (x2 + side*(TRACK_WIDTH+BORDER)*math.cos(beta2), y2 + side*(TRACK_WIDTH+BORDER)*math.sin(beta2))
                self.road_poly.append(( [b1_l, b1_r, b2_r, b2_l], (1,1,1) if i%2==0 else (1,0,0) ))
        self.track = track
        return True

    def reset(self):
        self._destroy()
        self.reward = 0.0
        self.prev_reward = 0.0
        self.tile_visited_count = 0
        self.t = 0.0
        self.road_poly = []

        while True:
            success = self._create_track()
            if success: break
            if self.verbose == 1:
                print("retry to generate track (normal if there are not many of this messages)")
        self.car = Car(self.world, *self.track[0][1:4])

        return self.step(None)[0]

    def step(self, action):
        if action is not None:
            self.car.steer(-action[0])
            self.car.gas(action[1])
            self.car.brake(action[2])

        self.car.step(1.0/FPS)
        self.world.Step(1.0/FPS, 6*30, 2*30)
        self.t += 1.0/FPS

        self.state = self.render("state_pixels")

        step_reward = 0
        done = False
        if action is not None: # First step without action, called from reset()
            self.reward -= 0.1
            # We actually don't want to count fuel spent, we want car to be faster.
            #self.reward -=  10 * self.car.fuel_spent / ENGINE_POWER
            self.car.fuel_spent = 0.0
            step_reward = self.reward - self.prev_reward
            self.prev_reward = self.reward
            if self.tile_visited_count==len(self.track):
                done = True
            x, y = self.car.hull.position
            if abs(x) > PLAYFIELD or abs(y) > PLAYFIELD:
                done = True
                step_reward = -100

        return self.state, step_reward, done, {}

    def render(self, mode='human'):
        assert mode in ['human', 'state_pixels', 'rgb_array']
        if self.viewer is None:
            from gym.envs.classic_control import rendering
            self.viewer = rendering.Viewer(WINDOW_W, WINDOW_H)
            self.score_label = pyglet.text.Label('0000', font_size=36,
                x=20, y=WINDOW_H*2.5/40.00, anchor_x='left', anchor_y='center',
                color=(255,255,255,255))
            self.transform = rendering.Transform()

        if "t" not in self.__dict__: return  # reset() not called yet

        zoom = 0.1*SCALE*max(1-self.t, 0) + ZOOM*SCALE*min(self.t, 1)   # Animate zoom first second
        zoom_state  = ZOOM*SCALE*STATE_W/WINDOW_W
        zoom_video  = ZOOM*SCALE*VIDEO_W/WINDOW_W
        scroll_x = self.car.hull.position[0]
        scroll_y = self.car.hull.position[1]
        angle = -self.car.hull.angle
        vel = self.car.hull.linearVelocity
        if np.linalg.norm(vel) > 0.5:
            angle = math.atan2(vel[0], vel[1])
        self.transform.set_scale(zoom, zoom)
        self.transform.set_translation(
            WINDOW_W/2 - (scroll_x*zoom*math.cos(angle) - scroll_y*zoom*math.sin(angle)),
            WINDOW_H/4 - (scroll_x*zoom*math.sin(angle) + scroll_y*zoom*math.cos(angle)) )
        self.transform.set_rotation(angle)

        self.car.draw(self.viewer, mode!="state_pixels")

        arr = None
        win = self.viewer.window
        win.switch_to()
        win.dispatch_events()

        win.clear()
        t = self.transform
        if mode=='rgb_array':
            VP_W = VIDEO_W
            VP_H = VIDEO_H
        elif mode == 'state_pixels':
            VP_W = STATE_W
            VP_H = STATE_H
        else:
            pixel_scale = 1
            if hasattr(win.context, '_nscontext'):
                pixel_scale = win.context._nscontext.view().backingScaleFactor()  # pylint: disable=protected-access
            VP_W = pixel_scale * WINDOW_W
            VP_H = pixel_scale * WINDOW_H

        gl.glViewport(0, 0, VP_W, VP_H)
        t.enable()
        self.render_road()
        for geom in self.viewer.onetime_geoms:
            geom.render()
        self.viewer.onetime_geoms = []
        t.disable()
        self.render_indicators(WINDOW_W, WINDOW_H)

        if mode == 'human':
            win.flip()
            return self.viewer.isopen

        image_data = pyglet.image.get_buffer_manager().get_color_buffer().get_image_data()
        arr = np.fromstring(image_data.data, dtype=np.uint8, sep='')
        arr = arr.reshape(VP_H, VP_W, 4)
        arr = arr[::-1, :, 0:3]

        return arr

    def close(self):
        if self.viewer is not None:
            self.viewer.close()
            self.viewer = None

    def render_road(self):
        gl.glBegin(gl.GL_QUADS)
        gl.glColor4f(0.4, 0.8, 0.4, 1.0)
        gl.glVertex3f(-PLAYFIELD, +PLAYFIELD, 0)
        gl.glVertex3f(+PLAYFIELD, +PLAYFIELD, 0)
        gl.glVertex3f(+PLAYFIELD, -PLAYFIELD, 0)
        gl.glVertex3f(-PLAYFIELD, -PLAYFIELD, 0)
        gl.glColor4f(0.4, 0.9, 0.4, 1.0)
        k = PLAYFIELD/20.0
        for x in range(-20, 20, 2):
            for y in range(-20, 20, 2):
                gl.glVertex3f(k*x + k, k*y + 0, 0)
                gl.glVertex3f(k*x + 0, k*y + 0, 0)
                gl.glVertex3f(k*x + 0, k*y + k, 0)
                gl.glVertex3f(k*x + k, k*y + k, 0)
        for poly, color in self.road_poly:
            gl.glColor4f(color[0], color[1], color[2], 1)
            for p in poly:
                gl.glVertex3f(p[0], p[1], 0)
        gl.glEnd()

    def render_indicators(self, W, H):
        gl.glBegin(gl.GL_QUADS)
        s = W/40.0
        h = H/40.0
        gl.glColor4f(0,0,0,1)
        gl.glVertex3f(W, 0, 0)
        gl.glVertex3f(W, 5*h, 0)
        gl.glVertex3f(0, 5*h, 0)
        gl.glVertex3f(0, 0, 0)
        def vertical_ind(place, val, color):
            gl.glColor4f(color[0], color[1], color[2], 1)
            gl.glVertex3f((place+0)*s, h + h*val, 0)
            gl.glVertex3f((place+1)*s, h + h*val, 0)
            gl.glVertex3f((place+1)*s, h, 0)
            gl.glVertex3f((place+0)*s, h, 0)
        def horiz_ind(place, val, color):
            gl.glColor4f(color[0], color[1], color[2], 1)
            gl.glVertex3f((place+0)*s, 4*h , 0)
            gl.glVertex3f((place+val)*s, 4*h, 0)
            gl.glVertex3f((place+val)*s, 2*h, 0)
            gl.glVertex3f((place+0)*s, 2*h, 0)
        true_speed = np.sqrt(np.square(self.car.hull.linearVelocity[0]) + np.square(self.car.hull.linearVelocity[1]))
        vertical_ind(5, 0.02*true_speed, (1,1,1))
        vertical_ind(7, 0.01*self.car.wheels[0].omega, (0.0,0,1)) # ABS sensors
        vertical_ind(8, 0.01*self.car.wheels[1].omega, (0.0,0,1))
        vertical_ind(9, 0.01*self.car.wheels[2].omega, (0.2,0,1))
        vertical_ind(10,0.01*self.car.wheels[3].omega, (0.2,0,1))
        horiz_ind(20, -10.0*self.car.wheels[0].joint.angle, (0,1,0))
        horiz_ind(30, -0.8*self.car.hull.angularVelocity, (1,0,0))
        gl.glEnd()
        self.score_label.text = "%04i" % self.reward
        self.score_label.draw()
Exemplo n.º 27
0
class CarRacingSoft(gym.Env, EzPickle):
    metadata = {'render.modes': ['human'], 'video.frames_per_second': FPS}

    color_black = np.array([0., 0., 0.])
    color_white = np.array([1., 1., 1.])
    color_red = np.array([1., 0., 0.])
    color_green = np.array([0., 1., 0.])
    color_grass_dark = np.array([0.4, 0.8, 0.4])
    color_grass_light = np.array([0.4, 0.9, 0.4])
    color_abs_light = np.array([0., 0., 1.])
    color_abs_dark = np.array([0.2, 0., 1.])

    def __init__(self, frame_skip, verbose=False):
        EzPickle.__init__(self)

        if frame_skip < 1:
            raise ValueError("The value of frame_skip must be at least 1")

        self.seed()
        self.contactListener_keepref = FrictionDetector(self)
        self.world = Box2D.b2World(
            (0, 0), contactListener=self.contactListener_keepref)
        self.viewer = None
        self.invisible_state_window = None
        self.invisible_video_window = None
        self.road = None
        self.car = None
        self.reward = 0.0
        self.prev_reward = 0.0
        self.verbose = verbose
        self.fd_tile = fixtureDef(shape=polygonShape(
            vertices=[(0, 0), (1, 0), (1, -1), (0, -1)]))

        self.action_space = spaces.Box(np.array([-1, 0, 0], dtype=np.float32),
                                       np.array([+1, +1, +1],
                                                dtype=np.float32),
                                       dtype=np.float32)  # steer, gas, brake

        self.observation_space = spaces.Box(low=0,
                                            high=255,
                                            shape=(STATE_H, STATE_W, 3),
                                            dtype=np.float32)
        self.state = np.zeros([STATE_H, STATE_W, 3], dtype=np.float32)
        self.frame_skip = frame_skip

    def seed(self, seed=None):
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    def _destroy(self):
        if not self.road:
            return
        for t in self.road:
            self.world.DestroyBody(t)
        self.road = []
        self.car.destroy()

    def _create_track(self):
        CHECKPOINTS = 12

        # Create checkpoints
        checkpoints = []
        for c in range(CHECKPOINTS):
            alpha = 2 * math.pi * c / CHECKPOINTS + self.np_random.uniform(
                0, 2 * math.pi * 1 / CHECKPOINTS)
            rad = self.np_random.uniform(TRACK_RAD / 3, TRACK_RAD)
            if c == 0:
                alpha = 0
                rad = 1.5 * TRACK_RAD
            if c == CHECKPOINTS - 1:
                alpha = 2 * math.pi * c / CHECKPOINTS
                self.start_alpha = 2 * math.pi * (-0.5) / CHECKPOINTS
                rad = 1.5 * TRACK_RAD
            checkpoints.append(
                (alpha, rad * math.cos(alpha), rad * math.sin(alpha)))
        self.road = []

        # Go from one checkpoint to another to create track
        x, y, beta = 1.5 * TRACK_RAD, 0, 0
        dest_i = 0
        laps = 0
        track = []
        no_freeze = 2500
        visited_other_side = False
        while True:
            alpha = math.atan2(y, x)
            if visited_other_side and alpha > 0:
                laps += 1
                visited_other_side = False
            if alpha < 0:
                visited_other_side = True
                alpha += 2 * math.pi
            while True:  # Find destination from checkpoints
                failed = True
                while True:
                    dest_alpha, dest_x, dest_y = checkpoints[dest_i %
                                                             len(checkpoints)]
                    if alpha <= dest_alpha:
                        failed = False
                        break
                    dest_i += 1
                    if dest_i % len(checkpoints) == 0:
                        break
                if not failed:
                    break
                alpha -= 2 * math.pi
                continue
            r1x = math.cos(beta)
            r1y = math.sin(beta)
            p1x = -r1y
            p1y = r1x
            dest_dx = dest_x - x  # vector towards destination
            dest_dy = dest_y - y
            proj = r1x * dest_dx + r1y * dest_dy  # destination vector projected on rad
            while beta - alpha > 1.5 * math.pi:
                beta -= 2 * math.pi
            while beta - alpha < -1.5 * math.pi:
                beta += 2 * math.pi
            prev_beta = beta
            proj *= SCALE
            if proj > 0.3:
                beta -= min(TRACK_TURN_RATE, abs(0.001 * proj))
            if proj < -0.3:
                beta += min(TRACK_TURN_RATE, abs(0.001 * proj))
            x += p1x * TRACK_DETAIL_STEP
            y += p1y * TRACK_DETAIL_STEP
            track.append((alpha, prev_beta * 0.5 + beta * 0.5, x, y))
            if laps > 4:
                break
            no_freeze -= 1
            if no_freeze == 0:
                break

        # Find closed loop range i1..i2, first loop should be ignored, second is OK
        i1, i2 = -1, -1
        i = len(track)
        while True:
            i -= 1
            if i == 0:
                return False  # Failed
            pass_through_start = track[i][0] > self.start_alpha and track[
                i - 1][0] <= self.start_alpha
            if pass_through_start and i2 == -1:
                i2 = i
            elif pass_through_start and i1 == -1:
                i1 = i
                break
        if self.verbose:
            print("Track generation: %i..%i -> %i-tiles track" %
                  (i1, i2, i2 - i1))
        assert i1 != -1
        assert i2 != -1

        track = track[i1:i2 - 1]

        first_beta = track[0][1]
        first_perp_x = math.cos(first_beta)
        first_perp_y = math.sin(first_beta)
        # Length of perpendicular jump to put together head and tail
        well_glued_together = np.sqrt(
            np.square(first_perp_x * (track[0][2] - track[-1][2])) +
            np.square(first_perp_y * (track[0][3] - track[-1][3])))
        if well_glued_together > TRACK_DETAIL_STEP:
            return False

        # Red-white border on hard turns
        border = [False] * len(track)
        for i in range(len(track)):
            good = True
            oneside = 0
            for neg in range(BORDER_MIN_COUNT):
                beta1 = track[i - neg - 0][1]
                beta2 = track[i - neg - 1][1]
                good &= abs(beta1 - beta2) > TRACK_TURN_RATE * 0.2
                oneside += np.sign(beta1 - beta2)
            good &= abs(oneside) == BORDER_MIN_COUNT
            border[i] = good
        for i in range(len(track)):
            for neg in range(BORDER_MIN_COUNT):
                border[i - neg] |= border[i]

        # Create tiles
        for i in range(len(track)):
            alpha1, beta1, x1, y1 = track[i]
            alpha2, beta2, x2, y2 = track[i - 1]
            road1_l = (x1 - TRACK_WIDTH * math.cos(beta1),
                       y1 - TRACK_WIDTH * math.sin(beta1))
            road1_r = (x1 + TRACK_WIDTH * math.cos(beta1),
                       y1 + TRACK_WIDTH * math.sin(beta1))
            road2_l = (x2 - TRACK_WIDTH * math.cos(beta2),
                       y2 - TRACK_WIDTH * math.sin(beta2))
            road2_r = (x2 + TRACK_WIDTH * math.cos(beta2),
                       y2 + TRACK_WIDTH * math.sin(beta2))
            vertices = [road1_l, road1_r, road2_r, road2_l]
            self.fd_tile.shape.vertices = vertices
            t = self.world.CreateStaticBody(fixtures=self.fd_tile)
            t.userData = t
            c = 0.01 * (i % 3)
            t.color = [ROAD_COLOR[0] + c, ROAD_COLOR[1] + c, ROAD_COLOR[2] + c]
            t.road_visited = False
            t.road_friction = 1.0
            t.fixtures[0].sensor = True
            self.road_poly.append(([road1_l, road1_r, road2_r,
                                    road2_l], t.color))
            self.road.append(t)
            if border[i]:
                side = np.sign(beta2 - beta1)
                b1_l = (x1 + side * TRACK_WIDTH * math.cos(beta1),
                        y1 + side * TRACK_WIDTH * math.sin(beta1))
                b1_r = (x1 + side * (TRACK_WIDTH + BORDER) * math.cos(beta1),
                        y1 + side * (TRACK_WIDTH + BORDER) * math.sin(beta1))
                b2_l = (x2 + side * TRACK_WIDTH * math.cos(beta2),
                        y2 + side * TRACK_WIDTH * math.sin(beta2))
                b2_r = (x2 + side * (TRACK_WIDTH + BORDER) * math.cos(beta2),
                        y2 + side * (TRACK_WIDTH + BORDER) * math.sin(beta2))
                self.road_poly.append(([b1_l, b1_r, b2_r,
                                        b2_l], (1, 1, 1) if i % 2 == 0 else
                                       (1, 0, 0)))
        self.track = track
        return True

    def reset(self):
        self._destroy()
        self.reward = 0.0
        self.prev_reward = 0.0
        self.tile_visited_count = 0
        self.t = 0.0
        self.road_poly = []
        self.human_render = False
        self.frames = 0

        while True:
            success = self._create_track()
            if success:
                break
            if self.verbose:
                print(
                    "retry to generate track (normal if there are not many instances of this message)"
                )
        self.car = Car(self.world, *self.track[0][1:4])

        return self.step(None)[0]

    def step(self, action):
        total_reward = 0
        for _ in range(self.frame_skip):
            if action is not None:
                self.car.steer(-action[0])
                self.car.gas(action[1])
                self.car.brake(action[2])

            self.car.step(1.0 / FPS)
            self.world.Step(1.0 / FPS, 6 * 30, 2 * 30)
            self.t += 1.0 / FPS

            step_reward = 0
            done = False
            if action is not None:  # First step without action, called from reset()
                self.reward -= 0.1
                # We actually don't want to count fuel spent, we want car to be faster.
                # self.reward -=  10 * self.car.fuel_spent / ENGINE_POWER
                self.car.fuel_spent = 0.0
                step_reward = self.reward - self.prev_reward
                self.prev_reward = self.reward
                if self.tile_visited_count == len(self.track):
                    done = True
                x, y = self.car.hull.position
                if abs(x) > PLAYFIELD or abs(y) > PLAYFIELD:
                    done = True
                    step_reward = -100

            total_reward += step_reward
            self.frames += 1
            if self.frames > 1000: done = True
            if done or action is None: break

        self._draw()

        green = (self.state[66:78, 43:52, 1] > 0.5)
        # print("green:", sum(green.flatten()))
        speed = sum(self.state[85:, 2, 0])

        abs1 = sum(self.state[85:, 9, 2])
        abs2 = sum(self.state[85:, 14, 2])
        abs3 = sum(self.state[85:, 19, 2])
        abs4 = sum(self.state[85:, 24, 2])

        steering_input_left = sum(self.state[90, 37:48, 1])
        steering_input_right = sum(self.state[90, 47:58, 1])

        steering = steering_input_right - steering_input_left

        rotation_left = sum(self.state[90, 59:72, 0])
        rotation_right = sum(self.state[90, 72:85, 0])

        rotation = rotation_right - rotation_left

        print(
            f"speed:{speed}\tabs:\t{abs1}\t{abs2}\t{abs3}\t{abs4}\tsteering:{steering}\trotation:{rotation}"
        )

        return np.copy(self.state), total_reward, done, {}

    def render(self, mode='human', close=False):
        if close:
            if self.viewer is not None:
                self.viewer.close()
                self.viewer = None
            return

        if self.viewer is None:
            from gym.envs.classic_control import rendering
            self.viewer = rendering.SimpleImageViewer()

        self.viewer.imshow((self.state.repeat(RENDER_UPSCALE, axis=0).repeat(
            RENDER_UPSCALE, axis=1) * 255).astype(np.uint8))

    def _draw(self):
        # Simple 2D affine transformation class
        class Transform():
            def __init__(self, *values):
                self.matrix = values if len(values) else [
                    1., 0., 0., 0., 1., 0., 0., 0., 1.
                ]

            @staticmethod
            def translation(x, y):
                return Transform(1.0, 0.0, x, 0.0, 1.0, y, 0.0, 0.0, 1.0)

            @staticmethod
            def scale(x, y):
                return Transform(x, 0.0, 0.0, 0.0, y, 0.0, 0.0, 0.0, 1.0)

            @staticmethod
            def rotation(angle):
                cos, sin = math.cos(angle), math.sin(angle)
                return Transform(cos, -sin, 0.0, sin, cos, 0.0, 0.0, 0.0, 1.0)

            def apply_and_swap(self, point):
                sa, sb, sc, sd, se, sf, _, _, _ = self.matrix
                x, y = point
                return (x * sd + y * se + sf, x * sa + y * sb + sc)

            def __mul__(self, other):
                sa, sb, sc, sd, se, sf, _, _, _ = self.matrix
                oa, ob, oc, od, oe, of, _, _, _ = other.matrix
                return Transform(sa * oa + sb * od, sa * ob + sb * oe,
                                 sa * oc + sb * of + sc, sd * oa + se * od,
                                 sd * ob + se * oe, sd * oc + se * of + sf,
                                 0.0, 0.0, 1.0)

            def __imul__(self, other):
                return self.__mul__(other)

        class Renderer():
            def __init__(self, env):
                self.env = env

            def draw_polygon(self, path, color):
                self.env._fill_polygon(path, self.env.state, color)

        if "t" not in self.__dict__: return  # reset() not called yet

        zoom = 0.1 * SCALE * max(1 - self.t, 0) + ZOOM * SCALE * min(
            self.t, 1)  # Animate zoom first second
        scroll_x = self.car.hull.position[0]
        scroll_y = self.car.hull.position[1]
        angle = -self.car.hull.angle
        vel = self.car.hull.linearVelocity
        if np.linalg.norm(vel) > 0.5:
            angle = math.atan2(vel[0], vel[1])
        self.transform = Transform.translation(STATE_W / 2, STATE_H * 3 / 4)
        self.transform *= Transform.scale(STATE_W / 1000, STATE_H / 1000)
        self.transform *= Transform.scale(zoom, -zoom)
        self.transform *= Transform.rotation(angle)
        self.transform *= Transform.translation(-scroll_x, -scroll_y)

        # Clear
        self.state[:, :, :] = self.color_black

        # Draw road, car and indicators
        self._render_road(scroll_x, scroll_y, zoom)
        self.car.draw(Renderer(self), False)
        self._render_indicators()

    def _render_road(self, scroll_x, scroll_y, zoom):
        self._fill_polygon([(-PLAYFIELD, +PLAYFIELD), (+PLAYFIELD, +PLAYFIELD),
                            (+PLAYFIELD, -PLAYFIELD),
                            (-PLAYFIELD, -PLAYFIELD)], self.state,
                           self.color_grass_dark)
        k = PLAYFIELD / 20.0
        mindist = 2000000 / (zoom**2)
        for x in range(-20, 20, 2):
            kx = k * x
            dist = (kx - scroll_x)**2
            if dist >= mindist: continue
            for y in range(-20, 20, 2):
                ky = k * y
                if dist + (ky - scroll_y)**2 >= mindist: continue
                self._fill_polygon([(kx + k, ky + 0), (kx + 0, ky + 0),
                                    (kx + 0, ky + k), (kx + k, ky + k)],
                                   self.state, self.color_grass_light)
        for poly, color in self.road_poly:
            if (poly[0][0] - scroll_x)**2 + (poly[0][1] -
                                             scroll_y)**2 >= mindist:
                continue
            self._fill_polygon(poly, self.state, color)

    def _render_indicators(self):
        s = STATE_W / 40
        h = STATE_H / 40
        self._fill_polygon([(0, STATE_H), (STATE_W, STATE_H),
                            (STATE_W, STATE_H - 5 * h), (0, STATE_H - 5 * h)],
                           self.state,
                           self.color_black,
                           transform=False)

        def vertical_ind(place, val, color):
            self._fill_polygon([((place + 0) * s, STATE_H - h - h * val),
                                ((place + 2) * s, STATE_H - h - h * val),
                                ((place + 2) * s, STATE_H - h),
                                ((place + 0) * s, STATE_H - h)],
                               self.state,
                               color,
                               transform=False)

        def horiz_ind(place, val, color):
            self._fill_polygon([((place + 0) * s, STATE_H - 4 * h),
                                ((place + val) * s, STATE_H - 4 * h),
                                ((place + val) * s, STATE_H - 1.5 * h),
                                ((place + 0) * s, STATE_H - 1.5 * h)],
                               self.state,
                               color,
                               transform=False)

        true_speed = np.sqrt(
            np.square(self.car.hull.linearVelocity[0]) +
            np.square(self.car.hull.linearVelocity[1]))
        vertical_ind(1, 0.02 * true_speed, self.color_white)
        vertical_ind(4, 0.01 * self.car.wheels[0].omega,
                     self.color_abs_light)  # ABS sensors
        vertical_ind(6, 0.01 * self.car.wheels[1].omega, self.color_abs_light)
        vertical_ind(8, 0.01 * self.car.wheels[2].omega, self.color_abs_dark)
        vertical_ind(10, 0.01 * self.car.wheels[3].omega, self.color_abs_dark)
        horiz_ind(20, -10.0 * self.car.wheels[0].joint.angle, self.color_green)
        horiz_ind(30, -0.8 * self.car.hull.angularVelocity, self.color_red)

    # Adapted from https://github.com/luispedro/mahotas/blob/master/mahotas/polygon.py
    def _fill_polygon(self, polygon, canvas, color, transform=True):
        '''
        fill_polygon([(y0,x0), (y1,x1),...], canvas, color=1)
        Draw a filled polygon in canvas
        Parameters
        ----------
        polygon : list of pairs
            a list of (y,x) points
        canvas : ndarray
            where to draw, will be modified in place
        color : integer, optional
            which colour to use (default: 1)
        '''
        # algorithm adapted from: http://www.alienryderflex.com/polygon_fill/
        if not len(polygon):
            return

        if transform:
            polygon = [
                self.transform.apply_and_swap(point) for point in polygon
            ]
        else:
            polygon = [(float(y), float(x)) for x, y in polygon]

        min_y = max(int(min(y for y, x in polygon)), 0)
        if min_y >= canvas.shape[0]: return
        max_y = min(max(int(max(y + 1 for y, x in polygon)), 0),
                    canvas.shape[0])
        if max_y <= 0: return
        if min(x for y, x in polygon) >= canvas.shape[1]: return
        if max(x for y, x in polygon) < 0: return
        for y in range(min_y, max_y):
            nodes = []
            j = -1
            for i, p in enumerate(polygon):
                pj = polygon[j]
                if p[0] < y and pj[0] >= y or pj[0] < y and p[0] >= y:
                    dy = pj[0] - p[0]
                    if dy:
                        nodes.append((p[1] + (y - p[0]) / (pj[0] - p[0]) *
                                      (pj[1] - p[1])))
                    elif p[0] == y:
                        nodes.append(p[1])
                j = i
            nodes.sort()
            for n, nn in zip(nodes[::2], nodes[1::2]):
                canvas[y,
                       max(int(n), 0):min(max(int(nn), 0), canvas.shape[1]
                                          )] = color
Exemplo n.º 28
0
class CarRacing(gym.Env, EzPickle):
    metadata = {
        'render.modes': ['human', 'rgb_array', 'state_pixels'],
        'video.frames_per_second': FPS
    }

    def __init__(self, verbose=1):
        EzPickle.__init__(self)
        self.seed()
        self.contactListener_keepref = FrictionDetector(self)
        self.world = Box2D.b2World(
            (0, 0), contactListener=self.contactListener_keepref)
        self.viewer = None
        self.invisible_state_window = None
        self.invisible_video_window = None
        self.road = None
        self.car = None
        self.reward = 0.0
        self.prev_reward = 0.0
        self.verbose = verbose
        self.fd_tile = fixtureDef(shape=polygonShape(
            vertices=[(0, 0), (1, 0), (1, -1), (0, -1)]))

        self.action_space = spaces.Box(np.array([-1, 0, 0]),
                                       np.array([+1, +1, +1]),
                                       dtype=np.float32)  # steer, gas, brake
        self.observation_space = spaces.Box(low=0,
                                            high=255,
                                            shape=(STATE_H, STATE_W, 3),
                                            dtype=np.uint8)

        CHECKPOINTS = 12
        # Create checkpoints
        # TODO Use a real way to keep a constant track across training runs
        self.checkpoints = []
        self.checkpoints.append((0, 225.0, 0.0))
        self.checkpoints.append(
            (0.7825323624208509, 89.6427647192468, 89.1304349066121))
        self.checkpoints.append(
            (1.5543323243350344, 1.783985395462951, 108.34693482727236))
        self.checkpoints.append(
            (1.6057305460922464, -2.2173644459530517, 63.446740574663174))
        self.checkpoints.append(
            (2.6175081644916047, -58.76396976672586, 33.965461046114534))
        self.checkpoints.append(
            (2.7871461118931458, -134.63944761816262, 49.82679389320398))
        self.checkpoints.append(
            (3.414113547480756, -106.41825645850612, -29.741137759708423))
        self.checkpoints.append(
            (3.8745797378794, -77.61403468584427, -69.87679530100709))
        self.checkpoints.append(
            (4.193711736042842, -33.56139367373087, -58.79641863577176))
        self.checkpoints.append(
            (4.928823629511352, 29.852520836123745, -135.76810358020867))
        self.checkpoints.append(
            (5.29734709463665, 68.99766439052978, -104.18233435806235))
        self.checkpoints.append(
            (5.759586531581287, 194.85571585149864, -112.5000000000001))

        self.start_alpha = 2 * math.pi * (-0.5) / CHECKPOINTS
        '''
        self.checkpoints = []
        for c in range(CHECKPOINTS):
            alpha = 2*math.pi*c/CHECKPOINTS + self.np_random.uniform(0, 2*math.pi*1/CHECKPOINTS)
            rad = self.np_random.uniform(TRACK_RAD/3, TRACK_RAD)
            if c==0:
                alpha = 0
                rad = 1.5*TRACK_RAD
            if c==CHECKPOINTS-1:
                alpha = 2*math.pi*c/CHECKPOINTS
                
                rad = 1.5*TRACK_RAD
            self.checkpoints.append( (alpha, rad*math.cos(alpha), rad*math.sin(alpha)) )
        '''

    def seed(self, seed=None):
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    def _destroy(self):
        if not self.road:
            return
        for t in self.road:
            self.world.DestroyBody(t)
        self.road = []
        self.car.destroy()

    def _create_track(self):
        checkpoints = self.checkpoints

        #print ("\n".join(str(h) for h in checkpoints))
        # self.road_poly = [ (    # uncomment this to see checkpoints
        #    [ (tx,ty) for a,tx,ty in checkpoints ],
        #    (0.7,0.7,0.9) ) ]
        self.road = []

        # Go from one checkpoint to another to create track
        x, y, beta = 1.5 * TRACK_RAD, 0, 0
        dest_i = 0
        laps = 0
        track = []
        no_freeze = 2500
        visited_other_side = False
        while True:
            alpha = math.atan2(y, x)
            if visited_other_side and alpha > 0:
                laps += 1
                visited_other_side = False
            if alpha < 0:
                visited_other_side = True
                alpha += 2 * math.pi
            while True:  # Find destination from checkpoints
                failed = True
                while True:
                    dest_alpha, dest_x, dest_y = checkpoints[dest_i %
                                                             len(checkpoints)]
                    if alpha <= dest_alpha:
                        failed = False
                        break
                    dest_i += 1
                    if dest_i % len(checkpoints) == 0:
                        break
                if not failed:
                    break
                alpha -= 2 * math.pi
                continue
            r1x = math.cos(beta)
            r1y = math.sin(beta)
            p1x = -r1y
            p1y = r1x
            dest_dx = dest_x - x  # vector towards destination
            dest_dy = dest_y - y
            proj = r1x * dest_dx + r1y * dest_dy  # destination vector projected on rad
            while beta - alpha > 1.5 * math.pi:
                beta -= 2 * math.pi
            while beta - alpha < -1.5 * math.pi:
                beta += 2 * math.pi
            prev_beta = beta
            proj *= SCALE
            if proj > 0.3:
                beta -= min(TRACK_TURN_RATE, abs(0.001 * proj))
            if proj < -0.3:
                beta += min(TRACK_TURN_RATE, abs(0.001 * proj))
            x += p1x * TRACK_DETAIL_STEP
            y += p1y * TRACK_DETAIL_STEP
            track.append((alpha, prev_beta * 0.5 + beta * 0.5, x, y))
            if laps > 4:
                break
            no_freeze -= 1
            if no_freeze == 0:
                break
        # print "\n".join([str(t) for t in enumerate(track)])

        # Find closed loop range i1..i2, first loop should be ignored, second is OK
        i1, i2 = -1, -1
        i = len(track)
        while True:
            i -= 1
            if i == 0:
                return False  # Failed
            pass_through_start = track[i][0] > self.start_alpha and track[
                i - 1][0] <= self.start_alpha
            if pass_through_start and i2 == -1:
                i2 = i
            elif pass_through_start and i1 == -1:
                i1 = i
                break
        if self.verbose == 1:
            print("Track generation: %i..%i -> %i-tiles track" %
                  (i1, i2, i2 - i1))
        assert i1 != -1
        assert i2 != -1

        track = track[i1:i2 - 1]

        first_beta = track[0][1]
        first_perp_x = math.cos(first_beta)
        first_perp_y = math.sin(first_beta)
        # Length of perpendicular jump to put together head and tail
        well_glued_together = np.sqrt(
            np.square(first_perp_x * (track[0][2] - track[-1][2])) +
            np.square(first_perp_y * (track[0][3] - track[-1][3])))
        if well_glued_together > TRACK_DETAIL_STEP:
            return False

        # Red-white border on hard turns
        '''
        border = [False]*len(track)
        for i in range(len(track)):
            good = True
            oneside = 0
            for neg in range(BORDER_MIN_COUNT):
                beta1 = track[i-neg-0][1]
                beta2 = track[i-neg-1][1]
                good &= abs(beta1 - beta2) > TRACK_TURN_RATE*0.2
                oneside += np.sign(beta1 - beta2)
            good &= abs(oneside) == BORDER_MIN_COUNT
            border[i] = good
        for i in range(len(track)):
            for neg in range(BORDER_MIN_COUNT):
                border[i-neg] |= border[i]
        '''

        # Create tiles
        for i in range(len(track)):
            alpha1, beta1, x1, y1 = track[i]
            alpha2, beta2, x2, y2 = track[i - 1]
            road1_l = (x1 - TRACK_WIDTH * math.cos(beta1),
                       y1 - TRACK_WIDTH * math.sin(beta1))
            road1_r = (x1 + TRACK_WIDTH * math.cos(beta1),
                       y1 + TRACK_WIDTH * math.sin(beta1))
            road2_l = (x2 - TRACK_WIDTH * math.cos(beta2),
                       y2 - TRACK_WIDTH * math.sin(beta2))
            road2_r = (x2 + TRACK_WIDTH * math.cos(beta2),
                       y2 + TRACK_WIDTH * math.sin(beta2))
            vertices = [road1_l, road1_r, road2_r, road2_l]
            self.fd_tile.shape.vertices = vertices
            t = self.world.CreateStaticBody(fixtures=self.fd_tile)
            t.userData = t
            c = 0.01 * (i % 3)
            t.color = [ROAD_COLOR[0] + c, ROAD_COLOR[1] + c, ROAD_COLOR[2] + c]
            t.road_visited = False
            t.road_friction = 1.0
            t.fixtures[0].sensor = True
            self.road_poly.append(([road1_l, road1_r, road2_r,
                                    road2_l], t.color))
            self.road.append(t)
            '''
            if border[i]:
                side = np.sign(beta2 - beta1)
                b1_l = (x1 + side* TRACK_WIDTH        *math.cos(beta1), y1 + side* TRACK_WIDTH        *math.sin(beta1))
                b1_r = (x1 + side*(TRACK_WIDTH+BORDER)*math.cos(beta1), y1 + side*(TRACK_WIDTH+BORDER)*math.sin(beta1))
                b2_l = (x2 + side* TRACK_WIDTH        *math.cos(beta2), y2 + side* TRACK_WIDTH        *math.sin(beta2))
                b2_r = (x2 + side*(TRACK_WIDTH+BORDER)*math.cos(beta2), y2 + side*(TRACK_WIDTH+BORDER)*math.sin(beta2))
                self.road_poly.append(( [b1_l, b1_r, b2_r, b2_l], (1,1,1) if i%2==0 else (1,0,0) ))
            '''
        self.track = track
        return True

    def reset(self):
        self._destroy()
        self.reward = 0.0
        self.prev_reward = 0.0
        self.tile_visited_count = 0
        self.t = 0.0
        self.road_poly = []

        while True:
            success = self._create_track()
            if success:
                break
            if self.verbose == 1:
                print(
                    "retry to generate track (normal if there are not many of this messages)"
                )
        self.car = Car(self.world, *self.track[0][1:4])

        return self.step(None)[0]

    def step(self, action):
        if action is not None:
            self.car.steer(-action[0])
            self.car.gas(action[1])
            self.car.brake(action[2])

        self.car.step(1.0 / FPS)
        self.world.Step(1.0 / FPS, 6 * 30, 2 * 30)
        self.t += 1.0 / FPS

        self.state = self.render("state_pixels")

        step_reward = 0
        done = False
        if action is not None:  # First step without action, called from reset()
            self.reward -= 0.1
            # We actually don't want to count fuel spent, we want car to be faster.
            # self.reward -=  10 * self.car.fuel_spent / ENGINE_POWER
            self.car.fuel_spent = 0.0
            step_reward = self.reward - self.prev_reward
            self.prev_reward = self.reward
            if self.tile_visited_count == len(self.track):
                done = True
            x, y = self.car.hull.position
            if abs(x) > PLAYFIELD or abs(y) > PLAYFIELD:
                done = True
                step_reward = -100
            # TODO: Exit if far off track

        return self.state, step_reward, done, {}

    def render(self, mode='human'):
        assert mode in ['human', 'state_pixels', 'rgb_array']

        if self.viewer is None:
            from gym.envs.classic_control import rendering
            self.viewer = rendering.Viewer(WINDOW_W, WINDOW_H)
            self.score_label = pyglet.text.Label('0000',
                                                 font_size=36,
                                                 x=20,
                                                 y=WINDOW_H * 2.5 / 40.00,
                                                 anchor_x='left',
                                                 anchor_y='center',
                                                 color=(255, 255, 255, 255))
            self.transform = rendering.Transform()

        if "t" not in self.__dict__: return  # reset() not called yet

        zoom = 0.1 * SCALE * max(1 - self.t, 0) + ZOOM * SCALE * min(
            self.t, 1)  # Animate zoom first second
        zoom_state = ZOOM * SCALE * STATE_W / WINDOW_W
        zoom_video = ZOOM * SCALE * VIDEO_W / WINDOW_W
        scroll_x = self.car.hull.position[0]
        scroll_y = self.car.hull.position[1]
        angle = -self.car.hull.angle
        vel = self.car.hull.linearVelocity
        #if np.linalg.norm(vel) > 0.5:
        #    angle = math.atan2(vel[0], vel[1])
        self.transform.set_scale(zoom, zoom)
        self.transform.set_translation(
            WINDOW_W / 2 - (scroll_x * zoom * math.cos(angle) -
                            scroll_y * zoom * math.sin(angle)),
            WINDOW_H / 4 - (scroll_x * zoom * math.sin(angle) +
                            scroll_y * zoom * math.cos(angle)))
        self.transform.set_rotation(angle)

        self.car.draw(self.viewer, mode != "state_pixels")

        arr = None
        win = self.viewer.window
        win.switch_to()
        win.dispatch_events()

        win.clear()
        t = self.transform
        if mode == 'rgb_array':
            VP_W = VIDEO_W
            VP_H = VIDEO_H
        elif mode == 'state_pixels':
            VP_W = STATE_W
            VP_H = STATE_H
        else:
            pixel_scale = 1
            if hasattr(win.context, '_nscontext'):
                pixel_scale = win.context._nscontext.view().backingScaleFactor(
                )  # pylint: disable=protected-access
            VP_W = int(pixel_scale * WINDOW_W)
            VP_H = int(pixel_scale * WINDOW_H)

        gl.glViewport(0, 0, VP_W, VP_H)
        t.enable()
        self.render_road()
        for geom in self.viewer.onetime_geoms:
            geom.render()
        self.viewer.onetime_geoms = []
        t.disable()
        # self.render_indicators(WINDOW_W, WINDOW_H)

        if mode == 'human':
            win.flip()
            return self.viewer.isopen

        image_data = pyglet.image.get_buffer_manager().get_color_buffer(
        ).get_image_data()
        arr = np.fromstring(image_data.data, dtype=np.uint8, sep='')
        arr = arr.reshape(VP_H, VP_W, 4)
        arr = arr[::-1, :, 0:3]

        return arr

    def close(self):
        if self.viewer is not None:
            self.viewer.close()
            self.viewer = None

    def render_road(self):
        gl.glBegin(gl.GL_QUADS)
        gl.glColor4f(0.4, 0.8, 0.4, 1.0)
        gl.glVertex3f(-PLAYFIELD, +PLAYFIELD, 0)
        gl.glVertex3f(+PLAYFIELD, +PLAYFIELD, 0)
        gl.glVertex3f(+PLAYFIELD, -PLAYFIELD, 0)
        gl.glVertex3f(-PLAYFIELD, -PLAYFIELD, 0)
        gl.glColor4f(0.4, 0.9, 0.4, 1.0)
        k = PLAYFIELD / 20.0
        #for x in range(-20, 20, 2):
        #    for y in range(-20, 20, 2):
        #        gl.glVertex3f(k*x + k, k*y + 0, 0)
        #        gl.glVertex3f(k*x + 0, k*y + 0, 0)
        #        gl.glVertex3f(k*x + 0, k*y + k, 0)
        #        gl.glVertex3f(k*x + k, k*y + k, 0)
        for poly, color in self.road_poly:
            gl.glColor4f(color[0], color[1], color[2], 1)
            for p in poly:
                gl.glVertex3f(p[0], p[1], 0)
        gl.glEnd()

    def render_indicators(self, W, H):
        gl.glBegin(gl.GL_QUADS)
        s = W / 40.0
        h = H / 40.0
        gl.glColor4f(0, 0, 0, 1)
        gl.glVertex3f(W, 0, 0)
        gl.glVertex3f(W, 5 * h, 0)
        gl.glVertex3f(0, 5 * h, 0)
        gl.glVertex3f(0, 0, 0)

        def vertical_ind(place, val, color):
            gl.glColor4f(color[0], color[1], color[2], 1)
            gl.glVertex3f((place + 0) * s, h + h * val, 0)
            gl.glVertex3f((place + 1) * s, h + h * val, 0)
            gl.glVertex3f((place + 1) * s, h, 0)
            gl.glVertex3f((place + 0) * s, h, 0)

        def horiz_ind(place, val, color):
            gl.glColor4f(color[0], color[1], color[2], 1)
            gl.glVertex3f((place + 0) * s, 4 * h, 0)
            gl.glVertex3f((place + val) * s, 4 * h, 0)
            gl.glVertex3f((place + val) * s, 2 * h, 0)
            gl.glVertex3f((place + 0) * s, 2 * h, 0)

        true_speed = np.sqrt(
            np.square(self.car.hull.linearVelocity[0]) +
            np.square(self.car.hull.linearVelocity[1]))
        vertical_ind(5, 0.02 * true_speed, (1, 1, 1))
        vertical_ind(7, 0.01 * self.car.wheels[0].omega,
                     (0.0, 0, 1))  # ABS sensors
        vertical_ind(8, 0.01 * self.car.wheels[1].omega, (0.0, 0, 1))
        vertical_ind(9, 0.01 * self.car.wheels[2].omega, (0.2, 0, 1))
        vertical_ind(10, 0.01 * self.car.wheels[3].omega, (0.2, 0, 1))
        horiz_ind(20, -10.0 * self.car.wheels[0].joint.angle, (0, 1, 0))
        horiz_ind(30, -0.8 * self.car.hull.angularVelocity, (1, 0, 0))
        gl.glEnd()
        self.score_label.text = "%04i" % self.reward
        self.score_label.draw()
Exemplo n.º 29
0
class CarRacing(gym.Env, EzPickle):
    metadata = {
        "render.modes": ["human", "rgb_array", "state_pixels"],
        "video.frames_per_second": FPS,
    }

    def __init__(self, verbose=1):
        EzPickle.__init__(self)
        self.seed()
        self.contactListener_keepref = FrictionDetector(self)
        self.world = Box2D.b2World((0, 0), contactListener=self.contactListener_keepref)
        self.viewer = None
        self.invisible_state_window = None
        self.invisible_video_window = None
        self.road = None
        self.car = None
        self.reward = 0.0
        self.prev_reward = 0.0
        self.verbose = verbose
        self.fd_tile = fixtureDef(
            shape=polygonShape(vertices=[(0, 0), (1, 0), (1, -1), (0, -1)])
        )

        self.action_space = spaces.Box(
            np.array([-1, 0, 0]).astype(np.float32),
            np.array([+1, +1, +1]).astype(np.float32),
        )  # steer, gas, brake

        self.observation_space = spaces.Box(
            low=0, high=255, shape=(STATE_H, STATE_W, 3), dtype=np.uint8
        )

    def seed(self, seed=None):
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    def _destroy(self):
        if not self.road:
            return
        for t in self.road:
            self.world.DestroyBody(t)
        self.road = []
        self.car.destroy()

    def _create_track(self):
        CHECKPOINTS = 12

        # Create checkpoints
        checkpoints = []
        for c in range(CHECKPOINTS):
            noise = self.np_random.uniform(0, 2 * math.pi * 1 / CHECKPOINTS)
            alpha = 2 * math.pi * c / CHECKPOINTS + noise
            rad = self.np_random.uniform(TRACK_RAD / 3, TRACK_RAD)

            if c == 0:
                alpha = 0
                rad = 1.5 * TRACK_RAD
            if c == CHECKPOINTS - 1:
                alpha = 2 * math.pi * c / CHECKPOINTS
                self.start_alpha = 2 * math.pi * (-0.5) / CHECKPOINTS
                rad = 1.5 * TRACK_RAD

            checkpoints.append((alpha, rad * math.cos(alpha), rad * math.sin(alpha)))
        self.road = []

        # Go from one checkpoint to another to create track
        x, y, beta = 1.5 * TRACK_RAD, 0, 0
        dest_i = 0
        laps = 0
        track = []
        no_freeze = 2500
        visited_other_side = False
        while True:
            alpha = math.atan2(y, x)
            if visited_other_side and alpha > 0:
                laps += 1
                visited_other_side = False
            if alpha < 0:
                visited_other_side = True
                alpha += 2 * math.pi

            while True:  # Find destination from checkpoints
                failed = True

                while True:
                    dest_alpha, dest_x, dest_y = checkpoints[dest_i % len(checkpoints)]
                    if alpha <= dest_alpha:
                        failed = False
                        break
                    dest_i += 1
                    if dest_i % len(checkpoints) == 0:
                        break

                if not failed:
                    break

                alpha -= 2 * math.pi
                continue

            r1x = math.cos(beta)
            r1y = math.sin(beta)
            p1x = -r1y
            p1y = r1x
            dest_dx = dest_x - x  # vector towards destination
            dest_dy = dest_y - y
            # destination vector projected on rad:
            proj = r1x * dest_dx + r1y * dest_dy
            while beta - alpha > 1.5 * math.pi:
                beta -= 2 * math.pi
            while beta - alpha < -1.5 * math.pi:
                beta += 2 * math.pi
            prev_beta = beta
            proj *= SCALE
            if proj > 0.3:
                beta -= min(TRACK_TURN_RATE, abs(0.001 * proj))
            if proj < -0.3:
                beta += min(TRACK_TURN_RATE, abs(0.001 * proj))
            x += p1x * TRACK_DETAIL_STEP
            y += p1y * TRACK_DETAIL_STEP
            track.append((alpha, prev_beta * 0.5 + beta * 0.5, x, y))
            if laps > 4:
                break
            no_freeze -= 1
            if no_freeze == 0:
                break

        # Find closed loop range i1..i2, first loop should be ignored, second is OK
        i1, i2 = -1, -1
        i = len(track)
        while True:
            i -= 1
            if i == 0:
                return False  # Failed
            pass_through_start = (
                track[i][0] > self.start_alpha and track[i - 1][0] <= self.start_alpha
            )
            if pass_through_start and i2 == -1:
                i2 = i
            elif pass_through_start and i1 == -1:
                i1 = i
                break
        if self.verbose == 1:
            print("Track generation: %i..%i -> %i-tiles track" % (i1, i2, i2 - i1))
        assert i1 != -1
        assert i2 != -1

        track = track[i1 : i2 - 1]

        first_beta = track[0][1]
        first_perp_x = math.cos(first_beta)
        first_perp_y = math.sin(first_beta)
        # Length of perpendicular jump to put together head and tail
        well_glued_together = np.sqrt(
            np.square(first_perp_x * (track[0][2] - track[-1][2]))
            + np.square(first_perp_y * (track[0][3] - track[-1][3]))
        )
        if well_glued_together > TRACK_DETAIL_STEP:
            return False

        # Red-white border on hard turns
        border = [False] * len(track)
        for i in range(len(track)):
            good = True
            oneside = 0
            for neg in range(BORDER_MIN_COUNT):
                beta1 = track[i - neg - 0][1]
                beta2 = track[i - neg - 1][1]
                good &= abs(beta1 - beta2) > TRACK_TURN_RATE * 0.2
                oneside += np.sign(beta1 - beta2)
            good &= abs(oneside) == BORDER_MIN_COUNT
            border[i] = good
        for i in range(len(track)):
            for neg in range(BORDER_MIN_COUNT):
                border[i - neg] |= border[i]

        # Create tiles
        for i in range(len(track)):
            alpha1, beta1, x1, y1 = track[i]
            alpha2, beta2, x2, y2 = track[i - 1]
            road1_l = (
                x1 - TRACK_WIDTH * math.cos(beta1),
                y1 - TRACK_WIDTH * math.sin(beta1),
            )
            road1_r = (
                x1 + TRACK_WIDTH * math.cos(beta1),
                y1 + TRACK_WIDTH * math.sin(beta1),
            )
            road2_l = (
                x2 - TRACK_WIDTH * math.cos(beta2),
                y2 - TRACK_WIDTH * math.sin(beta2),
            )
            road2_r = (
                x2 + TRACK_WIDTH * math.cos(beta2),
                y2 + TRACK_WIDTH * math.sin(beta2),
            )
            vertices = [road1_l, road1_r, road2_r, road2_l]
            self.fd_tile.shape.vertices = vertices
            t = self.world.CreateStaticBody(fixtures=self.fd_tile)
            t.userData = t
            c = 0.01 * (i % 3)
            t.color = [ROAD_COLOR[0] + c, ROAD_COLOR[1] + c, ROAD_COLOR[2] + c]
            t.road_visited = False
            t.road_friction = 1.0
            t.fixtures[0].sensor = True
            self.road_poly.append(([road1_l, road1_r, road2_r, road2_l], t.color))
            self.road.append(t)
            if border[i]:
                side = np.sign(beta2 - beta1)
                b1_l = (
                    x1 + side * TRACK_WIDTH * math.cos(beta1),
                    y1 + side * TRACK_WIDTH * math.sin(beta1),
                )
                b1_r = (
                    x1 + side * (TRACK_WIDTH + BORDER) * math.cos(beta1),
                    y1 + side * (TRACK_WIDTH + BORDER) * math.sin(beta1),
                )
                b2_l = (
                    x2 + side * TRACK_WIDTH * math.cos(beta2),
                    y2 + side * TRACK_WIDTH * math.sin(beta2),
                )
                b2_r = (
                    x2 + side * (TRACK_WIDTH + BORDER) * math.cos(beta2),
                    y2 + side * (TRACK_WIDTH + BORDER) * math.sin(beta2),
                )
                self.road_poly.append(
                    ([b1_l, b1_r, b2_r, b2_l], (1, 1, 1) if i % 2 == 0 else (1, 0, 0))
                )
        self.track = track
        return True

    def reset(self):
        self._destroy()
        self.reward = 0.0
        self.prev_reward = 0.0
        self.tile_visited_count = 0
        self.t = 0.0
        self.road_poly = []

        while True:
            success = self._create_track()
            if success:
                break
            if self.verbose == 1:
                print(
                    "retry to generate track (normal if there are not many"
                    "instances of this message)"
                )
        self.car = Car(self.world, *self.track[0][1:4])

        return self.step(None)[0]

    def step(self, action):
        if action is not None:
            self.car.steer(-action[0])
            self.car.gas(action[1])
            self.car.brake(action[2])

        self.car.step(1.0 / FPS)
        self.world.Step(1.0 / FPS, 6 * 30, 2 * 30)
        self.t += 1.0 / FPS

        self.state = self.render("state_pixels")

        step_reward = 0
        done = False
        if action is not None:  # First step without action, called from reset()
            self.reward -= 0.1
            # We actually don't want to count fuel spent, we want car to be faster.
            # self.reward -=  10 * self.car.fuel_spent / ENGINE_POWER
            self.car.fuel_spent = 0.0
            step_reward = self.reward - self.prev_reward
            self.prev_reward = self.reward
            if self.tile_visited_count == len(self.track):
                done = True
            x, y = self.car.hull.position
            if abs(x) > PLAYFIELD or abs(y) > PLAYFIELD:
                done = True
                step_reward = -100

        return self.state, step_reward, done, {}

    def render(self, mode="human"):
        assert mode in ["human", "state_pixels", "rgb_array"]
        if self.viewer is None:
            from gym.envs.classic_control import rendering

            self.viewer = rendering.Viewer(WINDOW_W, WINDOW_H)
            self.score_label = pyglet.text.Label(
                "0000",
                font_size=36,
                x=20,
                y=WINDOW_H * 2.5 / 40.00,
                anchor_x="left",
                anchor_y="center",
                color=(255, 255, 255, 255),
            )
            self.transform = rendering.Transform()

        if "t" not in self.__dict__:
            return  # reset() not called yet

        # Animate zoom first second:
        zoom = 0.1 * SCALE * max(1 - self.t, 0) + ZOOM * SCALE * min(self.t, 1)
        scroll_x = self.car.hull.position[0]
        scroll_y = self.car.hull.position[1]
        angle = -self.car.hull.angle
        vel = self.car.hull.linearVelocity
        if np.linalg.norm(vel) > 0.5:
            angle = math.atan2(vel[0], vel[1])
        self.transform.set_scale(zoom, zoom)
        self.transform.set_translation(
            WINDOW_W / 2
            - (scroll_x * zoom * math.cos(angle) - scroll_y * zoom * math.sin(angle)),
            WINDOW_H / 4
            - (scroll_x * zoom * math.sin(angle) + scroll_y * zoom * math.cos(angle)),
        )
        self.transform.set_rotation(angle)

        self.car.draw(self.viewer, mode != "state_pixels")

        arr = None
        win = self.viewer.window
        win.switch_to()
        win.dispatch_events()

        win.clear()
        t = self.transform
        if mode == "rgb_array":
            VP_W = VIDEO_W
            VP_H = VIDEO_H
        elif mode == "state_pixels":
            VP_W = STATE_W
            VP_H = STATE_H
        else:
            pixel_scale = 1
            if hasattr(win.context, "_nscontext"):
                pixel_scale = (
                    win.context._nscontext.view().backingScaleFactor()
                )  # pylint: disable=protected-access
            VP_W = int(pixel_scale * WINDOW_W)
            VP_H = int(pixel_scale * WINDOW_H)

        gl.glViewport(0, 0, VP_W, VP_H)
        t.enable()
        self.render_road()
        for geom in self.viewer.onetime_geoms:
            geom.render()
        self.viewer.onetime_geoms = []
        t.disable()
        self.render_indicators(WINDOW_W, WINDOW_H)

        if mode == "human":
            win.flip()
            return self.viewer.isopen

        image_data = (
            pyglet.image.get_buffer_manager().get_color_buffer().get_image_data()
        )
        arr = np.fromstring(image_data.get_data(), dtype=np.uint8, sep="")
        arr = arr.reshape(VP_H, VP_W, 4)
        arr = arr[::-1, :, 0:3]

        return arr

    def close(self):
        if self.viewer is not None:
            self.viewer.close()
            self.viewer = None

    def render_road(self):
        colors = [0.4, 0.8, 0.4, 1.0] * 4
        polygons_ = [
            +PLAYFIELD,
            +PLAYFIELD,
            0,
            +PLAYFIELD,
            -PLAYFIELD,
            0,
            -PLAYFIELD,
            -PLAYFIELD,
            0,
            -PLAYFIELD,
            +PLAYFIELD,
            0,
        ]

        k = PLAYFIELD / 20.0
        colors.extend([0.4, 0.9, 0.4, 1.0] * 4 * 20 * 20)
        for x in range(-20, 20, 2):
            for y in range(-20, 20, 2):
                polygons_.extend(
                    [
                        k * x + k,
                        k * y + 0,
                        0,
                        k * x + 0,
                        k * y + 0,
                        0,
                        k * x + 0,
                        k * y + k,
                        0,
                        k * x + k,
                        k * y + k,
                        0,
                    ]
                )

        for poly, color in self.road_poly:
            colors.extend([color[0], color[1], color[2], 1] * len(poly))
            for p in poly:
                polygons_.extend([p[0], p[1], 0])

        vl = pyglet.graphics.vertex_list(
            len(polygons_) // 3, ("v3f", polygons_), ("c4f", colors)
        )  # gl.GL_QUADS,
        vl.draw(gl.GL_QUADS)
        vl.delete()

    def render_indicators(self, W, H):
        s = W / 40.0
        h = H / 40.0
        colors = [0, 0, 0, 1] * 4
        polygons = [W, 0, 0, W, 5 * h, 0, 0, 5 * h, 0, 0, 0, 0]

        def vertical_ind(place, val, color):
            colors.extend([color[0], color[1], color[2], 1] * 4)
            polygons.extend(
                [
                    place * s,
                    h + h * val,
                    0,
                    (place + 1) * s,
                    h + h * val,
                    0,
                    (place + 1) * s,
                    h,
                    0,
                    (place + 0) * s,
                    h,
                    0,
                ]
            )

        def horiz_ind(place, val, color):
            colors.extend([color[0], color[1], color[2], 1] * 4)
            polygons.extend(
                [
                    (place + 0) * s,
                    4 * h,
                    0,
                    (place + val) * s,
                    4 * h,
                    0,
                    (place + val) * s,
                    2 * h,
                    0,
                    (place + 0) * s,
                    2 * h,
                    0,
                ]
            )

        true_speed = np.sqrt(
            np.square(self.car.hull.linearVelocity[0])
            + np.square(self.car.hull.linearVelocity[1])
        )

        vertical_ind(5, 0.02 * true_speed, (1, 1, 1))
        vertical_ind(7, 0.01 * self.car.wheels[0].omega, (0.0, 0, 1))  # ABS sensors
        vertical_ind(8, 0.01 * self.car.wheels[1].omega, (0.0, 0, 1))
        vertical_ind(9, 0.01 * self.car.wheels[2].omega, (0.2, 0, 1))
        vertical_ind(10, 0.01 * self.car.wheels[3].omega, (0.2, 0, 1))
        horiz_ind(20, -10.0 * self.car.wheels[0].joint.angle, (0, 1, 0))
        horiz_ind(30, -0.8 * self.car.hull.angularVelocity, (1, 0, 0))
        vl = pyglet.graphics.vertex_list(
            len(polygons) // 3, ("v3f", polygons), ("c4f", colors)
        )  # gl.GL_QUADS,
        vl.draw(gl.GL_QUADS)
        vl.delete()
        self.score_label.text = "%04i" % self.reward
        self.score_label.draw()
Exemplo n.º 30
0
class CarRacing(gym.Env, EzPickle):
    metadata = {
        'render.modes': ['human', 'rgb_array', 'state_pixels'],
        'video.frames_per_second': FPS
    }

    def __init__(self, seed=None, verbose=0):
        EzPickle.__init__(self)
        #self.contactListener_keepref = FrictionDetector(self)
        #self.world = Box2D.b2World((0,0), contactListener=self.contactListener_keepref)
        self.world = Box2D.b2World((0, 0))
        self.id = self.seed(seed=seed)
        self.viewer = None
        self.invisible_state_window = None
        self.invisible_video_window = None
        self.labels = []
        self.road = None
        self.car = None
        self.dt = 1.0 / FPS
        self.action = np.zeros((3, ))
        self.state = np.zeros((11, ))
        self.reward = 0.0
        self.prev_reward = 0.0
        self.verbose = verbose
        self.track_width = TRACK_WIDTH
        self.fd_tile = fixtureDef(shape=polygonShape(
            vertices=[(0, 0), (1, 0), (1, -1), (0, -1)]))
        self.action_space = spaces.Box(np.array([-1, 0, 0]),
                                       np.array([1, 1, 1]),
                                       dtype=np.float32)
        self.observation_space = spaces.Box(low=-np.inf,
                                            high=np.inf,
                                            shape=(11, ),
                                            dtype=np.float32)
        #self.observation_space = spaces.Box(low=0, high=255, shape=(STATE_H, STATE_W, 3), dtype=np.uint8)

    def seed(self, seed=None):
        self.np_random, seed = seeding.np_random(seed)
        return hex(seed)

    def _destroy(self):
        if not self.road:
            return
        for t in self.road:
            self.world.DestroyBody(t)
        self.road = []
        self.car.destroy()

    def _create_track(self):
        CHECKPOINTS = 12

        # Create checkpoints
        checkpoints = []
        for c in range(CHECKPOINTS):
            alpha = 2 * math.pi * c / CHECKPOINTS + self.np_random.uniform(
                0, 2 * math.pi * 1 / CHECKPOINTS)
            rad = self.np_random.uniform(TRACK_RAD / 3, TRACK_RAD)
            if c == 0:
                alpha = 0
                rad = 1.5 * TRACK_RAD
            if c == CHECKPOINTS - 1:
                alpha = 2 * math.pi * c / CHECKPOINTS
                self.start_alpha = 2 * math.pi * (-0.5) / CHECKPOINTS
                rad = 1.5 * TRACK_RAD
            checkpoints.append(
                (alpha, rad * math.cos(alpha), rad * math.sin(alpha)))

        #print "\n".join(str(h) for h in checkpoints)
        #self.road_poly = [([(tx,ty) for a,tx,ty in checkpoints], (0.7,0.7,0.9))] # uncomment this to see checkpoints

        self.road = []

        # Go from one checkpoint to another to create track
        x, y, beta = 1.5 * TRACK_RAD, 0, 0
        dest_i = 0
        laps = 0
        track = []
        no_freeze = 2500
        visited_other_side = False
        while True:
            alpha = math.atan2(y, x)
            if visited_other_side and alpha > 0:
                laps += 1
                visited_other_side = False
            if alpha < 0:
                visited_other_side = True
                alpha += 2 * math.pi
            while True:  # Find destination from checkpoints
                failed = True
                while True:
                    dest_alpha, dest_x, dest_y = checkpoints[dest_i %
                                                             len(checkpoints)]
                    if alpha <= dest_alpha:
                        failed = False
                        break
                    dest_i += 1
                    if dest_i % len(checkpoints) == 0:
                        break
                if not failed:
                    break
                alpha -= 2 * math.pi
                continue
            r1x = math.cos(beta)
            r1y = math.sin(beta)
            p1x = -r1y
            p1y = r1x
            dest_dx = dest_x - x  # vector towards destination
            dest_dy = dest_y - y
            proj = r1x * dest_dx + r1y * dest_dy  # destination vector projected on rad
            while beta - alpha > 1.5 * math.pi:
                beta -= 2 * math.pi
            while beta - alpha < -1.5 * math.pi:
                beta += 2 * math.pi
            prev_beta = beta
            proj *= SCALE
            if proj > 0.3:
                beta -= min(TRACK_TURN_RATE, abs(0.001 * proj))
            if proj < -0.3:
                beta += min(TRACK_TURN_RATE, abs(0.001 * proj))
            x += p1x * TRACK_DETAIL_STEP
            y += p1y * TRACK_DETAIL_STEP
            track.append((alpha, prev_beta * 0.5 + beta * 0.5, x, y))
            if laps > 4:
                break
            no_freeze -= 1
            if no_freeze == 0:
                break
        #print "\n".join([str(t) for t in enumerate(track)])

        # Find closed loop range i1..i2, first loop should be ignored, second is OK
        i1, i2 = -1, -1
        i = len(track)
        while True:
            i -= 1
            if i == 0:
                return False  # Failed
            pass_through_start = track[i][0] > self.start_alpha and track[
                i - 1][0] <= self.start_alpha
            if pass_through_start and i2 == -1:
                i2 = i
            elif pass_through_start and i1 == -1:
                i1 = i
                break
        if self.verbose == 1:
            print("Track generation: %i..%i -> %i-tiles track" %
                  (i1, i2, i2 - i1))
        assert i1 != -1
        assert i2 != -1

        track = track[i1:i2 - 1]

        first_beta = track[0][1]
        first_perp_x = math.cos(first_beta)
        first_perp_y = math.sin(first_beta)
        # Length of perpendicular jump to put together head and tail
        well_glued_together = np.sqrt(
            np.square(first_perp_x * (track[0][2] - track[-1][2])) +
            np.square(first_perp_y * (track[0][3] - track[-1][3])))
        if well_glued_together > TRACK_DETAIL_STEP:
            return False

        # Red-white border on hard turns
        border = [False] * len(track)
        for i in range(len(track)):
            good = True
            oneside = 0
            for neg in range(BORDER_MIN_COUNT):
                beta1 = track[i - neg - 0][1]
                beta2 = track[i - neg - 1][1]
                good &= abs(beta1 - beta2) > TRACK_TURN_RATE * 0.2
                oneside += np.sign(beta1 - beta2)
            good &= abs(oneside) == BORDER_MIN_COUNT
            border[i] = good
        for i in range(len(track)):
            for neg in range(BORDER_MIN_COUNT):
                border[i - neg] |= border[i]

        # Create tiles
        for i in range(len(track)):
            alpha1, beta1, x1, y1 = track[i]
            alpha2, beta2, x2, y2 = track[i - 1]
            road1_l = (x1 - TRACK_WIDTH * math.cos(beta1),
                       y1 - TRACK_WIDTH * math.sin(beta1))
            road1_r = (x1 + TRACK_WIDTH * math.cos(beta1),
                       y1 + TRACK_WIDTH * math.sin(beta1))
            road2_l = (x2 - TRACK_WIDTH * math.cos(beta2),
                       y2 - TRACK_WIDTH * math.sin(beta2))
            road2_r = (x2 + TRACK_WIDTH * math.cos(beta2),
                       y2 + TRACK_WIDTH * math.sin(beta2))
            vertices = [road1_l, road1_r, road2_r, road2_l]
            self.fd_tile.shape.vertices = vertices
            t = self.world.CreateStaticBody(fixtures=self.fd_tile)
            t.userData = t
            c = 0.01 * (i % 3)
            #t.color = [ROAD_COLOR[0] + c, ROAD_COLOR[1] + c, ROAD_COLOR[2] + c]
            t.color = ROAD_COLOR
            t.road_visited = False
            t.road_friction = ROAD_FRICTION
            t.fixtures[0].sensor = True
            self.road_poly.append(([road1_l, road1_r, road2_r,
                                    road2_l], t.color))
            self.road.append(t)
            if border[i]:
                side = np.sign(beta2 - beta1)
                b1_l = (x1 + side * TRACK_WIDTH * math.cos(beta1),
                        y1 + side * TRACK_WIDTH * math.sin(beta1))
                b1_r = (x1 + side * (TRACK_WIDTH + BORDER) * math.cos(beta1),
                        y1 + side * (TRACK_WIDTH + BORDER) * math.sin(beta1))
                b2_l = (x2 + side * TRACK_WIDTH * math.cos(beta2),
                        y2 + side * TRACK_WIDTH * math.sin(beta2))
                b2_r = (x2 + side * (TRACK_WIDTH + BORDER) * math.cos(beta2),
                        y2 + side * (TRACK_WIDTH + BORDER) * math.sin(beta2))
                self.road_poly.append(([b1_l, b1_r, b2_r,
                                        b2_l], (1, 1, 1) if i % 2 == 0 else
                                       (1, 0, 0)))
        self.track = track
        return True

    def reset(self):
        self._destroy()
        self.reward = 0.0
        self.prev_reward = 0.0
        self.tile_visited_count = 0
        self.t = 0.0
        self.road_poly = []

        while True:
            success = self._create_track()
            if success:
                break
            if self.verbose == 1:
                print(
                    "retry to generate track (normal if there are not many of this messages)"
                )
        self.car = Car(self.world, *self.track[0][1:4])

        return self.step(None)

    def step(self, action):
        if action is not None:
            self.action = np.array(action)
            self.car.steer(-action[0])
            self.car.gas(action[1])
            self.car.brake(action[2])

        self.car.step(self.dt)
        self.world.Step(self.dt, 6 * 30, 2 * 30)
        self.t += self.dt
        self.render("state_pixels")

        # Update vehicle state
        self.state[0:2] = self.car.hull.position
        self.state[2] = (self.car.hull.angle + np.pi / 2) % (2 * np.pi)
        self.state[3:5] = self.car.hull.linearVelocity
        self.state[5] = self.car.hull.angularVelocity
        self.state[6] = self.car.wheels[0].joint.angle
        self.state[7] = self.car.wheels[0].omega
        self.state[8] = self.car.wheels[1].omega
        self.state[9] = self.car.wheels[2].omega
        self.state[10] = self.car.wheels[3].omega

        step_reward = 0
        done = False
        if action is not None:  # First step without action, called from reset()
            self.reward -= 0.1
            # We actually don't want to count fuel spent, we want car to be faster.
            # self.reward -=  10 * self.car.fuel_spent / ENGINE_POWER
            self.car.fuel_spent = 0.0
            step_reward = self.reward - self.prev_reward
            self.prev_reward = self.reward
            if self.tile_visited_count == len(self.track):
                done = True
            x, y = self.car.hull.position
            if abs(x) > PLAYFIELD or abs(y) > PLAYFIELD:
                done = True
                step_reward = -100

        return self.state

    def render(self, mode='human'):
        assert mode in ['human', 'state_pixels', 'rgb_array']
        if self.viewer is None:
            from gym.envs.classic_control import rendering
            self.viewer = rendering.Viewer(WINDOW_W, WINDOW_H)

            self.labels.append(
                pyglet.text.Label('Input',
                                  font_size=15,
                                  x=WINDOW_W / 16 * 3,
                                  y=WINDOW_H / 12,
                                  anchor_x='center',
                                  anchor_y='center',
                                  color=(255, 255, 255, 255)))

            self.labels.append(
                pyglet.text.Label('S',
                                  font_size=12,
                                  x=WINDOW_W / 64 * 7,
                                  y=WINDOW_H / 100 * 3,
                                  anchor_x='center',
                                  anchor_y='center',
                                  color=(255, 255, 255, 255)))

            self.labels.append(
                pyglet.text.Label('T',
                                  font_size=12,
                                  x=WINDOW_W / 64 * 14,
                                  y=WINDOW_H / 100 * 3,
                                  anchor_x='center',
                                  anchor_y='center',
                                  color=(255, 255, 255, 255)))

            self.labels.append(
                pyglet.text.Label('B',
                                  font_size=12,
                                  x=WINDOW_W / 64 * 17,
                                  y=WINDOW_H / 100 * 3,
                                  anchor_x='center',
                                  anchor_y='center',
                                  color=(255, 255, 255, 255)))

            self.labels.append(
                pyglet.text.Label('Linear Velocity',
                                  font_size=15,
                                  x=WINDOW_W / 2,
                                  y=WINDOW_H / 12,
                                  anchor_x='center',
                                  anchor_y='center',
                                  color=(255, 255, 255, 255)))

            self.labels.append(
                pyglet.text.Label('FL FR',
                                  font_size=12,
                                  x=WINDOW_W / 16 * 7,
                                  y=WINDOW_H / 100 * 3,
                                  anchor_x='center',
                                  anchor_y='center',
                                  color=(255, 255, 255, 255)))

            self.labels.append(
                pyglet.text.Label('RL RR',
                                  font_size=12,
                                  x=WINDOW_W / 2,
                                  y=WINDOW_H / 100 * 3,
                                  anchor_x='center',
                                  anchor_y='center',
                                  color=(255, 255, 255, 255)))

            self.labels.append(
                pyglet.text.Label('C',
                                  font_size=12,
                                  x=WINDOW_W / 64 * 37,
                                  y=WINDOW_H / 100 * 3,
                                  anchor_x='center',
                                  anchor_y='center',
                                  color=(255, 255, 255, 255)))

            self.labels.append(
                pyglet.text.Label('Angular Velocity',
                                  font_size=15,
                                  x=WINDOW_W / 16 * 13,
                                  y=WINDOW_H / 12,
                                  anchor_x='center',
                                  anchor_y='center',
                                  color=(255, 255, 255, 255)))

            self.labels.append(
                pyglet.text.Label('C',
                                  font_size=12,
                                  x=WINDOW_W / 16 * 13,
                                  y=WINDOW_H / 100 * 3,
                                  anchor_x='center',
                                  anchor_y='center',
                                  color=(255, 255, 255, 255)))

            self.transform = rendering.Transform()

        if "t" not in self.__dict__: return  # reset() not called yet

        #zoom = 0.1*SCALE*max(1-self.t, 0) + ZOOM*SCALE*min(self.t, 1)   # Animate zoom first second
        zoom = np.clip((ZOOM * SCALE - 1) * np.power(self.t, 5) + 1, 1,
                       ZOOM * SCALE)
        zoom_state = ZOOM * SCALE * STATE_W / WINDOW_W
        zoom_video = ZOOM * SCALE * VIDEO_W / WINDOW_W
        scroll_x = self.car.hull.position[0]
        scroll_y = self.car.hull.position[1]
        angle = -self.car.hull.angle
        vel = self.car.hull.linearVelocity
        if np.linalg.norm(vel) > 0.5:
            angle = math.atan2(vel[0], vel[1])
        self.transform.set_scale(zoom, zoom)
        self.transform.set_translation(
            WINDOW_W / 2 - (scroll_x * zoom * math.cos(angle) -
                            scroll_y * zoom * math.sin(angle)),
            WINDOW_H / 4 - (scroll_x * zoom * math.sin(angle) +
                            scroll_y * zoom * math.cos(angle)))
        self.transform.set_rotation(angle)

        self.car.draw(self.viewer, mode != "state_pixels")

        arr = None
        win = self.viewer.window
        win.switch_to()
        win.dispatch_events()

        win.clear()
        t = self.transform
        if mode == 'rgb_array':
            VP_W = VIDEO_W
            VP_H = VIDEO_H
        elif mode == 'state_pixels':
            VP_W = STATE_W
            VP_H = STATE_H
        else:
            pixel_scale = 1
            if hasattr(win.context, '_nscontext'):
                pixel_scale = win.context._nscontext.view().backingScaleFactor(
                )  # pylint: disable=protected-access
            VP_W = int(pixel_scale * WINDOW_W)
            VP_H = int(pixel_scale * WINDOW_H)

        gl.glViewport(0, 0, VP_W, VP_H)
        t.enable()
        self.render_road()
        for geom in self.viewer.onetime_geoms:
            geom.render()
        self.viewer.onetime_geoms = []
        t.disable()
        self.render_indicators(WINDOW_W, WINDOW_H)

        if mode == 'human':
            win.flip()
            return self.viewer.isopen

        image_data = pyglet.image.get_buffer_manager().get_color_buffer(
        ).get_image_data()
        arr = np.fromstring(image_data.get_data(), dtype=np.uint8, sep='')
        arr = arr.reshape(VP_H, VP_W, 4)
        arr = arr[::-1, :, 0:3]

        return arr

    def close(self):
        if self.viewer is not None:
            self.viewer.close()
            self.viewer = None

    def render_road(self):
        gl.glBegin(gl.GL_QUADS)
        #gl.glColor4f(0.4, 0.8, 0.4, 1.0)
        gl.glColor4f(0.75, 0.75, 0.75, 1.0)
        gl.glVertex3f(-PLAYFIELD, +PLAYFIELD, 0)
        gl.glVertex3f(+PLAYFIELD, +PLAYFIELD, 0)
        gl.glVertex3f(+PLAYFIELD, -PLAYFIELD, 0)
        gl.glVertex3f(-PLAYFIELD, -PLAYFIELD, 0)
        #gl.glColor4f(0.4, 0.9, 0.4, 1.0)
        gl.glColor4f(0.65, 0.65, 0.65, 1.0)
        k = PLAYFIELD / 20.0
        for x in range(-20, 20, 2):
            for y in range(-20, 20, 2):
                gl.glVertex3f(k * x + k, k * y + 0, 0)
                gl.glVertex3f(k * x + 0, k * y + 0, 0)
                gl.glVertex3f(k * x + 0, k * y + k, 0)
                gl.glVertex3f(k * x + k, k * y + k, 0)
        for i, (poly, color) in enumerate(self.road_poly):
            if i == 2:
                gl.glColor4f(1, 1, 1, 1)
            else:
                gl.glColor4f(color[0], color[1], color[2], 1)

            for p in poly:
                gl.glVertex3f(p[0], p[1], 0)
        gl.glEnd()

    def render_indicators(self, W, H):
        gl.glBegin(gl.GL_QUADS)
        gl.glColor4f(0, 0, 0, 0.2)
        gl.glVertex3f(W, 0, 0)
        gl.glVertex3f(W, H / 10, 0)
        gl.glVertex3f(0, H / 10, 0)
        gl.glVertex3f(0, 0, 0)

        w = W / 100
        h = H / 100

        def ver_ind(place, val, color):
            gl.glColor4f(color[0], color[1], color[2], color[3])
            gl.glVertex3f(place - 1.5 * w, h * val, 0)
            gl.glVertex3f(place + 1.5 * w, h * val, 0)
            gl.glVertex3f(place + 1.5 * w, 0, 0)
            gl.glVertex3f(place - 1.5 * w, 0, 0)

        def hor_ind(place, val, color):
            gl.glColor4f(color[0], color[1], color[2], color[3])
            gl.glVertex3f(place, 5 * h, 0)
            gl.glVertex3f(place + w * val, 5 * h, 0)
            gl.glVertex3f(place + w * val, h, 0)
            gl.glVertex3f(place, h, 0)

        true_speed = np.linalg.norm(self.car.hull.linearVelocity)
        hor_ind(W / 64 * 7, 7 * self.action[0], (1, 1, 0, 0.7))
        ver_ind(W / 64 * 14, 6 * self.action[1], (0, 1, 0, 0.7))
        ver_ind(W / 64 * 17, 6 * self.action[2], (1, 0, 0, 0.7))
        ver_ind(W / 16 * 7 - 1.5 * w, 0.025 * self.car.wheels[0].omega,
                (0, 0.7, 1, 0.7))
        ver_ind(W / 16 * 7 + 1.5 * w, 0.025 * self.car.wheels[1].omega,
                (0, 0.7, 1, 0.7))
        ver_ind(W / 2 - 1.5 * w, 0.025 * self.car.wheels[2].omega,
                (0, 0.5, 1, 0.7))
        ver_ind(W / 2 + 1.5 * w, 0.025 * self.car.wheels[3].omega,
                (0, 0.5, 1, 0.8))
        ver_ind(W / 64 * 37, 0.05 * true_speed, (0, 0, 1, 0.7))
        hor_ind(W / 16 * 13, -1 * self.car.hull.angularVelocity,
                (0.5, 0, 1, 0.7))
        gl.glEnd()
        for label in self.labels:
            label.draw()
Exemplo n.º 31
0
class CarRacing(gym.Env, EzPickle):
    metadata = {
        "render.modes": ["human", "rgb_array", "state_pixels"],
        "video.frames_per_second": FPS,
    }

    def __init__(self, verbose=1, obstacles=False):
        EzPickle.__init__(self)
        self.SI = SI(env=self,
                     car_shape=(4, 8),
                     image_shape=(STATE_W, STATE_H),
                     render_distance=40,
                     road_width=40 / 6,
                     fill=True,
                     interpolate=True,
                     obstacles=obstacles)
        self.seed()
        self.contactListener_keepref = FrictionDetector(self)
        self.world = Box2D.b2World(
            (0, 0), contactListener=self.contactListener_keepref)
        self.viewer = None
        self.invisible_state_window = None
        self.invisible_video_window = None
        self.road = None
        self.car = None
        self.reward = 0.0
        self.prev_reward = 0.0
        self.verbose = verbose
        self.steps = 0
        self.n_obstacles = 10
        self.obstacles = obstacles
        self.dim_obstacles = (0.5, 0.5)
        self.collision_threshold = 0.1  #1m distance between obstacle and vehicle
        self.COLLISION = False
        self.fd_tile = fixtureDef(
            shape=polygonShape(vertices=[(0, 0), (1, 0), (1, -1), (0, -1)])
        )  # The fd_tile variable defines the fixture with the shape defined as a rectangle with coordinates
        # [(0, 0) | (1, 0)]
        # [(0,-1) | (1,-1)]
        self.action_space = spaces.Box(np.array([-1, 0, 0]),
                                       np.array([+1, +1, +1]),
                                       dtype=np.float32)  # steer, gas, brake
        self.observation_space = spaces.Box(low=0,
                                            high=255,
                                            shape=(STATE_H, STATE_W, 3),
                                            dtype=np.uint8)
        self.R = lambda x, y, angle: [
            x * np.cos(angle) - y * np.sin(angle), y * np.cos(angle) + x * np.
            sin(angle)
        ]

    def _create_obstacles(self):
        "This function randomly generates obstacles along the track for the vehicle to avoid"

        obstacle_interval = np.floor(
            (len(self.track) - 1) / (self.n_obstacles)
        )  # after how many track vertices must a obstacle appear.
        count = 0
        self.obstacles_pos = np.zeros((self.n_obstacles, 4, 2))

        for i in range(len(self.track)):
            if i % obstacle_interval == 0 and count < self.n_obstacles and i > 1:
                count += 1
                alpha1, beta1, x1, y1 = self.track[i]
                alpha2, beta2, x2, y2 = self.track[i - 1]

                sign = 1 if np.random.random() < 0.5 else -1

                road1_l = (x1 - sign * self.dim_obstacles[0] * math.cos(beta1),
                           y1 - sign * self.dim_obstacles[0] * math.sin(beta1))

                road1_r = (x1 + sign * self.dim_obstacles[1] * math.cos(beta1),
                           y1 + sign * self.dim_obstacles[1] * math.sin(beta1))

                road2_l = (x2 - sign * self.dim_obstacles[0] * math.cos(beta2),
                           y2 - sign * self.dim_obstacles[0] * math.sin(beta2))

                road2_r = (x2 + sign * self.dim_obstacles[1] * math.cos(beta2),
                           y2 + sign * self.dim_obstacles[1] * math.sin(beta2))

                self.obstacle_poly.extend([
                    road1_l[0], road1_l[1], 0, road1_r[0], road1_r[1], 0,
                    road2_r[0], road2_r[1], 0, road2_l[0], road2_l[1], 0
                ])

                self.obstacles_pos[count - 1, :, 0] = np.array(
                    [road1_l[0], road1_r[0], road2_r[0], road2_l[0]])

                self.obstacles_pos[count - 1, :, 1] = np.array(
                    [road1_l[1], road1_r[1], road2_r[1], road2_l[1]])

        if len(self.obstacle_poly) // 3 == 4 * self.n_obstacles:
            return True
        else:
            print('There was a problem generating the obstacle course')
            return False

    def _create_track(self):
        "The number of checkpoints are the number of turns where the minimum is 2."

        CHECKPOINTS = 12
        # Create checkpoints
        checkpoints = []
        for c in range(CHECKPOINTS):
            noise = self.np_random.uniform(0, 2 * math.pi * 1 / CHECKPOINTS)
            alpha = 2 * math.pi * c / CHECKPOINTS + noise
            rad = self.np_random.uniform(TRACK_RAD / 3, TRACK_RAD)
            if c == 0:
                alpha = 0
                rad = 1.5 * TRACK_RAD
            if c == CHECKPOINTS - 1:
                alpha = 2 * math.pi * c / CHECKPOINTS
                self.start_alpha = 2 * math.pi * (-0.5) / CHECKPOINTS
                rad = 1.5 * TRACK_RAD
            checkpoints.append(
                (alpha, rad * math.cos(alpha), rad * math.sin(alpha)))
        self.road = []

        # Go from one checkpoint to another to create track
        x, y, beta = 1.5 * TRACK_RAD, 0, 0  # The starting x value is always =
        dest_i = 0
        laps = 0  # The number of laps required to finish the course, leave this on 0 - no lap only once through course.
        track = []
        no_freeze = 2500
        visited_other_side = False  # This indicates if the lap is completed

        while True:
            alpha = math.atan2(y, x)
            if visited_other_side and alpha > 0:
                laps += 1
                visited_other_side = False
            if alpha < 0:
                visited_other_side = True
                alpha += 2 * math.pi

            while True:  # Find destination from checkpoints
                failed = True
                while True:
                    dest_alpha, dest_x, dest_y = checkpoints[dest_i %
                                                             len(checkpoints)]
                    if alpha <= dest_alpha:
                        failed = False
                        break
                    dest_i += 1
                    if dest_i % len(checkpoints) == 0:
                        break

                if not failed:
                    break

                alpha -= 2 * math.pi
                continue

            r1x = math.cos(beta)
            r1y = math.sin(beta)
            p1x = -r1y
            p1y = r1x
            dest_dx = dest_x - x  # vector towards destination
            dest_dy = dest_y - y
            # destination vector projected on rad:
            proj = r1x * dest_dx + r1y * dest_dy
            while beta - alpha > 1.5 * math.pi:
                beta -= 2 * math.pi
            while beta - alpha < -1.5 * math.pi:
                beta += 2 * math.pi
            prev_beta = beta
            proj *= SCALE
            if proj > 0.3:
                beta -= min(TRACK_TURN_RATE, abs(0.001 * proj))
            if proj < -0.3:
                beta += min(TRACK_TURN_RATE, abs(0.001 * proj))
            x += p1x * TRACK_DETAIL_STEP
            y += p1y * TRACK_DETAIL_STEP
            track.append((alpha, prev_beta * 0.5 + beta * 0.5, x, y))
            if laps > 4:
                break
            no_freeze -= 1
            if no_freeze == 0:
                break

        # Find closed loop range i1..i2, first loop should be ignored, second is OK
        i1, i2 = -1, -1
        i = len(track)
        while True:
            i -= 1
            if i == 0:
                return False  # Failed
            pass_through_start = (track[i][0] > self.start_alpha
                                  and track[i - 1][0] <= self.start_alpha)
            if pass_through_start and i2 == -1:
                i2 = i
            elif pass_through_start and i1 == -1:
                i1 = i
                break
        if self.verbose == 1:
            print("Track generation: %i..%i -> %i-tiles track" %
                  (i1, i2, i2 - i1))
        assert i1 != -1
        assert i2 != -1

        track = track[i1:i2 - 1]

        first_beta = track[0][1]
        first_perp_x = math.cos(first_beta)
        first_perp_y = math.sin(first_beta)
        # Length of perpendicular jump to put together head and tail
        well_glued_together = np.sqrt(
            np.square(first_perp_x * (track[0][2] - track[-1][2])) +
            np.square(first_perp_y * (track[0][3] - track[-1][3])))
        if well_glued_together > TRACK_DETAIL_STEP:
            return False

        # Red-white border on hard turns
        border = [False] * len(track)
        for i in range(len(track)):
            good = True
            oneside = 0
            for neg in range(BORDER_MIN_COUNT):
                beta1 = track[i - neg - 0][1]
                beta2 = track[i - neg - 1][1]
                good &= abs(beta1 - beta2) > TRACK_TURN_RATE * 0.2
                oneside += np.sign(beta1 - beta2)
            good &= abs(oneside) == BORDER_MIN_COUNT
            border[i] = good
        for i in range(len(track)):
            for neg in range(BORDER_MIN_COUNT):
                border[i - neg] |= border[i]

        # Create tiles
        for i in range(len(track)):
            alpha1, beta1, x1, y1 = track[i]
            alpha2, beta2, x2, y2 = track[i - 1]
            road1_l = (
                x1 - TRACK_WIDTH * math.cos(beta1),
                y1 - TRACK_WIDTH * math.sin(beta1),
            )
            road1_r = (
                x1 + TRACK_WIDTH * math.cos(beta1),
                y1 + TRACK_WIDTH * math.sin(beta1),
            )
            road2_l = (
                x2 - TRACK_WIDTH * math.cos(beta2),
                y2 - TRACK_WIDTH * math.sin(beta2),
            )
            road2_r = (
                x2 + TRACK_WIDTH * math.cos(beta2),
                y2 + TRACK_WIDTH * math.sin(beta2),
            )
            vertices = [road1_l, road1_r, road2_r, road2_l]
            self.fd_tile.shape.vertices = vertices
            t = self.world.CreateStaticBody(
                fixtures=self.fd_tile
            )  # The call of a static body may be very important and is based on the df_tile = [rl1, rr1, rl2, rr2]
            t.userData = t
            c = 0.01 * (i % 3
                        )  # This is the interchanging colors for the tiles
            t.color = [ROAD_COLOR[0] + c, ROAD_COLOR[1] + c, ROAD_COLOR[2] + c]
            t.road_visited = False
            t.road_friction = 1.0  # Here is where we can change the friction coefficient of the road from tar - offroad
            t.fixtures[0].sensor = True
            self.road_poly.append(([road1_l, road1_r, road2_r,
                                    road2_l], t.color))
            self.road.append(t)
            if border[i]:
                side = np.sign(beta2 - beta1)
                b1_l = (
                    x1 + side * TRACK_WIDTH * math.cos(beta1),
                    y1 + side * TRACK_WIDTH * math.sin(beta1),
                )
                b1_r = (
                    x1 + side * (TRACK_WIDTH + BORDER) * math.cos(beta1),
                    y1 + side * (TRACK_WIDTH + BORDER) * math.sin(beta1),
                )
                b2_l = (
                    x2 + side * TRACK_WIDTH * math.cos(beta2),
                    y2 + side * TRACK_WIDTH * math.sin(beta2),
                )
                b2_r = (
                    x2 + side * (TRACK_WIDTH + BORDER) * math.cos(beta2),
                    y2 + side * (TRACK_WIDTH + BORDER) * math.sin(beta2),
                )
                self.road_poly.append(([b1_l, b1_r, b2_r,
                                        b2_l], (1, 1, 1) if i % 2 == 0 else
                                       (1, 0, 0)))
        self.track = track
        return True

    def seed(self, seed=None):
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    def _destroy(self):
        if not self.road:
            return
        for t in self.road:
            self.world.DestroyBody(t)
        self.road = []
        self.car.destroy()

    def reset(self):
        self._destroy()
        self.reward = 0.0
        self.prev_reward = 0.0
        self.tile_visited_count = 0
        self.t = 0.0
        self.road_poly = []
        self.obstacle_poly = []
        self.steps = 0

        while True:
            success_track = self._create_track()

            if self.obstacles:
                if success_track:
                    success_obstacles = self._create_obstacles()
                else:
                    success_obstacles = False
            else:
                success_obstacles = True  # just so it goes through to next stage

            if success_track and success_obstacles:
                break
            if self.verbose == 1:
                print("retry to generate track (normal if there are not many"
                      "instances of this message)")
        self.car = Car(self.world, *self.track[0][1:4])

        return self.step(None)[0]

    def step(self, action):
        self.steps += 1
        if action is not None:
            self.car.steer(-action[0])
            self.car.gas(action[1])
            self.car.brake(action[2])
        self.car.step(1.0 / FPS)
        self.world.Step(1.0 / FPS, 6 * 30, 2 * 30)
        self.t += 1.0 / FPS
        self.state = self.SI.generate_image(
        )  #self.state = self.render("state_pixels")
        step_reward = 0
        done = False

        if self.obstacles:
            self.collision()
        else:
            self.COLLISION = False

        if action is not None:  # First step without action, called from reset()
            self.reward -= 0.1
            # We actually don't want to count fuel spent, we want car to be faster.
            # self.reward -=  10 * self.car.fuel_spent / ENGINE_POWER
            self.car.fuel_spent = 0.0
            step_reward = self.reward - self.prev_reward
            self.prev_reward = self.reward
            if self.tile_visited_count == len(self.track):
                done = True
            x, y = self.car.hull.position
            if abs(x) > PLAYFIELD or abs(y) > PLAYFIELD:
                done = True
                step_reward = -100

            if self.COLLISION:
                done = True
                step_reward = -100

        return self.state, step_reward, done, {}

    def render(self, mode="human"):
        assert mode in ["human", "state_pixels", "rgb_array"]
        if self.viewer is None:
            self.viewer = rendering.Viewer(WINDOW_W, WINDOW_H)
            self.score_label = pyglet.text.Label(
                "0000",
                font_size=36,
                x=20,
                y=WINDOW_H * 2.5 / 40.00,
                anchor_x="left",
                anchor_y="center",
                color=(255, 255, 255, 255),
            )

            self.transform = rendering.Transform()

        if "t" not in self.__dict__:
            return  # reset() not called yet

        # Animate zoom first second:
        zoom = 0.1 * SCALE * max(1 - self.t, 0) + ZOOM * SCALE * min(self.t, 1)
        scroll_x = self.car.hull.position[0]
        scroll_y = self.car.hull.position[1]
        angle = -self.car.hull.angle
        vel = self.car.hull.linearVelocity
        if np.linalg.norm(vel) > 0.5:
            angle = math.atan2(vel[0], vel[1])
        self.transform.set_scale(zoom, zoom)
        self.transform.set_translation(
            WINDOW_W / 2 - (scroll_x * zoom * math.cos(angle) -
                            scroll_y * zoom * math.sin(angle)),
            WINDOW_H / 4 - (scroll_x * zoom * math.sin(angle) +
                            scroll_y * zoom * math.cos(angle)),
        )
        self.transform.set_rotation(angle)
        self.car.draw(self.viewer, mode != "state_pixels")
        arr = None

        #--------
        VP_W = STATE_W
        VP_H = STATE_H
        #--------
        win = self.viewer.window
        win.switch_to()
        win.dispatch_events()
        win.clear()

        t = self.transform
        if mode == "rgb_array":
            VP_W = VIDEO_W
            VP_H = VIDEO_H
        elif mode == "state_pixels":
            VP_W = STATE_W
            VP_H = STATE_H
        else:
            pixel_scale = 1
            if hasattr(win.context, "_nscontext"):
                pixel_scale = (
                    win.context._nscontext.view().backingScaleFactor())  # pylint: disable=protected-access
            VP_W = int(pixel_scale * WINDOW_W)
            VP_H = int(pixel_scale * WINDOW_H)

        gl.glViewport(0, 0, VP_W, VP_H)
        t.enable()

        self.render_road()
        if self.obstacles:
            self.render_obstacles()
            self.render_collision()

        for geom in self.viewer.onetime_geoms:
            geom.render()
        self.viewer.onetime_geoms = []
        t.disable()

        self.render_indicators(WINDOW_W, WINDOW_H)
        if mode == "human":
            win.flip()
            return self.viewer.isopen

        image_data = (pyglet.image.get_buffer_manager().get_color_buffer().
                      get_image_data())
        arr = np.fromstring(image_data.get_data(), dtype=np.uint8, sep="")
        arr = arr.reshape(VP_H, VP_W, 4)
        arr = arr[::-1, :, 0:3]

        return arr

    def close(self):
        if self.viewer is not None:
            self.viewer.close()
            self.viewer = None

    def render_road(self):
        colors = [0.4, 0.8, 0.4, 1.0] * 4
        polygons_ = [
            +PLAYFIELD,
            +PLAYFIELD,
            0,
            +PLAYFIELD,
            -PLAYFIELD,
            0,
            -PLAYFIELD,
            -PLAYFIELD,
            0,
            -PLAYFIELD,
            +PLAYFIELD,
            0,
        ]

        k = PLAYFIELD / 20.0
        colors.extend([0.4, 0.9, 0.4, 1.0] * 4 * 20 * 20)
        for x in range(-20, 20, 2):
            for y in range(-20, 20, 2):
                polygons_.extend([
                    k * x + k,
                    k * y + 0,
                    0,
                    k * x + 0,
                    k * y + 0,
                    0,
                    k * x + 0,
                    k * y + k,
                    0,
                    k * x + k,
                    k * y + k,
                    0,
                ])

        for poly, color in self.road_poly:  # self.road_poly.append(([road1_l, road1_r, road2_r, road2_l], t.color))
            colors.extend([color[0], color[1], color[2], 1] * len(poly))
            for p in poly:
                polygons_.extend([p[0], p[1], 0])

        vl = pyglet.graphics.vertex_list(
            len(polygons_) // 3,
            ("v3f", polygons_),
            (
                "c4f", colors
            )  # gl.GL_QUADS, # The // 3 is dividing by 3 but obtaining only the integer value
        )
        vl.draw(gl.GL_QUADS)

    def render_indicators(self, W, H):
        s = W / 40.0
        h = H / 40.0
        colors = [0, 0, 0, 1] * 4
        polygons = [W, 0, 0, W, 5 * h, 0, 0, 5 * h, 0, 0, 0, 0]

        def vertical_ind(place, val, color):
            colors.extend([color[0], color[1], color[2], 1] * 4)
            polygons.extend([
                place * s,
                h + h * val,
                0,
                (place + 1) * s,
                h + h * val,
                0,
                (place + 1) * s,
                h,
                0,
                (place + 0) * s,
                h,
                0,
            ])

        def horiz_ind(place, val, color):
            colors.extend([color[0], color[1], color[2], 1] * 4)
            polygons.extend([
                (place + 0) * s,
                4 * h,
                0,
                (place + val) * s,
                4 * h,
                0,
                (place + val) * s,
                2 * h,
                0,
                (place + 0) * s,
                2 * h,
                0,
            ])

        true_speed = np.sqrt(
            np.square(self.car.hull.linearVelocity[0]) +
            np.square(self.car.hull.linearVelocity[1]))

        vertical_ind(5, 0.02 * true_speed, (1, 1, 1))
        vertical_ind(7, 0.01 * self.car.wheels[0].omega,
                     (0.0, 0, 1))  # ABS sensors
        vertical_ind(8, 0.01 * self.car.wheels[1].omega, (0.0, 0, 1))
        vertical_ind(9, 0.01 * self.car.wheels[2].omega, (0.2, 0, 1))
        vertical_ind(10, 0.01 * self.car.wheels[3].omega, (0.2, 0, 1))
        horiz_ind(20, -10.0 * self.car.wheels[0].joint.angle, (0, 1, 0))
        horiz_ind(30, -0.8 * self.car.hull.angularVelocity, (1, 0, 0))
        vl = pyglet.graphics.vertex_list(
            len(polygons) // 3,
            ("v3f", polygons),
            ("c4f", colors)  # gl.GL_QUADS,
        )
        vl.draw(gl.GL_QUADS)
        self.score_label.text = "%04i" % self.reward
        self.score_label.draw()

    def render_obstacles(self):
        "This function is responsible for rendering all the obstacles randomly in the course"

        # RGB for all 4 vertices by the number
        C = [255, 5, 5, 255, 5, 5, 255, 5, 5, 255, 5, 5] * self.n_obstacles

        # Divide by 3 because there are 3 components x,y,z
        v2 = pyglet.graphics.vertex_list(
            len(self.obstacle_poly) // 3, ('v3f', self.obstacle_poly),
            ('c3B', C))
        v2.draw(gl.GL_QUADS)

    def render_collision(self):
        if self.COLLISION:
            x, y = self.car.hull.position
            t_angle = self.car.hull.angle
            x1 = (self.R(2, 3, t_angle)[0]) + (x)
            y1 = (self.R(2, 3, t_angle)[1]) + (y)
            x2 = (self.R(-2, 3, t_angle)[0]) + (x)
            y2 = (self.R(-2, 3, t_angle)[1]) + (y)
            x3 = self.R(-2, -3, t_angle)[0] + (x)
            y3 = self.R(-2, -3, t_angle)[1] + (y)
            x4 = self.R(2, -3, t_angle)[0] + (x)
            y4 = self.R(2, -3, t_angle)[1] + (y)

            V = [x1, y1, 0, x2, y2, 0, x3, y3, 0, x4, y4, 0]
            C = [255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255]

            v3 = pyglet.graphics.vertex_list(4, ('v3f', V), ('c3B', C))
            v3.draw(gl.GL_QUADS)

    def collision(self):
        " This function determines whether a collision with an obstacle has occurred or not "
        " The vehicle cannot reverse, so we are only interested in the front two components of the vehicles hull"

        x, y = self.car.hull.position
        t_angle = self.car.hull.angle

        x1 = self.R(2, 3, t_angle)[0] + x
        y1 = self.R(2, 3, t_angle)[1] + y

        x2 = self.R(-2, 3, t_angle)[0] + x
        y2 = self.R(-2, 3, t_angle)[1] + y

        x3 = self.R(-2, -3, t_angle)[0] + x
        y3 = self.R(-2, -3, t_angle)[1] + y

        x4 = self.R(2, -3, t_angle)[0] + x
        y4 = self.R(2, -3, t_angle)[1] + y

        midpoints_upper = [
            (self.obstacles_pos[:, 2, 0] + self.obstacles_pos[:, 3, 0]) / 2,
            (self.obstacles_pos[:, 2, 1] + self.obstacles_pos[:, 3, 1]) / 2
        ]
        midpoints_lower = [
            (self.obstacles_pos[:, 2, 0] + self.obstacles_pos[:, 3, 0]) / 2,
            (self.obstacles_pos[:, 2, 1] + self.obstacles_pos[:, 3, 1]) / 2
        ]

        distance_1 = np.sqrt((x1 - self.obstacles_pos[:, :, 0])**2 +
                             (y1 - self.obstacles_pos[:, :, 1])**2)
        distance_2 = np.sqrt((x2 - self.obstacles_pos[:, :, 0])**2 +
                             (y2 - self.obstacles_pos[:, :, 1])**2)
        distance_3 = np.sqrt((x3 - self.obstacles_pos[:, :, 0])**2 +
                             (y3 - self.obstacles_pos[:, :, 1])**2)
        distance_4 = np.sqrt((x4 - self.obstacles_pos[:, :, 0])**2 +
                             (y4 - self.obstacles_pos[:, :, 1])**2)

        u_midpoint_1 = np.sqrt((x1 - midpoints_upper[0])**2 +
                               (y1 - midpoints_upper[1])**2) * 0.5
        u_midpoint_2 = np.sqrt((x2 - midpoints_upper[0])**2 +
                               (y2 - midpoints_upper[1])**2) * 0.5
        u_midpoint_3 = np.sqrt((x3 - midpoints_upper[0])**2 +
                               (y3 - midpoints_upper[1])**2) * 0.5
        u_midpoint_4 = np.sqrt((x4 - midpoints_upper[0])**2 +
                               (y4 - midpoints_upper[1])**2) * 0.5

        l_midpoint_1 = np.sqrt((x1 - midpoints_lower[0])**2 +
                               (y1 - midpoints_lower[1])**2) * 0.5
        l_midpoint_2 = np.sqrt((x2 - midpoints_lower[0])**2 +
                               (y2 - midpoints_lower[1])**2) * 0.5
        l_midpoint_3 = np.sqrt((x3 - midpoints_lower[0])**2 +
                               (y3 - midpoints_lower[1])**2) * 0.5
        l_midpoint_4 = np.sqrt((x4 - midpoints_lower[0])**2 +
                               (y4 - midpoints_lower[1])**2) * 0.5

        smallest_distance = np.min([
            np.min(distance_1),
            np.min(distance_2),
            np.min(distance_3),
            np.min(distance_4),
            np.min(u_midpoint_1),
            np.min(u_midpoint_2),
            np.min(u_midpoint_3),
            np.min(u_midpoint_4),
            np.min(l_midpoint_1),
            np.min(l_midpoint_2),
            np.min(l_midpoint_3),
            np.min(l_midpoint_4)
        ])

        if smallest_distance < self.collision_threshold:
            self.COLLISION = True
        else:
            self.COLLISION = False
Exemplo n.º 32
0
class CarRacing(gym.Env, EzPickle):
    """
    ### Description
    The easiest continuous control task to learn from pixels - a top-down
    racing environment. Discrete control is reasonable in this environment as
    well; on/off discretization is fine.

    The game is solved when the agent consistently gets 900+ points.
    The generated track is random every episode.

    Some indicators are shown at the bottom of the window along with the
    state RGB buffer. From left to right: true speed, four ABS sensors,
    steering wheel position, and gyroscope.
    To play yourself (it's rather fast for humans), type:
    ```
    python gym/envs/box2d/car_racing.py
    ```
    Remember: it's a powerful rear-wheel drive car - don't press the accelerator
    and turn at the same time.

    ### Action Space
    There are 3 actions: steering (-1 is full left, +1 is full right), gas,
    and breaking.

    ### Observation Space
    State consists of 96x96 pixels.

    ### Rewards
    The reward is -0.1 every frame and +1000/N for every track tile visited,
    where N is the total number of tiles visited in the track. For example,
    if you have finished in 732 frames, your reward is
    1000 - 0.1*732 = 926.8 points.

    ### Starting State
    The car starts at rest in the center of the road.

    ### Episode Termination
    The episode finishes when all of the tiles are visited. The car can also go
    outside of the playfield - that is, far off the track, in which case it will
    receive -100 reward and die.

    ### Arguments
    There are no arguments supported in constructing the environment.

    ### Version History
    - v0: Current version

    ### References
    - Chris Campbell (2014), http://www.iforce2d.net/b2dtut/top-down-car.

    ### Credits
    Created by Oleg Klimov
    """

    metadata = {
        "render_modes": ["human", "rgb_array", "state_pixels"],
        "render_fps": FPS,
    }

    def __init__(self, verbose=1, lap_complete_percent=0.95):
        EzPickle.__init__(self)
        pygame.init()
        self.contactListener_keepref = FrictionDetector(
            self, lap_complete_percent)
        self.world = Box2D.b2World(
            (0, 0), contactListener=self.contactListener_keepref)
        self.screen = None
        self.clock = None
        self.isopen = True
        self.invisible_state_window = None
        self.invisible_video_window = None
        self.road = None
        self.car = None
        self.reward = 0.0
        self.prev_reward = 0.0
        self.verbose = verbose
        self.new_lap = False
        self.fd_tile = fixtureDef(shape=polygonShape(
            vertices=[(0, 0), (1, 0), (1, -1), (0, -1)]))

        # This will throw a warning in tests/envs/test_envs in utils/env_checker.py as the space is not symmetric
        #   or normalised however this is not possible here so ignore
        self.action_space = spaces.Box(
            np.array([-1, 0, 0]).astype(np.float32),
            np.array([+1, +1, +1]).astype(np.float32),
        )  # steer, gas, brake

        self.observation_space = spaces.Box(low=0,
                                            high=255,
                                            shape=(STATE_H, STATE_W, 3),
                                            dtype=np.uint8)

    def _destroy(self):
        if not self.road:
            return
        for t in self.road:
            self.world.DestroyBody(t)
        self.road = []
        self.car.destroy()

    def _create_track(self):
        CHECKPOINTS = 12

        # Create checkpoints
        checkpoints = []
        for c in range(CHECKPOINTS):
            noise = self.np_random.uniform(0, 2 * math.pi * 1 / CHECKPOINTS)
            alpha = 2 * math.pi * c / CHECKPOINTS + noise
            rad = self.np_random.uniform(TRACK_RAD / 3, TRACK_RAD)

            if c == 0:
                alpha = 0
                rad = 1.5 * TRACK_RAD
            if c == CHECKPOINTS - 1:
                alpha = 2 * math.pi * c / CHECKPOINTS
                self.start_alpha = 2 * math.pi * (-0.5) / CHECKPOINTS
                rad = 1.5 * TRACK_RAD

            checkpoints.append(
                (alpha, rad * math.cos(alpha), rad * math.sin(alpha)))
        self.road = []

        # Go from one checkpoint to another to create track
        x, y, beta = 1.5 * TRACK_RAD, 0, 0
        dest_i = 0
        laps = 0
        track = []
        no_freeze = 2500
        visited_other_side = False
        while True:
            alpha = math.atan2(y, x)
            if visited_other_side and alpha > 0:
                laps += 1
                visited_other_side = False
            if alpha < 0:
                visited_other_side = True
                alpha += 2 * math.pi

            while True:  # Find destination from checkpoints
                failed = True

                while True:
                    dest_alpha, dest_x, dest_y = checkpoints[dest_i %
                                                             len(checkpoints)]
                    if alpha <= dest_alpha:
                        failed = False
                        break
                    dest_i += 1
                    if dest_i % len(checkpoints) == 0:
                        break

                if not failed:
                    break

                alpha -= 2 * math.pi
                continue

            r1x = math.cos(beta)
            r1y = math.sin(beta)
            p1x = -r1y
            p1y = r1x
            dest_dx = dest_x - x  # vector towards destination
            dest_dy = dest_y - y
            # destination vector projected on rad:
            proj = r1x * dest_dx + r1y * dest_dy
            while beta - alpha > 1.5 * math.pi:
                beta -= 2 * math.pi
            while beta - alpha < -1.5 * math.pi:
                beta += 2 * math.pi
            prev_beta = beta
            proj *= SCALE
            if proj > 0.3:
                beta -= min(TRACK_TURN_RATE, abs(0.001 * proj))
            if proj < -0.3:
                beta += min(TRACK_TURN_RATE, abs(0.001 * proj))
            x += p1x * TRACK_DETAIL_STEP
            y += p1y * TRACK_DETAIL_STEP
            track.append((alpha, prev_beta * 0.5 + beta * 0.5, x, y))
            if laps > 4:
                break
            no_freeze -= 1
            if no_freeze == 0:
                break

        # Find closed loop range i1..i2, first loop should be ignored, second is OK
        i1, i2 = -1, -1
        i = len(track)
        while True:
            i -= 1
            if i == 0:
                return False  # Failed
            pass_through_start = (track[i][0] > self.start_alpha
                                  and track[i - 1][0] <= self.start_alpha)
            if pass_through_start and i2 == -1:
                i2 = i
            elif pass_through_start and i1 == -1:
                i1 = i
                break
        if self.verbose == 1:
            print("Track generation: %i..%i -> %i-tiles track" %
                  (i1, i2, i2 - i1))
        assert i1 != -1
        assert i2 != -1

        track = track[i1:i2 - 1]

        first_beta = track[0][1]
        first_perp_x = math.cos(first_beta)
        first_perp_y = math.sin(first_beta)
        # Length of perpendicular jump to put together head and tail
        well_glued_together = np.sqrt(
            np.square(first_perp_x * (track[0][2] - track[-1][2])) +
            np.square(first_perp_y * (track[0][3] - track[-1][3])))
        if well_glued_together > TRACK_DETAIL_STEP:
            return False

        # Red-white border on hard turns
        border = [False] * len(track)
        for i in range(len(track)):
            good = True
            oneside = 0
            for neg in range(BORDER_MIN_COUNT):
                beta1 = track[i - neg - 0][1]
                beta2 = track[i - neg - 1][1]
                good &= abs(beta1 - beta2) > TRACK_TURN_RATE * 0.2
                oneside += np.sign(beta1 - beta2)
            good &= abs(oneside) == BORDER_MIN_COUNT
            border[i] = good
        for i in range(len(track)):
            for neg in range(BORDER_MIN_COUNT):
                border[i - neg] |= border[i]

        # Create tiles
        for i in range(len(track)):
            alpha1, beta1, x1, y1 = track[i]
            alpha2, beta2, x2, y2 = track[i - 1]
            road1_l = (
                x1 - TRACK_WIDTH * math.cos(beta1),
                y1 - TRACK_WIDTH * math.sin(beta1),
            )
            road1_r = (
                x1 + TRACK_WIDTH * math.cos(beta1),
                y1 + TRACK_WIDTH * math.sin(beta1),
            )
            road2_l = (
                x2 - TRACK_WIDTH * math.cos(beta2),
                y2 - TRACK_WIDTH * math.sin(beta2),
            )
            road2_r = (
                x2 + TRACK_WIDTH * math.cos(beta2),
                y2 + TRACK_WIDTH * math.sin(beta2),
            )
            vertices = [road1_l, road1_r, road2_r, road2_l]
            self.fd_tile.shape.vertices = vertices
            t = self.world.CreateStaticBody(fixtures=self.fd_tile)
            t.userData = t
            c = 0.01 * (i % 3)
            t.color = [ROAD_COLOR[0] + c, ROAD_COLOR[1] + c, ROAD_COLOR[2] + c]
            t.road_visited = False
            t.road_friction = 1.0
            t.idx = i
            t.fixtures[0].sensor = True
            self.road_poly.append(([road1_l, road1_r, road2_r,
                                    road2_l], t.color))
            self.road.append(t)
            if border[i]:
                side = np.sign(beta2 - beta1)
                b1_l = (
                    x1 + side * TRACK_WIDTH * math.cos(beta1),
                    y1 + side * TRACK_WIDTH * math.sin(beta1),
                )
                b1_r = (
                    x1 + side * (TRACK_WIDTH + BORDER) * math.cos(beta1),
                    y1 + side * (TRACK_WIDTH + BORDER) * math.sin(beta1),
                )
                b2_l = (
                    x2 + side * TRACK_WIDTH * math.cos(beta2),
                    y2 + side * TRACK_WIDTH * math.sin(beta2),
                )
                b2_r = (
                    x2 + side * (TRACK_WIDTH + BORDER) * math.cos(beta2),
                    y2 + side * (TRACK_WIDTH + BORDER) * math.sin(beta2),
                )
                self.road_poly.append(([b1_l, b1_r, b2_r,
                                        b2_l], (1, 1, 1) if i % 2 == 0 else
                                       (1, 0, 0)))
        self.track = track
        return True

    def reset(
        self,
        *,
        seed: Optional[int] = None,
        return_info: bool = False,
        options: Optional[dict] = None,
    ):
        super().reset(seed=seed)
        self._destroy()
        self.reward = 0.0
        self.prev_reward = 0.0
        self.tile_visited_count = 0
        self.t = 0.0
        self.new_lap = False
        self.road_poly = []

        while True:
            success = self._create_track()
            if success:
                break
            if self.verbose == 1:
                print("retry to generate track (normal if there are not many"
                      "instances of this message)")
        self.car = Car(self.world, *self.track[0][1:4])

        if not return_info:
            return self.step(None)[0]
        else:
            return self.step(None)[0], {}

    def step(self, action):
        if action is not None:
            self.car.steer(-action[0])
            self.car.gas(action[1])
            self.car.brake(action[2])

        self.car.step(1.0 / FPS)
        self.world.Step(1.0 / FPS, 6 * 30, 2 * 30)
        self.t += 1.0 / FPS

        self.state = self.render("state_pixels")

        step_reward = 0
        done = False
        if action is not None:  # First step without action, called from reset()
            self.reward -= 0.1
            # We actually don't want to count fuel spent, we want car to be faster.
            # self.reward -=  10 * self.car.fuel_spent / ENGINE_POWER
            self.car.fuel_spent = 0.0
            step_reward = self.reward - self.prev_reward
            self.prev_reward = self.reward
            if self.tile_visited_count == len(self.track) or self.new_lap:
                done = True
            x, y = self.car.hull.position
            if abs(x) > PLAYFIELD or abs(y) > PLAYFIELD:
                done = True
                step_reward = -100

        return self.state, step_reward, done, {}

    def render(self, mode="human"):
        assert mode in ["human", "state_pixels", "rgb_array"]
        if self.screen is None and mode == "human":
            pygame.display.init()
            self.screen = pygame.display.set_mode((WINDOW_W, WINDOW_H))
        if self.clock is None:
            self.clock = pygame.time.Clock()

        if "t" not in self.__dict__:
            return  # reset() not called yet

        self.surf = pygame.Surface((WINDOW_W, WINDOW_H))

        # computing transformations
        angle = -self.car.hull.angle
        # Animating first second zoom.
        zoom = 0.1 * SCALE * max(1 - self.t, 0) + ZOOM * SCALE * min(self.t, 1)
        scroll_x = -(self.car.hull.position[0] + PLAYFIELD) * zoom
        scroll_y = -(self.car.hull.position[1] + PLAYFIELD) * zoom
        trans = pygame.math.Vector2((scroll_x, scroll_y)).rotate_rad(angle)
        trans = (WINDOW_W / 2 + trans[0], WINDOW_H / 4 + trans[1])

        self.render_road(zoom, trans, angle)
        self.car.draw(self.surf, zoom, trans, angle, mode != "state_pixels")

        self.surf = pygame.transform.flip(self.surf, False, True)

        # showing stats
        self.render_indicators(WINDOW_W, WINDOW_H)

        font = pygame.font.Font(pygame.font.get_default_font(), 42)
        text = font.render("%04i" % self.reward, True, (255, 255, 255),
                           (0, 0, 0))
        text_rect = text.get_rect()
        text_rect.center = (60, WINDOW_H - WINDOW_H * 2.5 / 40.0)
        self.surf.blit(text, text_rect)

        if mode == "human":
            pygame.event.pump()
            self.clock.tick(self.metadata["render_fps"])
            self.screen.fill(0)
            self.screen.blit(self.surf, (0, 0))
            pygame.display.flip()

        if mode == "rgb_array":
            return self._create_image_array(self.surf, (VIDEO_W, VIDEO_H))
        elif mode == "state_pixels":
            return self._create_image_array(self.surf, (STATE_W, STATE_H))
        else:
            return self.isopen

    def render_road(self, zoom, translation, angle):
        bounds = PLAYFIELD
        field = [
            (2 * bounds, 2 * bounds),
            (2 * bounds, 0),
            (0, 0),
            (0, 2 * bounds),
        ]
        trans_field = []
        self.draw_colored_polygon(self.surf, field, (102, 204, 102), zoom,
                                  translation, angle)

        k = bounds / (20.0)
        grass = []
        for x in range(0, 40, 2):
            for y in range(0, 40, 2):
                grass.append([
                    (k * x + k, k * y + 0),
                    (k * x + 0, k * y + 0),
                    (k * x + 0, k * y + k),
                    (k * x + k, k * y + k),
                ])
        for poly in grass:
            self.draw_colored_polygon(self.surf, poly, (102, 230, 102), zoom,
                                      translation, angle)

        for poly, color in self.road_poly:
            # converting to pixel coordinates
            poly = [(p[0] + PLAYFIELD, p[1] + PLAYFIELD) for p in poly]
            color = [int(c * 255) for c in color]
            self.draw_colored_polygon(self.surf, poly, color, zoom,
                                      translation, angle)

    def render_indicators(self, W, H):
        s = W / 40.0
        h = H / 40.0
        color = (0, 0, 0)
        polygon = [(W, H), (W, H - 5 * h), (0, H - 5 * h), (0, H)]
        pygame.draw.polygon(self.surf, color=color, points=polygon)

        def vertical_ind(place, val):
            return [
                (place * s, H - (h + h * val)),
                ((place + 1) * s, H - (h + h * val)),
                ((place + 1) * s, H - h),
                ((place + 0) * s, H - h),
            ]

        def horiz_ind(place, val):
            return [
                ((place + 0) * s, H - 4 * h),
                ((place + val) * s, H - 4 * h),
                ((place + val) * s, H - 2 * h),
                ((place + 0) * s, H - 2 * h),
            ]

        true_speed = np.sqrt(
            np.square(self.car.hull.linearVelocity[0]) +
            np.square(self.car.hull.linearVelocity[1]))

        # simple wrapper to render if the indicator value is above a threshold
        def render_if_min(value, points, color):
            if abs(value) > 1e-4:
                pygame.draw.polygon(self.surf, points=points, color=color)

        render_if_min(true_speed, vertical_ind(5, 0.02 * true_speed),
                      (255, 255, 255))
        # ABS sensors
        render_if_min(
            self.car.wheels[0].omega,
            vertical_ind(7, 0.01 * self.car.wheels[0].omega),
            (0, 0, 255),
        )
        render_if_min(
            self.car.wheels[1].omega,
            vertical_ind(8, 0.01 * self.car.wheels[1].omega),
            (0, 0, 255),
        )
        render_if_min(
            self.car.wheels[2].omega,
            vertical_ind(9, 0.01 * self.car.wheels[2].omega),
            (51, 0, 255),
        )
        render_if_min(
            self.car.wheels[3].omega,
            vertical_ind(10, 0.01 * self.car.wheels[3].omega),
            (51, 0, 255),
        )

        render_if_min(
            self.car.wheels[0].joint.angle,
            horiz_ind(20, -10.0 * self.car.wheels[0].joint.angle),
            (0, 255, 0),
        )
        render_if_min(
            self.car.hull.angularVelocity,
            horiz_ind(30, -0.8 * self.car.hull.angularVelocity),
            (255, 0, 0),
        )

    def draw_colored_polygon(self, surface, poly, color, zoom, translation,
                             angle):
        poly = [pygame.math.Vector2(c).rotate_rad(angle) for c in poly]
        poly = [(c[0] * zoom + translation[0], c[1] * zoom + translation[1])
                for c in poly]
        gfxdraw.aapolygon(self.surf, poly, color)
        gfxdraw.filled_polygon(self.surf, poly, color)

    def _create_image_array(self, screen, size):
        scaled_screen = pygame.transform.smoothscale(screen, size)
        return np.transpose(np.array(pygame.surfarray.pixels3d(scaled_screen)),
                            axes=(1, 0, 2))

    def close(self):
        pygame.quit()
        if self.screen is not None:
            pygame.display.quit()
            self.isopen = False