Exemplo n.º 1
0
class CarRacingFix:
    assert gym.__version__ <= '0.17.1'

    def __init__(self, verbose=1):
        """Create the physics world and initialise all bookkeeping fields.

        No track or car is built here; ``reset`` does that.

        :param verbose: verbosity flag, stored unchanged as ``self.verbose``.
        """
        # Physics world. The contact listener is also stored on the instance
        # (the attribute name suggests this keeps a reference to it alive).
        listener = FrictionDetector(self)
        self.contactListener_keep_ref = listener
        self.world = Box2D.b2World((0, 0), contactListener=listener)

        # Rendering handles; the viewer and transform are created lazily in render().
        self.viewer = self.transform = None
        self.invisible_state_window = None
        self.invisible_video_window = None

        # Per-episode simulation state, (re)initialised by reset().
        self.road = None
        self.car = None
        self.reward = self.prev_reward = 0.0
        self.verbose = verbose
        self.state_temp = None  # previous rendered frame, used for frame stacking
        self.tile_visited_count = 0
        self.road_poly = []
        self.t = None
        self.num_step = 0

        # Fixture template shared by all road tiles; _create_track overwrites
        # its vertices for every tile it creates.
        unit_square = [(0, 0), (1, 0), (1, -1), (0, -1)]
        self.fd_tile = Box2D.b2FixtureDef(
            shape=Box2D.b2PolygonShape(vertices=unit_square))

        # Environment description attributes.
        self.env_name = 'CarRacingFix'
        # Two RGB frames stacked along the last axis (see old_step).
        self.state_dim = (STATE_W, STATE_H, 3 * 2)
        # step() consumes two (steer, gas, brake) triples per call.
        self.action_dim = 6
        self.if_discrete = False
        self.max_step = 512
        self.target_return = 950
        self.action_max = 1
    def reset(self):
        """Start a new episode and return its first stacked observation.

        Destroys the previous road and car, regenerates a track (retrying
        until ``_create_track`` reports success), places a new car using
        elements 1..3 of the first track entry, and runs one zero-action
        frame to produce the observation.

        :return: the stacked state returned by ``old_step((0, 0, 0))``.
        """
        self.num_step = 1

        self._destroy()
        self.reward = self.prev_reward = 0.0
        self.tile_visited_count = 0
        self.t = 0.0
        self.road_poly = []

        # Track generation may fail; simply try again until it succeeds.
        while not self._create_track():
            pass

        start_entry = self.track[0]
        self.car = Car(self.world, *start_entry[1:4])

        # An all-zero "previous frame" so the first stacked state is defined.
        self.state_temp = np.zeros((STATE_W, STATE_H, 3), dtype=np.uint8)
        first_state, _, _, _ = self.old_step((0, 0, 0))
        return first_state

    def step(self, action):
        """Advance the simulation by two physics frames using a 6-value action.

        ``action[:3]`` drives the first frame (not rendered) and ``action[3:]``
        the second (rendered). Each triple is passed to ``old_step`` as
        (steer, gas, brake). The rewards of both frames are summed.

        If anything raises while stepping, the error is printed and the
        episode ends with zero reward and a fallback observation that has the
        same layout as a regular one.

        :param action: sliceable sequence of 6 values.
        :return: ``(state, reward, done, info_dict)``.
        """
        try:
            reward0 = self.old_step(action[:3], if_draw=False)[1]
            state, reward1, done, info_dict = self.old_step(action[3:],
                                                            if_draw=True)
            reward = reward0 + reward1
        except Exception as error:
            print(f"| CarRacingFix Error: {error}")
            last_frame = self.state_temp
            if last_frame is None:
                # step() was called before reset(): no frame exists yet.
                state = np.zeros(self.state_dim, dtype=np.uint8)
            else:
                # Stack along the channel axis exactly like old_step does, so
                # the fallback has the regular observation shape. np.stack
                # would add a new leading axis instead.
                state = np.concatenate((last_frame, last_frame), axis=2)
            reward = 0
            done = True
            info_dict = dict()

        self.num_step += 1
        # ">=" so the limit still ends the episode if it was already passed.
        if self.num_step >= self.max_step:
            done = True
        return state, reward, done, info_dict

    def old_step(self, action, if_draw=True):
        """Simulate one physics frame with a (steer, gas, brake) action.

        :param action: indexable; elements 0, 1 and 2 are forwarded to the
            car's ``steer``, ``gas`` and ``brake``.
        :param if_draw: when true, render the frame, run the termination
            checks and build the stacked observation; when false only the
            physics and the time penalty are applied.
        :return: ``(stack_state, step_reward, done, {})``; ``stack_state`` is
            ``None`` when ``if_draw`` is false.
        """
        self.car.steer(action[0])
        self.car.gas(action[1])
        self.car.brake(action[2])

        frame_dt = 1.0 / FPS
        self.car.step(frame_dt)
        self.world.Step(frame_dt, 6 * 30, 2 * 30)
        self.t += frame_dt

        # Constant per-frame penalty. Fuel consumption is deliberately not
        # penalised: the goal is a faster car, not a thriftier one.
        self.reward -= 0.1

        done = False
        stack_state = None
        if if_draw:
            frame = self.render("state_pixels")
            # Mean of channel 1 over the 16:96 x 16:96 crop; a value outside
            # the open interval (32, 211) is treated as having left the road.
            crop_mean = frame[16:96, 16:96, 1].mean()
            if not (32 < crop_mean < 211):
                self.reward -= 2.0
                done = True
            if self.tile_visited_count == len(self.track):
                done = True
            # Observation = previous frame + current frame along the last axis.
            stack_state = np.concatenate((self.state_temp, frame), axis=2)
            self.state_temp = frame

        # The step reward is the change of the running total since last frame.
        step_reward = self.reward - self.prev_reward
        self.prev_reward = self.reward

        return stack_state, step_reward, done, {}

    def close(self):
        """Close the viewer if one exists and drop the reference to it."""
        viewer = self.viewer
        if viewer is None:
            return
        viewer.close()
        self.viewer = None

    def render(self, mode='human'):
        """Draw the current scene and either display it or return its pixels.

        :param mode: ``'human'`` flips the window and returns
            ``viewer.isopen``; ``'state_pixels'`` returns the frame as a
            uint8 array of shape ``(STATE_H, STATE_W, 3)``.
        :return: see ``mode``; ``None`` while ``self.t`` is still ``None``.
        """
        assert mode in ['human', 'state_pixels']
        if self.viewer is None:
            # Viewer and transform are created on first use.
            from gym.envs.classic_control import rendering
            self.viewer = rendering.Viewer(WINDOW_W, WINDOW_H)
            self.transform = rendering.Transform()
        if self.t is None:
            return None

        # Zoom blends from 0.1 * SCALE to ZOOM * SCALE during the first
        # second of simulated time.
        zoom = 0.1 * SCALE * max(1 - self.t, 0) + ZOOM * SCALE * min(
            self.t, 1)
        scroll_x = self.car.hull.position[0]
        scroll_y = self.car.hull.position[1]
        # View rotation: hull angle while (nearly) standing still, direction
        # of travel once the speed exceeds 0.5.
        angle = -self.car.hull.angle
        vel = self.car.hull.linearVelocity
        if np.linalg.norm(vel) > 0.5:
            angle = math.atan2(vel[0], vel[1])
        self.transform.set_scale(zoom, zoom)
        self.transform.set_translation(
            WINDOW_W / 2 - (scroll_x * zoom * math.cos(angle) -
                            scroll_y * zoom * math.sin(angle)),
            WINDOW_H / 4 - (scroll_x * zoom * math.sin(angle) +
                            scroll_y * zoom * math.cos(angle)))
        self.transform.set_rotation(angle)

        self.car.draw(self.viewer, mode != "state_pixels")

        win = self.viewer.window
        win.switch_to()
        win.dispatch_events()

        win.clear()
        t = self.transform
        if mode == 'state_pixels':
            vp_w = STATE_W
            vp_h = STATE_H
        else:
            # Scale the viewport by the backing scale factor when the window
            # context exposes one through ``_nscontext``.
            context_nscontext = getattr(win.context, '_nscontext', None)
            pixel_scale = 1 if context_nscontext is None else context_nscontext.view(
            ).backingScaleFactor()
            # pylint: disable=protected-access
            vp_w = int(pixel_scale * WINDOW_W)
            vp_h = int(pixel_scale * WINDOW_H)

        gl.glViewport(0, 0, vp_w, vp_h)
        t.enable()
        self.render_road()
        for geom in self.viewer.onetime_geoms:
            geom.render()
        self.viewer.onetime_geoms = []
        t.disable()
        # The indicator overlay is disabled in this variant
        # (render_indicators is not called).

        if mode == 'human':
            win.flip()
            return self.viewer.isopen

        image_data = pyglet.image.get_buffer_manager().get_color_buffer(
        ).get_image_data()
        # np.frombuffer replaces the deprecated binary mode of np.fromstring.
        # It yields a read-only view of the bytes, so the 3-channel slice is
        # copied to hand back a writable array as before.
        rgba = np.frombuffer(image_data.get_data(), dtype=np.uint8)
        rgba = rgba.reshape((vp_h, vp_w, 4))
        return rgba[:, :, :3].copy()

    def render_road(self):
        """Emit GL quads for the background, its checker squares and the road.

        All vertices are issued inside a single ``GL_QUADS`` begin/end pair,
        in this order: one playfield-sized quad, the lighter grid squares,
        then every polygon stored in ``self.road_poly``.
        """
        gl.glBegin(gl.GL_QUADS)

        # Background: one quad covering the whole playfield.
        gl.glColor4f(0.4, 0.8, 0.4, 1.0)
        playfield_corners = (
            (-PLAYFIELD, +PLAYFIELD),
            (+PLAYFIELD, +PLAYFIELD),
            (+PLAYFIELD, -PLAYFIELD),
            (-PLAYFIELD, -PLAYFIELD),
        )
        for corner_x, corner_y in playfield_corners:
            gl.glVertex3f(corner_x, corner_y, 0)

        # Slightly lighter squares on every second cell of the grid.
        gl.glColor4f(0.4, 0.9, 0.4, 1.0)
        cell = PLAYFIELD / 20.0
        for gx in range(-20, 20, 2):
            for gy in range(-20, 20, 2):
                left, right = cell * gx + 0, cell * gx + cell
                bottom, top = cell * gy + 0, cell * gy + cell
                gl.glVertex3f(right, bottom, 0)
                gl.glVertex3f(left, bottom, 0)
                gl.glVertex3f(left, top, 0)
                gl.glVertex3f(right, top, 0)

        # Road tiles and borders collected by _create_track.
        for polygon, rgb in self.road_poly:
            gl.glColor4f(rgb[0], rgb[1], rgb[2], 1)
            for vertex in polygon:
                gl.glVertex3f(vertex[0], vertex[1], 0)

        gl.glEnd()

    def render_indicators(self, w, h):
        """Draw the indicator strip: speed, wheel, steering and rotation bars.

        :param w: width of the drawing area; one column unit is ``w / 40``.
        :param h: height of the drawing area; one row unit is ``h / 40``.
        """
        gl.glBegin(gl.GL_QUADS)
        col = w / 40.0
        row = h / 40.0

        # Black backdrop, full width and five row units tall.
        gl.glColor4f(0, 0, 0, 1)
        gl.glVertex3f(w, 0, 0)
        gl.glVertex3f(w, 5 * row, 0)
        gl.glVertex3f(0, 5 * row, 0)
        gl.glVertex3f(0, 0, 0)

        def bar_up(place, val, color):
            # One-column-wide bar growing upwards from one row unit.
            gl.glColor4f(color[0], color[1], color[2], 1)
            gl.glVertex3f((place + 0) * col, row + row * val, 0)
            gl.glVertex3f((place + 1) * col, row + row * val, 0)
            gl.glVertex3f((place + 1) * col, row, 0)
            gl.glVertex3f((place + 0) * col, row, 0)

        def bar_sideways(place, val, color):
            # Bar between rows 2 and 4 extending ``val`` columns from ``place``.
            gl.glColor4f(color[0], color[1], color[2], 1)
            gl.glVertex3f((place + 0) * col, 4 * row, 0)
            gl.glVertex3f((place + val) * col, 4 * row, 0)
            gl.glVertex3f((place + val) * col, 2 * row, 0)
            gl.glVertex3f((place + 0) * col, 2 * row, 0)

        hull = self.car.hull
        wheels = self.car.wheels
        true_speed = np.sqrt(
            np.square(hull.linearVelocity[0]) +
            np.square(hull.linearVelocity[1]))
        bar_up(5, 0.02 * true_speed, (1, 1, 1))

        # Wheel angular speeds ("ABS sensors" in the original comment).
        wheel_bars = (
            (7, 0, (0.0, 0, 1)),
            (8, 1, (0.0, 0, 1)),
            (9, 2, (0.2, 0, 1)),
            (10, 3, (0.2, 0, 1)),
        )
        for place, wheel_index, color in wheel_bars:
            bar_up(place, 0.01 * wheels[wheel_index].omega, color)

        bar_sideways(20, -10.0 * wheels[0].joint.angle, (0, 1, 0))
        bar_sideways(30, -0.8 * hull.angularVelocity, (1, 0, 0))
        gl.glEnd()
        # No score label is drawn in this variant.

    def _destroy(self):
        """Remove the current road tiles and the car from the physics world.

        Does nothing when there is no road (``None`` or an empty list).
        """
        if self.road:
            for tile_body in self.road:
                self.world.DestroyBody(tile_body)
            self.road = []
            self.car.destroy()

    def _create_track(self):
        """Try to generate a random closed track and build its road tiles.

        Steps, in order:

        1. Draw 12 checkpoints ``(alpha, x, y)`` with ``rd.uniform``; the first
           and last get a fixed angle and a radius of ``1.5 * TRACK_RAD``.
        2. Walk a point around the checkpoints in ``TRACK_DETAIL_STEP`` steps,
           recording ``(alpha, beta, x, y)`` tuples, until more than 4 laps
           are counted or 2500 iterations have been spent.
        3. Scan the walk backwards for two crossings of ``self.start_alpha``
           and keep the points between them as the track.
        4. Reject the track if its two ends are too far apart.
        5. Flag tiles on sustained turns for a red/white border.
        6. Create one static sensor body per tile and record its polygon.

        Side effects: sets ``self.start_alpha``, resets ``self.road`` to ``[]``
        and, on success, fills ``self.road``, appends to ``self.road_poly``
        and sets ``self.track``.

        :return: ``True`` on success; ``False`` if no closed loop was found or
            the loop ends do not meet (no tiles are created in that case).
        """
        check_point = 12

        # Create checkpoints
        checkpoints = []
        for c in range(check_point):
            alpha = 2 * math.pi * c / check_point + rd.uniform(
                0, 2 * math.pi * 1 / check_point)
            rad = rd.uniform(TRACK_RAD / 3, TRACK_RAD)
            # First and last checkpoints are pinned; the random values drawn
            # above are discarded for them.
            if c == 0:
                alpha = 0
                rad = 1.5 * TRACK_RAD
            if c == check_point - 1:
                alpha = 2 * math.pi * c / check_point
                self.start_alpha = 2 * math.pi * (-0.5) / check_point
                rad = 1.5 * TRACK_RAD
            checkpoints.append(
                (alpha, rad * math.cos(alpha), rad * math.sin(alpha)))

        # print "\n".join(str(h) for h in checkpoints)
        # self.road_poly = [ (    # uncomment this to see checkpoints
        #    [ (tx,ty) for a,tx,ty in checkpoints ],
        #    (0.7,0.7,0.9) ) ]
        self.road = []

        # Go from one checkpoint to another to create track
        x, y, beta = 1.5 * TRACK_RAD, 0, 0
        dest_i = 0
        laps = 0
        track = []
        # Iteration budget: guarantees the walk below terminates.
        no_freeze = 2500
        visited_other_side = False
        while True:
            alpha = math.atan2(y, x)
            # A lap is counted when alpha turns positive again after having
            # been negative.
            if visited_other_side and alpha > 0:
                laps += 1
                visited_other_side = False
            if alpha < 0:
                visited_other_side = True
                alpha += 2 * math.pi
            while True:  # Find destination from checkpoints
                failed = True
                while True:
                    dest_alpha, dest_x, dest_y = checkpoints[dest_i %
                                                             len(checkpoints)]
                    if alpha <= dest_alpha:
                        failed = False
                        break
                    dest_i += 1
                    if dest_i % len(checkpoints) == 0:
                        break
                if not failed:
                    break
                # Wrapped past the last checkpoint: shift alpha down one full
                # turn and search again.
                alpha -= 2 * math.pi
                continue
            # (r1x, r1y) is the unit vector at angle beta; (p1x, p1y) is that
            # vector rotated by 90 degrees and is the direction of the step.
            r1x = math.cos(beta)
            r1y = math.sin(beta)
            p1x = -r1y
            p1y = r1x
            dest_dx = dest_x - x  # vector towards destination
            dest_dy = dest_y - y
            proj = r1x * dest_dx + r1y * dest_dy  # destination vector projected on rad
            # Keep beta within 1.5 * pi of alpha.
            while beta - alpha > 1.5 * math.pi:
                beta -= 2 * math.pi
            while beta - alpha < -1.5 * math.pi:
                beta += 2 * math.pi
            prev_beta = beta
            proj *= SCALE
            # Turn according to the sign of the projection, by at most
            # TRACK_TURN_RATE per step; no turn inside the +-0.3 dead zone.
            if proj > 0.3:
                beta -= min(TRACK_TURN_RATE, abs(0.001 * proj))
            if proj < -0.3:
                beta += min(TRACK_TURN_RATE, abs(0.001 * proj))
            x += p1x * TRACK_DETAIL_STEP
            y += p1y * TRACK_DETAIL_STEP
            # Stored beta is the average of the value before and after turning.
            track.append((alpha, prev_beta * 0.5 + beta * 0.5, x, y))
            if laps > 4:
                break
            no_freeze -= 1
            if no_freeze == 0:
                break
        # print "\n".join([str(t) for t in enumerate(track)])

        # Find closed loop range i1..i2, first loop should be ignored, second is OK
        i1, i2 = -1, -1
        i = len(track)
        # Backward scan: the first crossing of start_alpha found becomes i2,
        # the next one i1.
        while True:
            i -= 1
            if i == 0:
                return False  # Failed
            pass_through_start = track[i][0] > self.start_alpha >= track[i -
                                                                         1][0]
            if pass_through_start and i2 == -1:
                i2 = i
            elif pass_through_start and i1 == -1:
                i1 = i
                break
        # if self.verbose == 1:  # Yonv1943
        #     print("Track generation: %i..%i -> %i-tiles track" % (i1, i2, i2 - i1))
        assert i1 != -1
        assert i2 != -1

        track = track[i1:i2 - 1]

        first_beta = track[0][1]
        first_perp_x = math.cos(first_beta)
        first_perp_y = math.sin(first_beta)
        # Length of perpendicular jump to put together head and tail
        well_glued_together = np.sqrt(
            np.square(first_perp_x * (track[0][2] - track[-1][2])) +
            np.square(first_perp_y * (track[0][3] - track[-1][3])))
        if well_glued_together > TRACK_DETAIL_STEP:
            return False

        # Red-white border on hard turns
        # A tile qualifies when each of the last BORDER_MIN_COUNT beta changes
        # exceeds 0.2 * TRACK_TURN_RATE and all have the same sign. Negative
        # indices wrap around to the end of the track.
        border = [False] * len(track)
        for i in range(len(track)):
            good = True
            oneside = 0
            for neg in range(BORDER_MIN_COUNT):
                beta1 = track[i - neg - 0][1]
                beta2 = track[i - neg - 1][1]
                good &= abs(beta1 - beta2) > TRACK_TURN_RATE * 0.2
                oneside += np.sign(beta1 - beta2)
            good &= abs(oneside) == BORDER_MIN_COUNT
            border[i] = good
        # Extend every flagged tile backwards over the preceding
        # BORDER_MIN_COUNT - 1 tiles.
        for i in range(len(track)):
            for neg in range(BORDER_MIN_COUNT):
                border[i - neg] |= border[i]

        # Create tiles
        for i in range(len(track)):
            alpha1, beta1, x1, y1 = track[i]
            # For i == 0 the index -1 wraps to the last point, closing the loop.
            alpha2, beta2, x2, y2 = track[i - 1]
            road1_l = (x1 - TRACK_WIDTH * math.cos(beta1),
                       y1 - TRACK_WIDTH * math.sin(beta1))
            road1_r = (x1 + TRACK_WIDTH * math.cos(beta1),
                       y1 + TRACK_WIDTH * math.sin(beta1))
            road2_l = (x2 - TRACK_WIDTH * math.cos(beta2),
                       y2 - TRACK_WIDTH * math.sin(beta2))
            road2_r = (x2 + TRACK_WIDTH * math.cos(beta2),
                       y2 + TRACK_WIDTH * math.sin(beta2))
            vertices = [road1_l, road1_r, road2_r, road2_l]
            # The shared fixture template is re-used with this tile's vertices.
            self.fd_tile.shape.vertices = vertices
            t = self.world.CreateStaticBody(fixtures=self.fd_tile)
            t.userData = t
            # Small colour offset cycling over three shades.
            c = 0.01 * (i % 3)
            t.color = [ROAD_COLOR[0] + c, ROAD_COLOR[1] + c, ROAD_COLOR[2] + c]
            t.road_visited = False
            t.road_friction = 1.0
            t.fixtures[0].sensor = True
            self.road_poly.append(([road1_l, road1_r, road2_r,
                                    road2_l], t.color))
            self.road.append(t)
            if border[i]:
                # The sign of the beta change selects the side of the road on
                # which the border strip is drawn; colours alternate between
                # (1, 1, 1) and (1, 0, 0) by tile parity.
                side = np.sign(beta2 - beta1)
                b1_l = (x1 + side * TRACK_WIDTH * math.cos(beta1),
                        y1 + side * TRACK_WIDTH * math.sin(beta1))
                b1_r = (x1 + side * (TRACK_WIDTH + BORDER) * math.cos(beta1),
                        y1 + side * (TRACK_WIDTH + BORDER) * math.sin(beta1))
                b2_l = (x2 + side * TRACK_WIDTH * math.cos(beta2),
                        y2 + side * TRACK_WIDTH * math.sin(beta2))
                b2_r = (x2 + side * (TRACK_WIDTH + BORDER) * math.cos(beta2),
                        y2 + side * (TRACK_WIDTH + BORDER) * math.sin(beta2))
                self.road_poly.append(([b1_l, b1_r, b2_r,
                                        b2_l], (1, 1, 1) if i % 2 == 0 else
                                       (1, 0, 0)))
        self.track = track
        return True
Exemplo n.º 2
0
class Environment(gym.Env, EzPickle):
    def __init__(self, verbose=False):
        """Create the physics world and declare action/observation spaces.

        Ground, track, cones and car are not built here; ``reset`` does that.

        :param verbose: stored unchanged as ``self.verbose``.
        """
        EzPickle.__init__(self)

        # --- General and utility fields ---
        self.verbose = verbose
        self.np_random = None
        self.seed()

        # --- Box2D fields ---
        # -1.0 is the sentinel for "models not ready yet"; render() checks it.
        self.time = -1.0
        self.car = None
        listener = ContactListener(self)
        self.contact_listener = listener
        self.world = b2World((0, 0), contactListener=listener)
        self.ground = None
        # Kept on the instance for easy access from StateTransformer.
        self.track_tiles_coordinates = None
        self.track_tiles = []
        self.cones = []
        self.tile_visited_count = 0

        # --- pyglet fields, filled lazily by render() ---
        self.viewer = None
        self.invisible_state_window = None
        self.invisible_video_window = None
        self.score_label = None
        self.transform = None

        # --- RL-related fields ---
        self.state = None
        self.done = False
        # Action layout is (steer, gas, brake); steering spans -1..+1 for
        # left/right, gas and brake span 0..+1.
        action_low = np.array([-1, 0, 0])
        action_high = np.array([+1, +1, +1])
        self.action_space = spaces.Box(action_low,
                                       action_high,
                                       dtype=np.float32)
        self.observation_space = spaces.Box(low=0,
                                            high=255,
                                            shape=(STATE_H, STATE_W, 3),
                                            dtype=np.uint8)
        self.reward = 0.0
        self.prev_reward = 0.0

    def step(self, action):
        """Advance the simulation by one frame.

        :param action: ``(steer, gas, brake)``, or ``None`` to advance the
            physics without touching the controls. The steer value is negated
            before being passed to the car.
        :return: ``(state, step_reward, done, {})`` where ``state`` comes from
            ``StateTransformer.transform(self)``.
        """
        # Remember the running total before this frame changes it.
        self.prev_reward = self.reward

        vehicle = self.car
        if action is not None:
            vehicle.steer(-action[0])
            vehicle.gas(action[1])
            vehicle.brake(action[2])

        frame_dt = 1.0 / FPS
        vehicle.step(frame_dt)
        self.world.Step(frame_dt, 6 * 30, 2 * 30)
        self.time += frame_dt

        # Fuel is treated as unlimited: the counter is cleared every frame
        # and never enters the reward.
        vehicle.fuel_spent = 0.0

        # Constant per-frame penalty for stopping / wasting time.
        self.reward -= 0.1
        # The step reward is the change of the running total over this frame.
        step_reward = self.reward - self.prev_reward

        # Episode ends when every track tile has been visited ...
        if self.tile_visited_count == len(self.track_tiles):
            self.done = True

        # ... or, with an extra penalty, when the car leaves the playfield.
        x, y = vehicle.hull.position
        out_of_bounds = abs(x) > PLAYFIELD or abs(y) > PLAYFIELD
        if out_of_bounds:
            self.done = True
            step_reward -= 100

        self.state = StateTransformer.transform(self)

        return self.state, step_reward, self.done, {}

    def reset(self):
        """Rebuild the world for a new episode and return the first state.

        Destroys the previous bodies, resets the counters, then builds the
        ground, the track tiles, one cone per tile-fixture vertex, and the
        car at the position of the first tile.

        :return: the state produced by one ``step(None)`` call.
        """
        self._destroy()
        self.time = -1.0
        self.tile_visited_count = 0
        self.state = None
        self.done = False
        self.reward = self.prev_reward = 0.0

        # Ground
        self.ground = Ground(self.world, PLAYFIELD, PLAYFIELD)

        # Track tiles: each tile is built from its own coordinate entry and
        # the previous one (index -1 wraps around for the first tile).
        coords = TrackCoordinatesBuilder.load_track(self)
        self.track_tiles_coordinates = coords
        self.track_tiles = [
            TrackTile(self.world, coords[idx], coords[idx - 1])
            for idx, _ in enumerate(coords)
        ]

        # Cones: one at every vertex of the first fixture of every tile.
        cones_coordinates = []
        for tile in self.track_tiles:
            for vertex in tile.b2Data.fixtures[0].shape.vertices:
                cones_coordinates.append(vertex)
        self.cones = [
            Cone(world=self.world, position=(vertex[0], vertex[1]))
            for vertex in cones_coordinates
        ]

        # Car starts on the first tile with a zero initial angle.
        start_x, start_y = self.track_tiles[0].position
        self.car = Car(self.world,
                       init_angle=0,
                       init_x=start_x,
                       init_y=start_y)

        first_state, _, _, _ = self.step(None)
        return first_state

    def render(self, mode='human'):
        """Draw the world and indicators; show them or return the pixels.

        :param mode: ``'human'`` flips the window and returns
            ``viewer.isopen``; ``'state_pixels'`` and ``'rgb_array'`` return
            the frame as a uint8 array of shape ``(VP_H, VP_W, 3)`` with the
            row order reversed relative to the raw buffer.
        :return: see ``mode``; ``None`` if ``reset`` has not been called yet.
        """
        assert mode in ['human', 'state_pixels', 'rgb_array']

        # Instantiate viewer
        if self.viewer is None:
            self.viewer = rendering.Viewer(WINDOW_W, WINDOW_H)
            self.score_label = pyglet.text.Label('0000',
                                                 font_size=36,
                                                 x=20,
                                                 y=WINDOW_H * 2.5 / 40.00,
                                                 anchor_x='left',
                                                 anchor_y='center',
                                                 color=(255, 255, 255, 255))
            self.transform = rendering.Transform()

        # reset() not called yet, so no need to render
        if self.time == -1.0:
            return

        self.car.draw(self.viewer, mode != "state_pixels")
        self.transform = follower_view_transform(self.car, self.time)

        # Setup window
        window = self.viewer.window
        window.switch_to()
        window.dispatch_events()
        window.clear()
        VP_W, VP_H = get_viewport_size(mode, window)

        # Start drawing
        gl.glViewport(0, 0, VP_W, VP_H)
        # Transform view to follow the car and render the contents of the world
        self.transform.enable()
        self.render_world()
        # Render onetime geometries
        for geom in self.viewer.onetime_geoms:
            geom.render()
        # And empty the geometries afterwards
        self.viewer.onetime_geoms = []
        # Indicators are not part of the world, so the transform is disabled
        # before they are drawn.
        self.transform.disable()
        render_indicators(WINDOW_W,
                          WINDOW_H,
                          car=self.car,
                          reward=self.reward,
                          score_label=self.score_label)

        if mode == 'human':
            window.flip()
            return self.viewer.isopen

        image_data = pyglet.image.get_buffer_manager().get_color_buffer(
        ).get_image_data()
        # np.frombuffer replaces the deprecated binary mode of np.fromstring.
        # It yields a read-only view of the bytes, so the flipped 3-channel
        # slice is copied to hand back a writable array as before.
        rgba = np.frombuffer(image_data.get_data(), dtype=np.uint8)
        rgba = rgba.reshape(VP_H, VP_W, 4)
        return rgba[::-1, :, 0:3].copy()

    def close(self):
        """Close the viewer if one exists and drop the reference to it."""
        viewer = self.viewer
        if viewer is None:
            return
        viewer.close()
        self.viewer = None

    def seed(self, seed=None):
        """(Re)create ``self.np_random`` via ``seeding.np_random``.

        :param seed: value forwarded to ``seeding.np_random``.
        :return: one-element list holding the seed that call returned.
        """
        generator, used_seed = seeding.np_random(seed)
        self.np_random = generator
        return [used_seed]

    def _destroy(self):
        """Remove the ground, the track tiles and the car from the world.

        Does nothing when ``self.track_tiles`` is empty.
        """
        # NOTE(review): self.cones is not touched here — confirm whether Cone
        # owns Box2D bodies that should be destroyed between episodes too.
        if self.track_tiles:
            self.world.DestroyBody(self.ground.b2Data)
            for tile in self.track_tiles:
                self.world.DestroyBody(tile.b2Data)
            self.track_tiles = []
            self.car.destroy()

    def render_world(self):
        """Render ground, track tiles and cones inside one GL_QUADS batch."""
        gl.glBegin(gl.GL_QUADS)

        # Drawing order: ground first, then tiles, then cones.
        self.ground.render()
        for drawables in (self.track_tiles, self.cones):
            for item in drawables:
                item.render()

        gl.glEnd()