Exemplo n.º 1
0
class CarRacing(gym.Env):
    metadata = {
        'render.modes': ['human', 'rgb_array', 'state_pixels'],
        'video.frames_per_second': FPS
    }

    def __init__(self):
        """Create the physics world, bookkeeping fields and gym spaces.

        No track or car exists after construction: ``road`` and ``car`` stay
        ``None`` until ``reset()`` builds them.
        """
        self.seed()

        # The listener is also stored on the env under a "keepref" name,
        # presumably so it stays referenced while the world uses it.
        listener = FrictionDetector(self)
        self.contactListener_keepref = listener
        self.world = Box2D.b2World((0, 0), contactListener=listener)

        # Rendering handles, created lazily.
        self.viewer = None
        self.invisible_state_window = None
        self.invisible_video_window = None

        # Episode state.
        self.road = None
        self.car = None
        self.reward = 0.0
        self.prev_reward = 0.0

        # Action layout: steer in [-1, 1], gas in [0, 1], brake in [0, 1].
        action_low = np.array([-1, 0, 0])
        action_high = np.array([+1, +1, +1])
        self.action_space = spaces.Box(action_low,
                                       action_high,
                                       dtype=np.float32)

        # Observations are STATE_H x STATE_W images with 3 uint8 channels.
        frame_shape = (STATE_H, STATE_W, 3)
        self.observation_space = spaces.Box(low=0,
                                            high=255,
                                            shape=frame_shape,
                                            dtype=np.uint8)

    def seed(self, seed=None):
        """Replace the env's random generator and report the seed in use.

        Args:
            seed: Value forwarded to ``seeding.np_random``; defaults to ``None``.

        Returns:
            A one-element list containing the seed that
            ``seeding.np_random`` returned.
        """
        generator, used_seed = seeding.np_random(seed)
        self.np_random = generator
        return [used_seed]

    def _destroy(self):
        """Remove the current track tiles and the car from the world.

        Does nothing when there is no road (``None`` or an empty list).
        """
        tiles = self.road
        if not tiles:
            return
        for tile in tiles:
            self.world.DestroyBody(tile)
        self.road = []
        self.car.destroy()

    def _create_track(self):
        """Generate a random closed track and create its tile bodies.

        Steps:
          1. Place ``CHECKPOINTS`` points around the origin at a randomized
             angle and radius; the first and last are pinned to radius
             ``1.5 * TRACK_RAD``.
          2. Starting at ``(1.5 * TRACK_RAD, 0)``, advance in
             ``TRACK_DETAIL_STEP`` increments, adjusting the angle ``beta``
             by at most ``TRACK_TURN_RATE`` per step according to the
             projection of the vector to the current destination
             checkpoint. Stop once more than four laps are counted or 2500
             steps were taken.
          3. Scanning backwards, find two successive crossings of
             ``self.start_alpha`` and keep the points between them.
          4. Create one static sensor body per segment and, where the
             turning is sustained, a red/white border quad.

        Side effects: sets ``self.start_alpha``, ``self.road`` and
        ``self.track`` and appends to ``self.road_poly``, which must already
        exist (``reset()`` creates it).

        Returns:
            ``True`` on success; ``False`` when no full loop was found or
            when the two ends of the loop do not meet closely enough.
        """
        CHECKPOINTS = 12

        # Create checkpoints
        checkpoints = []
        for c in range(CHECKPOINTS):
            # Angle: the c-th of CHECKPOINTS equal sectors plus a random
            # offset of up to one sector width.
            alpha = 2 * math.pi * c / CHECKPOINTS + self.np_random.uniform(
                0, 2 * math.pi * 1 / CHECKPOINTS)
            rad = self.np_random.uniform(TRACK_RAD / 3, TRACK_RAD)
            if c == 0:
                # First checkpoint is fixed: angle 0, radius 1.5 * TRACK_RAD.
                alpha = 0
                rad = 1.5 * TRACK_RAD
            if c == CHECKPOINTS - 1:
                # Last checkpoint: no angular jitter, same fixed radius.
                alpha = 2 * math.pi * c / CHECKPOINTS
                # Start line angle: half a sector before angle 0.
                self.start_alpha = 2 * math.pi * (-0.5) / CHECKPOINTS
                rad = 1.5 * TRACK_RAD
            # Stored as (angle, x, y).
            checkpoints.append(
                (alpha, rad * math.cos(alpha), rad * math.sin(alpha)))

        #print "\n".join(str(h) for h in checkpoints)
        #self.road_poly = [ (    # uncomment this to see checkpoints
        #    [ (tx,ty) for a,tx,ty in checkpoints ],
        #    (0.7,0.7,0.9) ) ]
        self.road = []

        # Go from one checkpoint to another to create track
        x, y, beta = 1.5 * TRACK_RAD, 0, 0
        dest_i = 0
        laps = 0
        track = []
        no_freeze = 2500  # hard cap on the number of walk steps
        visited_other_side = False
        while 1:
            # Polar angle of the current position around the origin.
            alpha = math.atan2(y, x)
            # A lap is counted when alpha becomes positive again after
            # having been negative.
            if visited_other_side and alpha > 0:
                laps += 1
                visited_other_side = False
            if alpha < 0:
                visited_other_side = True
                alpha += 2 * math.pi  # map to [0, 2*pi)
            while True:  # Find destination from checkpoints
                failed = True
                while True:
                    dest_alpha, dest_x, dest_y = checkpoints[dest_i %
                                                             len(checkpoints)]
                    if alpha <= dest_alpha:
                        failed = False
                        break
                    dest_i += 1
                    # Wrapped past the last checkpoint without a match.
                    if dest_i % len(checkpoints) == 0: break
                if not failed: break
                # Retry the search with alpha shifted back by one full turn.
                alpha -= 2 * math.pi
                continue
            # (r1x, r1y) is the unit vector at angle beta; (p1x, p1y) is that
            # vector rotated by +90 degrees and is the direction of travel.
            r1x = math.cos(beta)
            r1y = math.sin(beta)
            p1x = -r1y
            p1y = r1x
            dest_dx = dest_x - x  # vector towards destination
            dest_dy = dest_y - y
            proj = r1x * dest_dx + r1y * dest_dy  # destination vector projected on rad
            # Keep beta within 1.5*pi of alpha by whole-turn shifts.
            while beta - alpha > 1.5 * math.pi:
                beta -= 2 * math.pi
            while beta - alpha < -1.5 * math.pi:
                beta += 2 * math.pi
            prev_beta = beta
            proj *= SCALE
            # Turn proportionally to the projection, capped at
            # TRACK_TURN_RATE; projections within +/-0.3 cause no turn.
            if proj > 0.3: beta -= min(TRACK_TURN_RATE, abs(0.001 * proj))
            if proj < -0.3: beta += min(TRACK_TURN_RATE, abs(0.001 * proj))
            x += p1x * TRACK_DETAIL_STEP
            y += p1y * TRACK_DETAIL_STEP
            # Track point: (alpha, mean of old and new beta, x, y).
            track.append((alpha, prev_beta * 0.5 + beta * 0.5, x, y))
            if laps > 4: break
            no_freeze -= 1
            if no_freeze == 0: break
        #print "\n".join([str(t) for t in enumerate(track)])

        # Find closed loop range i1..i2, first loop should be ignored, second is OK
        i1, i2 = -1, -1
        i = len(track)
        # Scan from the end: the first start-line crossing found becomes i2,
        # the next (earlier) one becomes i1.
        while True:
            i -= 1
            if i == 0: return False  # Failed
            pass_through_start = track[i][0] > self.start_alpha and track[
                i - 1][0] <= self.start_alpha
            if pass_through_start and i2 == -1:
                i2 = i
            elif pass_through_start and i1 == -1:
                i1 = i
                break
        #print("Track generation: %i..%i -> %i-tiles track" % (i1, i2, i2-i1))
        assert i1 != -1
        assert i2 != -1

        track = track[i1:i2 - 1]

        first_beta = track[0][1]
        first_perp_x = math.cos(first_beta)
        first_perp_y = math.sin(first_beta)
        # Length of perpendicular jump to put together head and tail
        well_glued_together = np.sqrt(
            np.square(first_perp_x * (track[0][2] - track[-1][2])) +
            np.square(first_perp_y * (track[0][3] - track[-1][3])))
        if well_glued_together > TRACK_DETAIL_STEP:
            return False

        # Red-white border on hard turns
        border = [False] * len(track)
        for i in range(len(track)):
            # A point qualifies when each of the BORDER_MIN_COUNT preceding
            # beta changes exceeds 0.2 * TRACK_TURN_RATE in magnitude and all
            # have the same sign. Negative indices wrap around the loop.
            good = True
            oneside = 0
            for neg in range(BORDER_MIN_COUNT):
                beta1 = track[i - neg - 0][1]
                beta2 = track[i - neg - 1][1]
                good &= abs(beta1 - beta2) > TRACK_TURN_RATE * 0.2
                oneside += np.sign(beta1 - beta2)
            good &= abs(oneside) == BORDER_MIN_COUNT
            border[i] = good
        # Spread each flag back over the BORDER_MIN_COUNT points it was
        # computed from.
        for i in range(len(track)):
            for neg in range(BORDER_MIN_COUNT):
                border[i - neg] |= border[i]

        # Create tiles
        for i in range(len(track)):
            # Each tile spans point i and its predecessor; for i == 0 the
            # predecessor is the last point, which closes the loop.
            alpha1, beta1, x1, y1 = track[i]
            alpha2, beta2, x2, y2 = track[i - 1]
            # Corners: each centre point offset by TRACK_WIDTH along +/- the
            # unit vector at angle beta.
            road1_l = (x1 - TRACK_WIDTH * math.cos(beta1),
                       y1 - TRACK_WIDTH * math.sin(beta1))
            road1_r = (x1 + TRACK_WIDTH * math.cos(beta1),
                       y1 + TRACK_WIDTH * math.sin(beta1))
            road2_l = (x2 - TRACK_WIDTH * math.cos(beta2),
                       y2 - TRACK_WIDTH * math.sin(beta2))
            road2_r = (x2 + TRACK_WIDTH * math.cos(beta2),
                       y2 + TRACK_WIDTH * math.sin(beta2))
            t = self.world.CreateStaticBody(fixtures=fixtureDef(
                shape=polygonShape(
                    vertices=[road1_l, road1_r, road2_r, road2_l])))
            t.userData = t
            # Cycle three slightly different shades along the track.
            c = 0.01 * (i % 3)
            t.color = [ROAD_COLOR[0] + c, ROAD_COLOR[1] + c, ROAD_COLOR[2] + c]
            t.road_visited = False
            t.road_friction = 1.0
            t.fixtures[0].sensor = True
            self.road_poly.append(([road1_l, road1_r, road2_r,
                                    road2_l], t.color))
            self.road.append(t)
            if border[i]:
                # The sign of the beta change picks the side of the road; the
                # quad covers TRACK_WIDTH .. TRACK_WIDTH + BORDER from the
                # centre line and alternates white / red by tile index.
                side = np.sign(beta2 - beta1)
                b1_l = (x1 + side * TRACK_WIDTH * math.cos(beta1),
                        y1 + side * TRACK_WIDTH * math.sin(beta1))
                b1_r = (x1 + side * (TRACK_WIDTH + BORDER) * math.cos(beta1),
                        y1 + side * (TRACK_WIDTH + BORDER) * math.sin(beta1))
                b2_l = (x2 + side * TRACK_WIDTH * math.cos(beta2),
                        y2 + side * TRACK_WIDTH * math.sin(beta2))
                b2_r = (x2 + side * (TRACK_WIDTH + BORDER) * math.cos(beta2),
                        y2 + side * (TRACK_WIDTH + BORDER) * math.sin(beta2))
                self.road_poly.append(([b1_l, b1_r, b2_r,
                                        b2_l], (1, 1, 1) if i % 2 == 0 else
                                       (1, 0, 0)))
        self.track = track
        return True

    def reset(self):
        """Start a new episode on a freshly generated track.

        Tears down the previous road and car, clears the per-episode
        counters, generates a track (retrying until generation succeeds) and
        places a new car at the first track point.

        Returns:
            The first observation, obtained from ``step(None)``.
        """
        self._destroy()

        self.reward = 0.0
        self.prev_reward = 0.0
        self.tile_visited_count = 0
        self.t = 0.0
        self.road_poly = []
        self.human_render = False

        # Track generation can fail; keep trying until it reports success.
        while not self._create_track():
            pass

        # Elements 1..3 of the first track point are passed on to Car.
        start_pose = self.track[0][1:4]
        self.car = Car(self.world, *start_pose)

        first_observation = self.step(None)[0]
        return first_observation

    def step(self, action):
        """Advance the simulation by one frame of ``1 / FPS`` seconds.

        Args:
            action: Indexable ``(steer, gas, brake)``, or ``None``. With
                ``None`` (as used by ``reset()``) physics still advances but
                no controls are applied and no reward is accounted.

        Returns:
            ``(state, step_reward, done, info)`` where ``state`` is the
            ``'state_pixels'`` render and ``info`` is an empty dict.
        """
        has_action = action is not None
        if has_action:
            # Steering input is negated before being handed to the car.
            self.car.steer(-action[0])
            self.car.gas(action[1])
            self.car.brake(action[2])

        dt = 1.0 / FPS
        self.car.step(dt)
        self.world.Step(dt, 6 * 30, 2 * 30)
        self.t += dt

        self.state = self.render("state_pixels")

        if not has_action:
            # First step without action, called from reset()
            return self.state, 0, False, {}

        # Fixed per-frame penalty.
        self.reward -= 0.1
        # We actually don't want to count fuel spent, we want car to be faster.
        self.car.fuel_spent = 0.0
        step_reward = self.reward - self.prev_reward
        self.prev_reward = self.reward

        # The episode ends when every tile has been visited ...
        done = self.tile_visited_count == len(self.track)

        # ... or when the car leaves the playfield, which costs 100.
        x, y = self.car.hull.position
        if abs(x) > PLAYFIELD or abs(y) > PLAYFIELD:
            done = True
            step_reward = -100

        return self.state, step_reward, done, {}

    def render(self, mode='human'):
        """Draw the current frame and, for the array modes, return its pixels.

        Args:
            mode: ``'human'`` draws into the window at
                ``WINDOW_W x WINDOW_H`` and flips it. ``'rgb_array'`` and
                ``'state_pixels'`` draw into a ``VIDEO_W x VIDEO_H`` /
                ``STATE_W x STATE_H`` viewport and read the colour buffer
                back.

        Returns:
            For the two array modes, a writable ``uint8`` array of shape
            ``(VP_H, VP_W, 3)`` with the row order reversed relative to the
            buffer. ``None`` for ``'human'`` and when ``reset()`` has not
            been called yet.
        """
        if self.viewer is None:
            from gym.envs.classic_control import rendering
            self.viewer = rendering.Viewer(WINDOW_W, WINDOW_H)
            self.score_label = pyglet.text.Label('0000',
                                                 font_size=36,
                                                 x=20,
                                                 y=WINDOW_H * 2.5 / 40.00,
                                                 anchor_x='left',
                                                 anchor_y='center',
                                                 color=(255, 255, 255, 255))
            self.transform = rendering.Transform()

        if "t" not in self.__dict__:
            return  # reset() not called yet

        zoom = ZOOM * SCALE
        scroll_x = self.car.hull.position[0]
        scroll_y = self.car.hull.position[1]
        angle = -self.car.hull.angle
        vel = self.car.hull.linearVelocity
        # Once the car moves faster than 0.5, orient the view by its
        # velocity instead of by the hull angle.
        if np.linalg.norm(vel) > 0.5:
            angle = math.atan2(vel[0], vel[1])
        self.transform.set_scale(zoom, zoom)
        # Translate so the rotated, scaled car position lands at
        # (WINDOW_W / 2, WINDOW_H / 4).
        self.transform.set_translation(
            WINDOW_W / 2 - (scroll_x * zoom * math.cos(angle) -
                            scroll_y * zoom * math.sin(angle)),
            WINDOW_H / 4 - (scroll_x * zoom * math.sin(angle) +
                            scroll_y * zoom * math.cos(angle)))
        self.transform.set_rotation(angle)

        self.car.draw(self.viewer, mode != "state_pixels")

        arr = None
        win = self.viewer.window

        def draw_scene(vp_w, vp_h):
            # Draw sequence shared by all modes; only the viewport differs.
            win.clear()
            gl.glViewport(0, 0, vp_w, vp_h)
            self.transform.enable()
            self.render_road()
            for geom in self.viewer.onetime_geoms:
                geom.render()
            self.transform.disable()
            # Indicators are always laid out for the full window size, also
            # for the smaller viewports (original TODO: find out why).
            self.render_indicators(WINDOW_W, WINDOW_H)

        if mode != 'state_pixels':
            win.switch_to()
            win.dispatch_events()

        if mode == "rgb_array" or mode == "state_pixels":
            if mode == 'rgb_array':
                VP_W = VIDEO_W
                VP_H = VIDEO_H
            else:
                VP_W = STATE_W
                VP_H = STATE_H
            draw_scene(VP_W, VP_H)
            image_data = pyglet.image.get_buffer_manager().get_color_buffer(
            ).get_image_data()
            # np.fromstring's binary mode (sep='') is deprecated; frombuffer
            # is the documented replacement. It yields a read-only view, so
            # copy after reversing the rows and dropping the 4th channel to
            # hand back a writable array as before.
            raw = np.frombuffer(image_data.data, dtype=np.uint8)
            arr = raw.reshape(VP_H, VP_W, 4)[::-1, :, 0:3].copy()

        # agent can call or not call env.render() itself when recording video.
        if mode == "rgb_array" and not self.human_render:
            win.flip()

        if mode == 'human':
            self.human_render = True
            draw_scene(WINDOW_W, WINDOW_H)
            win.flip()

        self.viewer.onetime_geoms = []
        return arr

    def close(self):
        """Close the viewer if one was created and drop the reference."""
        viewer = self.viewer
        if viewer is None:
            return
        viewer.close()
        self.viewer = None

    def render_road(self):
        """Draw the playfield, its checker pattern and every road polygon.

        All vertices are emitted as quads inside a single
        ``glBegin``/``glEnd`` pair.
        """
        gl.glBegin(gl.GL_QUADS)

        # Background: one quad covering the whole playfield.
        gl.glColor4f(0.4, 0.8, 0.4, 1.0)
        gl.glVertex3f(-PLAYFIELD, +PLAYFIELD, 0)
        gl.glVertex3f(+PLAYFIELD, +PLAYFIELD, 0)
        gl.glVertex3f(+PLAYFIELD, -PLAYFIELD, 0)
        gl.glVertex3f(-PLAYFIELD, -PLAYFIELD, 0)

        # Slightly brighter squares on every second cell of a 40 x 40 grid.
        gl.glColor4f(0.4, 0.9, 0.4, 1.0)
        cell = PLAYFIELD / 20.0
        for gx in range(-20, 20, 2):
            for gy in range(-20, 20, 2):
                left = cell * gx + 0
                right = cell * gx + cell
                bottom = cell * gy + 0
                top = cell * gy + cell
                gl.glVertex3f(right, bottom, 0)
                gl.glVertex3f(left, bottom, 0)
                gl.glVertex3f(left, top, 0)
                gl.glVertex3f(right, top, 0)

        # Road tiles and borders collected during track creation.
        for corners, rgb in self.road_poly:
            gl.glColor4f(rgb[0], rgb[1], rgb[2], 1)
            for corner in corners:
                gl.glVertex3f(corner[0], corner[1], 0)

        gl.glEnd()

    def render_indicators(self, W, H):
        """Draw the indicator strip along the bottom and the score label.

        Args:
            W: Width used to lay out the strip; one column is ``W / 40``.
            H: Height used to lay out the strip; one row is ``H / 40``.
        """
        gl.glBegin(gl.GL_QUADS)
        col = W / 40.0
        row = H / 40.0

        def quad(color, corners):
            # One solid-coloured quad given as four (x, y) corners.
            gl.glColor4f(color[0], color[1], color[2], 1)
            for cx, cy in corners:
                gl.glVertex3f(cx, cy, 0)

        def vertical_bar(place, val, color):
            # One column wide, rising from row 1 by ``val`` rows.
            quad(color, (
                ((place + 0) * col, row + row * val),
                ((place + 1) * col, row + row * val),
                ((place + 1) * col, row),
                ((place + 0) * col, row),
            ))

        def horizontal_bar(place, val, color):
            # Between rows 2 and 4, extending ``val`` columns from ``place``.
            quad(color, (
                ((place + 0) * col, 4 * row),
                ((place + val) * col, 4 * row),
                ((place + val) * col, 2 * row),
                ((place + 0) * col, 2 * row),
            ))

        # Black backdrop five rows high across the full width.
        quad((0, 0, 0), ((W, 0), (W, 5 * row), (0, 5 * row), (0, 0)))

        velocity = self.car.hull.linearVelocity
        true_speed = np.sqrt(np.square(velocity[0]) + np.square(velocity[1]))
        vertical_bar(5, 0.02 * true_speed, (1, 1, 1))

        # ABS sensors: one bar per wheel, driven by the wheel's omega.
        wheel_bars = (
            (7, 0, (0.0, 0, 1)),
            (8, 1, (0.0, 0, 1)),
            (9, 2, (0.2, 0, 1)),
            (10, 3, (0.2, 0, 1)),
        )
        for place, wheel_index, color in wheel_bars:
            vertical_bar(place, 0.01 * self.car.wheels[wheel_index].omega,
                         color)

        horizontal_bar(20, -10.0 * self.car.wheels[0].joint.angle, (0, 1, 0))
        horizontal_bar(30, -0.8 * self.car.hull.angularVelocity, (1, 0, 0))
        gl.glEnd()

        self.score_label.text = "%04i" % self.reward
        self.score_label.draw()
Exemplo n.º 2
0
class CarRacing(gym.Env, EzPickle):
    metadata = {
        'render.modes': ['human', 'rgb_array', 'state_pixels'],
        'video.frames_per_second': FPS
    }

    def __init__(self, verbose=1):
        """Create the physics world, gym spaces and the fixed checkpoint list.

        Args:
            verbose: Stored as ``self.verbose``; other methods print progress
                messages when it equals 1.
        """
        EzPickle.__init__(self)
        self.seed()

        # The listener is also stored on the env under a "keepref" name,
        # presumably so it stays referenced while the world uses it.
        listener = FrictionDetector(self)
        self.contactListener_keepref = listener
        self.world = Box2D.b2World((0, 0), contactListener=listener)

        # Rendering handles, created lazily.
        self.viewer = None
        self.invisible_state_window = None
        self.invisible_video_window = None

        # Episode state.
        self.road = None
        self.car = None
        self.reward = 0.0
        self.prev_reward = 0.0
        self.verbose = verbose

        # Shared tile fixture definition, created here with placeholder
        # unit-square vertices.
        placeholder_square = [(0, 0), (1, 0), (1, -1), (0, -1)]
        self.fd_tile = fixtureDef(
            shape=polygonShape(vertices=placeholder_square))

        # Action layout: steer in [-1, 1], gas in [0, 1], brake in [0, 1].
        action_low = np.array([-1, 0, 0])
        action_high = np.array([+1, +1, +1])
        self.action_space = spaces.Box(action_low,
                                       action_high,
                                       dtype=np.float32)
        self.observation_space = spaces.Box(low=0,
                                            high=255,
                                            shape=(STATE_H, STATE_W, 3),
                                            dtype=np.uint8)

        # Fixed (alpha, x, y) checkpoints so the track is the same on every
        # run. They stand in for the randomized generation this code used
        # before (uniform jitter on each checkpoint's angle and radius).
        # TODO Use a real way to keep a constant track across training runs
        checkpoint_count = 12
        self.checkpoints = [
            (0, 225.0, 0.0),
            (0.7825323624208509, 89.6427647192468, 89.1304349066121),
            (1.5543323243350344, 1.783985395462951, 108.34693482727236),
            (1.6057305460922464, -2.2173644459530517, 63.446740574663174),
            (2.6175081644916047, -58.76396976672586, 33.965461046114534),
            (2.7871461118931458, -134.63944761816262, 49.82679389320398),
            (3.414113547480756, -106.41825645850612, -29.741137759708423),
            (3.8745797378794, -77.61403468584427, -69.87679530100709),
            (4.193711736042842, -33.56139367373087, -58.79641863577176),
            (4.928823629511352, 29.852520836123745, -135.76810358020867),
            (5.29734709463665, 68.99766439052978, -104.18233435806235),
            (5.759586531581287, 194.85571585149864, -112.5000000000001),
        ]

        # Start line angle: half a checkpoint sector before angle 0.
        self.start_alpha = 2 * math.pi * (-0.5) / checkpoint_count

    def seed(self, seed=None):
        """Install a new random generator on the env.

        Args:
            seed: Passed straight to ``seeding.np_random``; ``None`` by
                default.

        Returns:
            A single-item list with the seed reported by
            ``seeding.np_random``.
        """
        rng, effective_seed = seeding.np_random(seed)
        self.np_random = rng
        return [effective_seed]

    def _destroy(self):
        """Destroy every road tile body and the car.

        A missing or empty road means there is nothing to tear down, so the
        method returns without touching the car.
        """
        if self.road:
            for body in self.road:
                self.world.DestroyBody(body)
            self.road = []
            self.car.destroy()

    def _create_track(self):
        """Build the track from the fixed ``self.checkpoints`` list.

        Steps:
          1. Starting at ``(1.5 * TRACK_RAD, 0)``, advance in
             ``TRACK_DETAIL_STEP`` increments, adjusting the angle ``beta``
             by at most ``TRACK_TURN_RATE`` per step according to the
             projection of the vector to the current destination
             checkpoint. Stop once more than four laps are counted or 2500
             steps were taken.
          2. Scanning backwards, find two successive crossings of
             ``self.start_alpha`` and keep the points between them.
          3. Create one static sensor body per segment, reusing
             ``self.fd_tile`` as the fixture definition.

        No random numbers are drawn here, and the border generation is
        disabled (kept only inside string literals).

        Side effects: sets ``self.road`` and ``self.track``, mutates
        ``self.fd_tile.shape.vertices`` and appends to ``self.road_poly``,
        which must already exist (``reset()`` creates it). Prints the tile
        range when ``self.verbose == 1``.

        Returns:
            ``True`` on success; ``False`` when no full loop was found or
            when the two ends of the loop do not meet closely enough.
        """
        checkpoints = self.checkpoints

        #print ("\n".join(str(h) for h in checkpoints))
        # self.road_poly = [ (    # uncomment this to see checkpoints
        #    [ (tx,ty) for a,tx,ty in checkpoints ],
        #    (0.7,0.7,0.9) ) ]
        self.road = []

        # Go from one checkpoint to another to create track
        x, y, beta = 1.5 * TRACK_RAD, 0, 0
        dest_i = 0
        laps = 0
        track = []
        no_freeze = 2500  # hard cap on the number of walk steps
        visited_other_side = False
        while True:
            # Polar angle of the current position around the origin.
            alpha = math.atan2(y, x)
            # A lap is counted when alpha becomes positive again after
            # having been negative.
            if visited_other_side and alpha > 0:
                laps += 1
                visited_other_side = False
            if alpha < 0:
                visited_other_side = True
                alpha += 2 * math.pi  # map to [0, 2*pi)
            while True:  # Find destination from checkpoints
                failed = True
                while True:
                    dest_alpha, dest_x, dest_y = checkpoints[dest_i %
                                                             len(checkpoints)]
                    if alpha <= dest_alpha:
                        failed = False
                        break
                    dest_i += 1
                    # Wrapped past the last checkpoint without a match.
                    if dest_i % len(checkpoints) == 0:
                        break
                if not failed:
                    break
                # Retry the search with alpha shifted back by one full turn.
                alpha -= 2 * math.pi
                continue
            # (r1x, r1y) is the unit vector at angle beta; (p1x, p1y) is that
            # vector rotated by +90 degrees and is the direction of travel.
            r1x = math.cos(beta)
            r1y = math.sin(beta)
            p1x = -r1y
            p1y = r1x
            dest_dx = dest_x - x  # vector towards destination
            dest_dy = dest_y - y
            proj = r1x * dest_dx + r1y * dest_dy  # destination vector projected on rad
            # Keep beta within 1.5*pi of alpha by whole-turn shifts.
            while beta - alpha > 1.5 * math.pi:
                beta -= 2 * math.pi
            while beta - alpha < -1.5 * math.pi:
                beta += 2 * math.pi
            prev_beta = beta
            proj *= SCALE
            # Turn proportionally to the projection, capped at
            # TRACK_TURN_RATE; projections within +/-0.3 cause no turn.
            if proj > 0.3:
                beta -= min(TRACK_TURN_RATE, abs(0.001 * proj))
            if proj < -0.3:
                beta += min(TRACK_TURN_RATE, abs(0.001 * proj))
            x += p1x * TRACK_DETAIL_STEP
            y += p1y * TRACK_DETAIL_STEP
            # Track point: (alpha, mean of old and new beta, x, y).
            track.append((alpha, prev_beta * 0.5 + beta * 0.5, x, y))
            if laps > 4:
                break
            no_freeze -= 1
            if no_freeze == 0:
                break
        # print "\n".join([str(t) for t in enumerate(track)])

        # Find closed loop range i1..i2, first loop should be ignored, second is OK
        i1, i2 = -1, -1
        i = len(track)
        # Scan from the end: the first start-line crossing found becomes i2,
        # the next (earlier) one becomes i1.
        while True:
            i -= 1
            if i == 0:
                return False  # Failed
            pass_through_start = track[i][0] > self.start_alpha and track[
                i - 1][0] <= self.start_alpha
            if pass_through_start and i2 == -1:
                i2 = i
            elif pass_through_start and i1 == -1:
                i1 = i
                break
        if self.verbose == 1:
            print("Track generation: %i..%i -> %i-tiles track" %
                  (i1, i2, i2 - i1))
        assert i1 != -1
        assert i2 != -1

        track = track[i1:i2 - 1]

        first_beta = track[0][1]
        first_perp_x = math.cos(first_beta)
        first_perp_y = math.sin(first_beta)
        # Length of perpendicular jump to put together head and tail
        well_glued_together = np.sqrt(
            np.square(first_perp_x * (track[0][2] - track[-1][2])) +
            np.square(first_perp_y * (track[0][3] - track[-1][3])))
        if well_glued_together > TRACK_DETAIL_STEP:
            return False

        # Red-white border on hard turns
        # NOTE: disabled -- the string literal below is never executed.
        '''
        border = [False]*len(track)
        for i in range(len(track)):
            good = True
            oneside = 0
            for neg in range(BORDER_MIN_COUNT):
                beta1 = track[i-neg-0][1]
                beta2 = track[i-neg-1][1]
                good &= abs(beta1 - beta2) > TRACK_TURN_RATE*0.2
                oneside += np.sign(beta1 - beta2)
            good &= abs(oneside) == BORDER_MIN_COUNT
            border[i] = good
        for i in range(len(track)):
            for neg in range(BORDER_MIN_COUNT):
                border[i-neg] |= border[i]
        '''

        # Create tiles
        for i in range(len(track)):
            # Each tile spans point i and its predecessor; for i == 0 the
            # predecessor is the last point, which closes the loop.
            alpha1, beta1, x1, y1 = track[i]
            alpha2, beta2, x2, y2 = track[i - 1]
            # Corners: each centre point offset by TRACK_WIDTH along +/- the
            # unit vector at angle beta.
            road1_l = (x1 - TRACK_WIDTH * math.cos(beta1),
                       y1 - TRACK_WIDTH * math.sin(beta1))
            road1_r = (x1 + TRACK_WIDTH * math.cos(beta1),
                       y1 + TRACK_WIDTH * math.sin(beta1))
            road2_l = (x2 - TRACK_WIDTH * math.cos(beta2),
                       y2 - TRACK_WIDTH * math.sin(beta2))
            road2_r = (x2 + TRACK_WIDTH * math.cos(beta2),
                       y2 + TRACK_WIDTH * math.sin(beta2))
            vertices = [road1_l, road1_r, road2_r, road2_l]
            # Re-point the shared fixture definition at this tile's corners
            # before creating the body from it.
            self.fd_tile.shape.vertices = vertices
            t = self.world.CreateStaticBody(fixtures=self.fd_tile)
            t.userData = t
            # Cycle three slightly different shades along the track.
            c = 0.01 * (i % 3)
            t.color = [ROAD_COLOR[0] + c, ROAD_COLOR[1] + c, ROAD_COLOR[2] + c]
            t.road_visited = False
            t.road_friction = 1.0
            t.fixtures[0].sensor = True
            self.road_poly.append(([road1_l, road1_r, road2_r,
                                    road2_l], t.color))
            self.road.append(t)
            # NOTE: border quads are disabled -- the string literal below is
            # evaluated and discarded on every iteration.
            '''
            if border[i]:
                side = np.sign(beta2 - beta1)
                b1_l = (x1 + side* TRACK_WIDTH        *math.cos(beta1), y1 + side* TRACK_WIDTH        *math.sin(beta1))
                b1_r = (x1 + side*(TRACK_WIDTH+BORDER)*math.cos(beta1), y1 + side*(TRACK_WIDTH+BORDER)*math.sin(beta1))
                b2_l = (x2 + side* TRACK_WIDTH        *math.cos(beta2), y2 + side* TRACK_WIDTH        *math.sin(beta2))
                b2_r = (x2 + side*(TRACK_WIDTH+BORDER)*math.cos(beta2), y2 + side*(TRACK_WIDTH+BORDER)*math.sin(beta2))
                self.road_poly.append(( [b1_l, b1_r, b2_r, b2_l], (1,1,1) if i%2==0 else (1,0,0) ))
            '''
        self.track = track
        return True

    def reset(self):
        """Start a new episode on a freshly built track.

        Tears down the previous road and car, clears the per-episode
        counters, builds the track and places a new car at the first track
        point.

        Returns:
            The first observation, obtained from ``step(None)``.

        Raises:
            RuntimeError: If the track could not be built within a bounded
                number of attempts.
        """
        self._destroy()
        self.reward = 0.0
        self.prev_reward = 0.0
        self.tile_visited_count = 0
        self.t = 0.0
        self.road_poly = []

        # In this class _create_track() works from the fixed
        # self.checkpoints list and draws no random numbers, so a failed
        # attempt is expected to fail again in exactly the same way. An
        # unbounded retry loop would then spin forever; cap the attempts
        # (still allowing an overriding, randomized generator to retry) and
        # fail loudly.
        max_attempts = 100
        for _ in range(max_attempts):
            if self._create_track():
                break
            if self.verbose == 1:
                print(
                    "retry to generate track (normal if there are not many of this messages)"
                )
        else:
            raise RuntimeError(
                "track generation failed %d times in a row; "
                "check the checkpoints and track constants" % max_attempts)

        # Elements 1..3 of the first track point are passed on to Car.
        self.car = Car(self.world, *self.track[0][1:4])

        return self.step(None)[0]

    def step(self, action):
        """Run one simulation frame (``1 / FPS`` seconds) and score it.

        Args:
            action: Indexable ``(steer, gas, brake)``, or ``None``. ``None``
                is what ``reset()`` passes: physics advances, but no
                controls are applied and no reward bookkeeping happens.

        Returns:
            ``(state, step_reward, done, info)``; ``state`` is the
            ``'state_pixels'`` render and ``info`` is always ``{}``.
        """
        controlled = action is not None
        if controlled:
            # Steering input is negated before being handed to the car.
            self.car.steer(-action[0])
            self.car.gas(action[1])
            self.car.brake(action[2])

        frame_time = 1.0 / FPS
        self.car.step(frame_time)
        self.world.Step(frame_time, 6 * 30, 2 * 30)
        self.t += frame_time

        self.state = self.render("state_pixels")

        reward_delta = 0
        finished = False
        if controlled:  # First step without action, called from reset()
            # Fixed per-frame penalty.
            self.reward -= 0.1
            # We actually don't want to count fuel spent, we want car to be faster.
            self.car.fuel_spent = 0.0
            reward_delta = self.reward - self.prev_reward
            self.prev_reward = self.reward

            # Finished once every tile has been visited.
            finished = self.tile_visited_count == len(self.track)

            # Leaving the playfield ends the episode with a -100 reward.
            pos_x, pos_y = self.car.hull.position
            if abs(pos_x) > PLAYFIELD or abs(pos_y) > PLAYFIELD:
                finished = True
                reward_delta = -100
            # TODO: Exit if far off track

        return self.state, reward_delta, finished, {}

    def render(self, mode='human'):
        """Draw the current frame; return pixels for the array modes.

        Args:
            mode: One of ``'human'``, ``'rgb_array'``, ``'state_pixels'``.
                The array modes draw into a ``VIDEO_W x VIDEO_H`` /
                ``STATE_W x STATE_H`` viewport; ``'human'`` uses the window
                size multiplied by the backing scale factor when the window
                context exposes one.

        Returns:
            ``self.viewer.isopen`` for ``'human'``; a writable ``uint8``
            array of shape ``(VP_H, VP_W, 3)`` with the row order reversed
            relative to the buffer for the array modes; ``None`` when
            ``reset()`` has not been called yet.

        Raises:
            AssertionError: If ``mode`` is not one of the three known modes.
        """
        assert mode in ['human', 'state_pixels', 'rgb_array']

        if self.viewer is None:
            from gym.envs.classic_control import rendering
            self.viewer = rendering.Viewer(WINDOW_W, WINDOW_H)
            self.score_label = pyglet.text.Label('0000',
                                                 font_size=36,
                                                 x=20,
                                                 y=WINDOW_H * 2.5 / 40.00,
                                                 anchor_x='left',
                                                 anchor_y='center',
                                                 color=(255, 255, 255, 255))
            self.transform = rendering.Transform()

        if "t" not in self.__dict__:
            return  # reset() not called yet

        # Animate zoom first second: blend from 0.1 * SCALE to ZOOM * SCALE
        # as self.t goes from 0 to 1.
        zoom = 0.1 * SCALE * max(1 - self.t, 0) + ZOOM * SCALE * min(
            self.t, 1)
        scroll_x = self.car.hull.position[0]
        scroll_y = self.car.hull.position[1]
        # The view always follows the hull angle in this variant.
        angle = -self.car.hull.angle
        self.transform.set_scale(zoom, zoom)
        # Translate so the rotated, scaled car position lands at
        # (WINDOW_W / 2, WINDOW_H / 4).
        self.transform.set_translation(
            WINDOW_W / 2 - (scroll_x * zoom * math.cos(angle) -
                            scroll_y * zoom * math.sin(angle)),
            WINDOW_H / 4 - (scroll_x * zoom * math.sin(angle) +
                            scroll_y * zoom * math.cos(angle)))
        self.transform.set_rotation(angle)

        self.car.draw(self.viewer, mode != "state_pixels")

        win = self.viewer.window
        win.switch_to()
        win.dispatch_events()

        win.clear()
        t = self.transform
        if mode == 'rgb_array':
            VP_W = VIDEO_W
            VP_H = VIDEO_H
        elif mode == 'state_pixels':
            VP_W = STATE_W
            VP_H = STATE_H
        else:
            # Scale the viewport by the backing scale factor when the
            # context exposes an _nscontext.
            pixel_scale = 1
            if hasattr(win.context, '_nscontext'):
                pixel_scale = win.context._nscontext.view().backingScaleFactor(
                )  # pylint: disable=protected-access
            VP_W = int(pixel_scale * WINDOW_W)
            VP_H = int(pixel_scale * WINDOW_H)

        gl.glViewport(0, 0, VP_W, VP_H)
        t.enable()
        self.render_road()
        for geom in self.viewer.onetime_geoms:
            geom.render()
        self.viewer.onetime_geoms = []
        t.disable()
        # Indicators are intentionally not drawn in this variant.

        if mode == 'human':
            win.flip()
            return self.viewer.isopen

        image_data = pyglet.image.get_buffer_manager().get_color_buffer(
        ).get_image_data()
        # np.fromstring's binary mode (sep='') is deprecated; frombuffer is
        # the documented replacement. It yields a read-only view, so copy
        # after reversing the rows and dropping the 4th channel to hand back
        # a writable array as before.
        raw = np.frombuffer(image_data.data, dtype=np.uint8)
        arr = raw.reshape(VP_H, VP_W, 4)[::-1, :, 0:3].copy()

        return arr

    def close(self):
        """Shut the viewer down, if there is one, and clear the attribute."""
        if self.viewer is None:
            return
        self.viewer.close()
        self.viewer = None

    def render_road(self):
        """Draw the playfield background and every road polygon.

        All vertices are emitted as quads inside a single
        ``glBegin``/``glEnd`` pair. The checker pattern is not drawn in this
        variant.
        """
        gl.glBegin(gl.GL_QUADS)

        # Background: one quad covering the whole playfield.
        gl.glColor4f(0.4, 0.8, 0.4, 1.0)
        gl.glVertex3f(-PLAYFIELD, +PLAYFIELD, 0)
        gl.glVertex3f(+PLAYFIELD, +PLAYFIELD, 0)
        gl.glVertex3f(+PLAYFIELD, -PLAYFIELD, 0)
        gl.glVertex3f(-PLAYFIELD, -PLAYFIELD, 0)

        # The colour is still switched here although no checker quads
        # follow; the call is kept so the GL call sequence is unchanged.
        gl.glColor4f(0.4, 0.9, 0.4, 1.0)

        # Road tiles collected during track creation.
        for corners, rgb in self.road_poly:
            gl.glColor4f(rgb[0], rgb[1], rgb[2], 1)
            for corner in corners:
                gl.glVertex3f(corner[0], corner[1], 0)

        gl.glEnd()

    def render_indicators(self, W, H):
        """Draw the indicator strip along the bottom and the score label.

        Args:
            W: Width used to lay out the strip; one column is ``W / 40``.
            H: Height used to lay out the strip; one row is ``H / 40``.
        """
        gl.glBegin(gl.GL_QUADS)
        col_w = W / 40.0
        row_h = H / 40.0

        def emit_quad(rgb, corners):
            # One solid-coloured quad given as four (x, y) corners.
            gl.glColor4f(rgb[0], rgb[1], rgb[2], 1)
            for vx, vy in corners:
                gl.glVertex3f(vx, vy, 0)

        def vertical_ind(place, val, color):
            # One column wide, rising from row 1 by ``val`` rows.
            emit_quad(color, (
                ((place + 0) * col_w, row_h + row_h * val),
                ((place + 1) * col_w, row_h + row_h * val),
                ((place + 1) * col_w, row_h),
                ((place + 0) * col_w, row_h),
            ))

        def horiz_ind(place, val, color):
            # Between rows 2 and 4, extending ``val`` columns from ``place``.
            emit_quad(color, (
                ((place + 0) * col_w, 4 * row_h),
                ((place + val) * col_w, 4 * row_h),
                ((place + val) * col_w, 2 * row_h),
                ((place + 0) * col_w, 2 * row_h),
            ))

        # Black backdrop five rows high across the full width.
        emit_quad((0, 0, 0), ((W, 0), (W, 5 * row_h), (0, 5 * row_h), (0, 0)))

        hull_velocity = self.car.hull.linearVelocity
        true_speed = np.sqrt(
            np.square(hull_velocity[0]) + np.square(hull_velocity[1]))
        vertical_ind(5, 0.02 * true_speed, (1, 1, 1))

        # ABS sensors: one bar per wheel, driven by the wheel's omega.
        for place, wheel_index, color in ((7, 0, (0.0, 0, 1)),
                                          (8, 1, (0.0, 0, 1)),
                                          (9, 2, (0.2, 0, 1)),
                                          (10, 3, (0.2, 0, 1))):
            vertical_ind(place, 0.01 * self.car.wheels[wheel_index].omega,
                         color)

        horiz_ind(20, -10.0 * self.car.wheels[0].joint.angle, (0, 1, 0))
        horiz_ind(30, -0.8 * self.car.hull.angularVelocity, (1, 0, 0))
        gl.glEnd()

        self.score_label.text = "%04i" % self.reward
        self.score_label.draw()
Exemplo n.º 3
0
class CarRacing(gym.Env, EzPickle):
    metadata = {
        'render.modes': ['human', 'rgb_array', 'state_pixels'],
        'video.frames_per_second' : FPS
    }

    def __init__(self, verbose=1):
        """Set up the physics world, bookkeeping fields and gym spaces.

        Args:
            verbose: when equal to 1, track generation prints progress lines.
        """
        EzPickle.__init__(self)
        self.seed()

        # The listener is also stored on the instance (the attribute name
        # suggests this is to keep a reference to it alive).
        listener = FrictionDetector(self)
        self.contactListener_keepref = listener
        self.world = Box2D.b2World((0,0), contactListener=listener)

        # Rendering objects are created lazily.
        self.viewer = None
        self.invisible_state_window = None
        self.invisible_video_window = None

        # Track and car are built by reset().
        self.road = None
        self.car = None

        self.reward = 0.0
        self.prev_reward = 0.0
        self.verbose = verbose

        # Action is (steer, gas, brake).
        action_low = np.array([-1,0,0])
        action_high = np.array([+1,+1,+1])
        self.action_space = spaces.Box(action_low, action_high, dtype=np.float32)

        # Observation is an RGB image of STATE_H x STATE_W pixels.
        frame_shape = (STATE_H, STATE_W, 3)
        self.observation_space = spaces.Box(low=0, high=255, shape=frame_shape, dtype=np.uint8)

    def seed(self, seed=None):
        """Create the environment's random generator and return ``[seed]``.

        The generator from ``seeding.np_random`` is stored as
        ``self.np_random``; the seed it reports is returned in a list.
        """
        generator, used_seed = seeding.np_random(seed)
        self.np_random = generator
        return [used_seed]

    def _destroy(self):
        """Remove all road tile bodies from the world and destroy the car.

        Does nothing when ``self.road`` is None or empty.
        """
        if self.road:
            for tile in self.road:
                self.world.DestroyBody(tile)
            self.road = []
            self.car.destroy()

    def _create_track(self):
        """Generate a random closed track and build its road tiles.

        Random checkpoints are placed around the origin, a path is traced
        through them in steps of TRACK_DETAIL_STEP, one closed lap is cut out
        of that path, and a static sensor body is created in ``self.world``
        for every point of the lap. Tile polygons (plus red/white border
        polygons on hard turns) are appended to ``self.road_poly``, which must
        already exist when this method is called.

        Returns:
            True on success, with ``self.road`` and ``self.track`` populated.
            False when no closed lap was found in the traced path or when the
            two ends of the lap are too far apart; reset() retries in that
            case.
        """
        CHECKPOINTS = 12

        # Create checkpoints
        # Each entry is (angle, x, y). The angle gets a random offset of up to
        # one sector and the radius is drawn between TRACK_RAD/3 and TRACK_RAD.
        checkpoints = []
        for c in range(CHECKPOINTS):
            alpha = 2*math.pi*c/CHECKPOINTS + self.np_random.uniform(0, 2*math.pi*1/CHECKPOINTS)
            rad = self.np_random.uniform(TRACK_RAD/3, TRACK_RAD)
            # The first and last checkpoints are fixed (no random offset,
            # radius 1.5*TRACK_RAD).
            if c==0:
                alpha = 0
                rad = 1.5*TRACK_RAD
            if c==CHECKPOINTS-1:
                alpha = 2*math.pi*c/CHECKPOINTS
                # Angle used below to detect a pass through the start line.
                self.start_alpha = 2*math.pi*(-0.5)/CHECKPOINTS
                rad = 1.5*TRACK_RAD
            checkpoints.append( (alpha, rad*math.cos(alpha), rad*math.sin(alpha)) )

        #print "\n".join(str(h) for h in checkpoints)
        #self.road_poly = [ (    # uncomment this to see checkpoints
        #    [ (tx,ty) for a,tx,ty in checkpoints ],
        #    (0.7,0.7,0.9) ) ]
        self.road = []

        # Go from one checkpoint to another to create track
        # (x, y) is the current point, beta the current direction angle.
        x, y, beta = 1.5*TRACK_RAD, 0, 0
        dest_i = 0
        laps = 0
        track = []
        no_freeze = 2500  # hard cap on the number of traced points
        visited_other_side = False
        while 1:
            # Polar angle of the current point around the origin.
            alpha = math.atan2(y, x)
            # A lap is counted when the angle turns positive again after
            # having been negative.
            if visited_other_side and alpha > 0:
                laps += 1
                visited_other_side = False
            if alpha < 0:
                visited_other_side = True
                alpha += 2*math.pi
            while True: # Find destination from checkpoints
                failed = True
                while True:
                    dest_alpha, dest_x, dest_y = checkpoints[dest_i % len(checkpoints)]
                    if alpha <= dest_alpha:
                        failed = False
                        break
                    dest_i += 1
                    # Went through the whole list without finding a target.
                    if dest_i % len(checkpoints) == 0: break
                if not failed: break
                # Wrap the angle by one turn and search again.
                alpha -= 2*math.pi
                continue
            # r1 is the unit vector at angle beta; p1 is r1 rotated by 90
            # degrees and is the direction the point actually advances in.
            r1x = math.cos(beta)
            r1y = math.sin(beta)
            p1x = -r1y
            p1y = r1x
            dest_dx = dest_x - x  # vector towards destination
            dest_dy = dest_y - y
            proj = r1x*dest_dx + r1y*dest_dy  # destination vector projected on rad
            # Keep beta within 1.5*pi of alpha.
            while beta - alpha >  1.5*math.pi: beta -= 2*math.pi
            while beta - alpha < -1.5*math.pi: beta += 2*math.pi
            prev_beta = beta
            proj *= SCALE
            # Turn towards the destination, by at most TRACK_TURN_RATE per step.
            if proj >  0.3: beta -= min(TRACK_TURN_RATE, abs(0.001*proj))
            if proj < -0.3: beta += min(TRACK_TURN_RATE, abs(0.001*proj))
            x += p1x*TRACK_DETAIL_STEP
            y += p1y*TRACK_DETAIL_STEP
            # Stored point: (polar angle, mean of old and new beta, x, y).
            track.append( (alpha,prev_beta*0.5 + beta*0.5,x,y) )
            if laps > 4: break
            no_freeze -= 1
            if no_freeze==0: break
        #print "\n".join([str(t) for t in enumerate(track)])

        # Find closed loop range i1..i2, first loop should be ignored, second is OK
        # The path is scanned backwards: i2 is the last pass through
        # start_alpha and i1 the pass before it.
        i1, i2 = -1, -1
        i = len(track)
        while True:
            i -= 1
            if i==0: return False  # Failed
            pass_through_start = track[i][0] > self.start_alpha and track[i-1][0] <= self.start_alpha
            if pass_through_start and i2==-1:
                i2 = i
            elif pass_through_start and i1==-1:
                i1 = i
                break
        if self.verbose == 1:
            print("Track generation: %i..%i -> %i-tiles track" % (i1, i2, i2-i1))
        assert i1!=-1
        assert i2!=-1

        # Keep only the points of that one lap.
        track = track[i1:i2-1]

        first_beta = track[0][1]
        first_perp_x = math.cos(first_beta)
        first_perp_y = math.sin(first_beta)
        # Length of perpendicular jump to put together head and tail
        well_glued_together = np.sqrt(
            np.square( first_perp_x*(track[0][2] - track[-1][2]) ) +
            np.square( first_perp_y*(track[0][3] - track[-1][3]) ))
        # Reject the lap when head and tail are more than one step apart.
        if well_glued_together > TRACK_DETAIL_STEP:
            return False

        # Red-white border on hard turns
        # A point is flagged when each of the BORDER_MIN_COUNT most recent
        # beta changes exceeds 0.2*TRACK_TURN_RATE and all have the same sign.
        # Negative indices wrap around to the end of the lap.
        border = [False]*len(track)
        for i in range(len(track)):
            good = True
            oneside = 0
            for neg in range(BORDER_MIN_COUNT):
                beta1 = track[i-neg-0][1]
                beta2 = track[i-neg-1][1]
                good &= abs(beta1 - beta2) > TRACK_TURN_RATE*0.2
                oneside += np.sign(beta1 - beta2)
            good &= abs(oneside) == BORDER_MIN_COUNT
            border[i] = good
        # Spread each flag backwards over the points that led up to it.
        for i in range(len(track)):
            for neg in range(BORDER_MIN_COUNT):
                border[i-neg] |= border[i]

        # Create tiles
        # Each tile is the quad between point i and point i-1; for i == 0 the
        # previous point is the last one, which closes the loop.
        for i in range(len(track)):
            alpha1, beta1, x1, y1 = track[i]
            alpha2, beta2, x2, y2 = track[i-1]
            # Corners lie TRACK_WIDTH to either side of each point along the
            # (cos beta, sin beta) direction.
            road1_l = (x1 - TRACK_WIDTH*math.cos(beta1), y1 - TRACK_WIDTH*math.sin(beta1))
            road1_r = (x1 + TRACK_WIDTH*math.cos(beta1), y1 + TRACK_WIDTH*math.sin(beta1))
            road2_l = (x2 - TRACK_WIDTH*math.cos(beta2), y2 - TRACK_WIDTH*math.sin(beta2))
            road2_r = (x2 + TRACK_WIDTH*math.cos(beta2), y2 + TRACK_WIDTH*math.sin(beta2))
            t = self.world.CreateStaticBody( fixtures = fixtureDef(
                shape=polygonShape(vertices=[road1_l, road1_r, road2_r, road2_l])
                ))
            t.userData = t
            # Small brightness offset cycling over three consecutive tiles.
            c = 0.01*(i%3)
            t.color = [ROAD_COLOR[0] + c, ROAD_COLOR[1] + c, ROAD_COLOR[2] + c]
            t.road_visited = False
            t.road_friction = 1.0
            # The fixture is marked as a sensor.
            t.fixtures[0].sensor = True
            self.road_poly.append(( [road1_l, road1_r, road2_r, road2_l], t.color ))
            self.road.append(t)
            if border[i]:
                # The sign of the beta change selects the side of the road;
                # the strip covers TRACK_WIDTH..TRACK_WIDTH+BORDER on that
                # side and alternates white and red from tile to tile.
                side = np.sign(beta2 - beta1)
                b1_l = (x1 + side* TRACK_WIDTH        *math.cos(beta1), y1 + side* TRACK_WIDTH        *math.sin(beta1))
                b1_r = (x1 + side*(TRACK_WIDTH+BORDER)*math.cos(beta1), y1 + side*(TRACK_WIDTH+BORDER)*math.sin(beta1))
                b2_l = (x2 + side* TRACK_WIDTH        *math.cos(beta2), y2 + side* TRACK_WIDTH        *math.sin(beta2))
                b2_r = (x2 + side*(TRACK_WIDTH+BORDER)*math.cos(beta2), y2 + side*(TRACK_WIDTH+BORDER)*math.sin(beta2))
                self.road_poly.append(( [b1_l, b1_r, b2_r, b2_l], (1,1,1) if i%2==0 else (1,0,0) ))
        self.track = track
        return True

    def reset(self):
        """Start a new episode on a freshly generated track.

        Destroys the previous track and car, clears the reward and time
        counters, regenerates a track until generation succeeds, places a new
        car at the first track point and returns the first observation
        (element 0 of ``step(None)``).
        """
        self._destroy()
        self.reward = self.prev_reward = 0.0
        self.tile_visited_count = 0
        self.t = 0.0
        self.road_poly = []

        # Track generation can fail; keep trying until it reports success.
        while not self._create_track():
            if self.verbose == 1:
                print("retry to generate track (normal if there are not many of this messages)")

        first_point = self.track[0]
        self.car = Car(self.world, *first_point[1:4])

        initial_observation = self.step(None)[0]
        return initial_observation

    def step(self, action):
        """Advance the simulation by 1/FPS seconds.

        Args:
            action: indexable (steer, gas, brake), or None to step without
                applying controls (used by reset()).

        Returns:
            Tuple ``(state, step_reward, done, info)``. ``state`` is the
            'state_pixels' render; ``info`` is an empty dict. With
            ``action=None`` the reward is 0 and done is False.
        """
        apply_controls = action is not None
        if apply_controls:
            self.car.steer(-action[0])
            self.car.gas(action[1])
            self.car.brake(action[2])

        dt = 1.0/FPS
        self.car.step(dt)
        self.world.Step(dt, 6*30, 2*30)
        self.t += dt

        self.state = self.render("state_pixels")

        if not apply_controls:
            # First step without action, called from reset()
            return self.state, 0, False, {}

        # Constant time penalty per step.
        self.reward -= 0.1
        # We actually don't want to count fuel spent, we want car to be faster.
        self.car.fuel_spent = 0.0
        step_reward = self.reward - self.prev_reward
        self.prev_reward = self.reward

        done = False
        if self.tile_visited_count==len(self.track):
            done = True

        # Leaving the playfield ends the episode with a fixed penalty.
        x, y = self.car.hull.position
        if abs(x) > PLAYFIELD or abs(y) > PLAYFIELD:
            done = True
            step_reward = -100

        return self.state, step_reward, done, {}

    def render(self, mode='human'):
        """Render the current frame.

        Args:
            mode: 'human' draws into the window and flips it; 'rgb_array' and
                'state_pixels' read the frame back from a VIDEO_W x VIDEO_H or
                STATE_W x STATE_H viewport respectively.

        Returns:
            ``self.viewer.isopen`` for 'human'; an (H, W, 3) uint8 array for
            the other two modes; None when reset() has not been called yet.
        """
        assert mode in ['human', 'state_pixels', 'rgb_array']
        if self.viewer is None:
            from gym.envs.classic_control import rendering
            self.viewer = rendering.Viewer(WINDOW_W, WINDOW_H)
            self.score_label = pyglet.text.Label('0000', font_size=36,
                x=20, y=WINDOW_H*2.5/40.00, anchor_x='left', anchor_y='center',
                color=(255,255,255,255))
            self.transform = rendering.Transform()

        if "t" not in self.__dict__: return  # reset() not called yet

        zoom = 0.1*SCALE*max(1-self.t, 0) + ZOOM*SCALE*min(self.t, 1)   # Animate zoom first second
        scroll_x = self.car.hull.position[0]
        scroll_y = self.car.hull.position[1]
        angle = -self.car.hull.angle
        vel = self.car.hull.linearVelocity
        # Above a small speed the view follows the velocity direction instead
        # of the hull angle.
        if np.linalg.norm(vel) > 0.5:
            angle = math.atan2(vel[0], vel[1])
        self.transform.set_scale(zoom, zoom)
        self.transform.set_translation(
            WINDOW_W/2 - (scroll_x*zoom*math.cos(angle) - scroll_y*zoom*math.sin(angle)),
            WINDOW_H/4 - (scroll_x*zoom*math.sin(angle) + scroll_y*zoom*math.cos(angle)) )
        self.transform.set_rotation(angle)

        self.car.draw(self.viewer, mode!="state_pixels")

        win = self.viewer.window
        win.switch_to()
        win.dispatch_events()

        win.clear()
        t = self.transform
        if mode=='rgb_array':
            VP_W = VIDEO_W
            VP_H = VIDEO_H
        elif mode == 'state_pixels':
            VP_W = STATE_W
            VP_H = STATE_H
        else:
            pixel_scale = 1
            if hasattr(win.context, '_nscontext'):
                pixel_scale = win.context._nscontext.view().backingScaleFactor()  # pylint: disable=protected-access
            # The scale factor may be a float; the viewport size must be
            # integral for glViewport (and for reshape below).
            VP_W = int(pixel_scale * WINDOW_W)
            VP_H = int(pixel_scale * WINDOW_H)

        gl.glViewport(0, 0, VP_W, VP_H)
        t.enable()
        self.render_road()
        for geom in self.viewer.onetime_geoms:
            geom.render()
        self.viewer.onetime_geoms = []
        t.disable()
        self.render_indicators(WINDOW_W, WINDOW_H)

        if mode == 'human':
            win.flip()
            return self.viewer.isopen

        image_data = pyglet.image.get_buffer_manager().get_color_buffer().get_image_data()
        # np.fromstring's binary mode is deprecated; frombuffer reads the same
        # bytes, and copy() keeps the result writable as before.
        arr = np.frombuffer(image_data.data, dtype=np.uint8).copy()
        arr = arr.reshape(VP_H, VP_W, 4)
        # Flip the row order and drop the fourth channel.
        arr = arr[::-1, :, 0:3]

        return arr

    def close(self):
        """Close the viewer, if one was created, and forget it."""
        viewer = self.viewer
        if viewer is None:
            return
        viewer.close()
        self.viewer = None

    def render_road(self):
        """Draw the playfield background, its checker squares and the road.

        Everything is emitted as quads inside one glBegin/glEnd pair: one
        large square covering the playfield, a grid of lighter squares on
        every other cell, then each polygon stored in ``self.road_poly``.
        """
        vertex = gl.glVertex3f
        gl.glBegin(gl.GL_QUADS)

        # Playfield square.
        gl.glColor4f(0.4, 0.8, 0.4, 1.0)
        playfield_corners = (
            (-PLAYFIELD, +PLAYFIELD),
            (+PLAYFIELD, +PLAYFIELD),
            (+PLAYFIELD, -PLAYFIELD),
            (-PLAYFIELD, -PLAYFIELD),
        )
        for px, py in playfield_corners:
            vertex(px, py, 0)

        # Lighter squares of side PLAYFIELD/20 on every second grid cell.
        gl.glColor4f(0.4, 0.9, 0.4, 1.0)
        cell = PLAYFIELD/20.0
        for gx in range(-20, 20, 2):
            for gy in range(-20, 20, 2):
                x0 = cell*gx
                y0 = cell*gy
                x1 = x0 + cell
                y1 = y0 + cell
                vertex(x1, y0, 0)
                vertex(x0, y0, 0)
                vertex(x0, y1, 0)
                vertex(x1, y1, 0)

        # Road tiles and border strips, each in its own colour.
        for corners, rgb in self.road_poly:
            gl.glColor4f(rgb[0], rgb[1], rgb[2], 1)
            for corner in corners:
                vertex(corner[0], corner[1], 0)
        gl.glEnd()

    def render_indicators(self, W, H):
        """Draw the HUD strip, its gauge bars and the score label.

        Layout is expressed in cells of W/40 by H/40. Everything except the
        score label is emitted as quads inside one glBegin/glEnd pair.
        """
        col_w = W/40.0
        row_h = H/40.0

        def quad(color, corners):
            # One opaque colour followed by the four corners, in order.
            gl.glColor4f(color[0], color[1], color[2], 1)
            for vx, vy in corners:
                gl.glVertex3f(vx, vy, 0)

        def vertical_ind(place, val, color):
            # Bar one cell wide, rising from row_h by val cells.
            left = (place+0)*col_w
            right = (place+1)*col_w
            top = row_h + row_h*val
            quad(color, [(left, top), (right, top), (right, row_h), (left, row_h)])

        def horiz_ind(place, val, color):
            # Bar between 2 and 4 cells high, extending val cells sideways.
            start = (place+0)*col_w
            end = (place+val)*col_w
            quad(color, [(start, 4*row_h), (end, 4*row_h), (end, 2*row_h), (start, 2*row_h)])

        gl.glBegin(gl.GL_QUADS)

        # Black background strip, five cells high, across the full width.
        quad((0,0,0), [(W, 0), (W, 5*row_h), (0, 5*row_h), (0, 0)])

        velocity = self.car.hull.linearVelocity
        true_speed = np.sqrt(np.square(velocity[0]) + np.square(velocity[1]))
        vertical_ind(5, 0.02*true_speed, (1,1,1))

        # Angular velocity of each of the four wheels ("ABS sensors").
        wheel_slots = (
            (7, 0, (0.0,0,1)),
            (8, 1, (0.0,0,1)),
            (9, 2, (0.2,0,1)),
            (10, 3, (0.2,0,1)),
        )
        for place, wheel_index, color in wheel_slots:
            vertical_ind(place, 0.01*self.car.wheels[wheel_index].omega, color)

        # Joint angle of wheel 0, then angular velocity of the hull.
        horiz_ind(20, -10.0*self.car.wheels[0].joint.angle, (0,1,0))
        horiz_ind(30, -0.8*self.car.hull.angularVelocity, (1,0,0))
        gl.glEnd()

        self.score_label.text = "%04i" % self.reward
        self.score_label.draw()
Exemplo n.º 4
0
class CarRacing(gym.Env, EzPickle):
    metadata = {
        'render.modes': ['human', 'rgb_array', 'state_pixels'],
        'video.frames_per_second': FPS
    }

    def __init__(self, seed=None, verbose=0):
        """Set up the physics world, bookkeeping fields and gym spaces.

        Args:
            seed: forwarded to ``self.seed``; the value it returns is stored
                as ``self.id``.
            verbose: when equal to 1, track generation prints progress lines.
        """
        EzPickle.__init__(self)

        # The world is created without a contact listener in this variant.
        self.world = Box2D.b2World((0, 0))
        self.id = self.seed(seed=seed)

        # Rendering objects are created lazily.
        self.viewer = None
        self.invisible_state_window = None
        self.invisible_video_window = None
        self.labels = []

        # Track and car are built by reset().
        self.road = None
        self.car = None

        self.dt = 1.0 / FPS
        self.action = np.zeros((3, ))
        self.state = np.zeros((11, ))
        self.reward = 0.0
        self.prev_reward = 0.0
        self.verbose = verbose
        self.track_width = TRACK_WIDTH

        # Fixture definition reused for every tile; its vertices are
        # overwritten per tile during track creation.
        placeholder_quad = [(0, 0), (1, 0), (1, -1), (0, -1)]
        self.fd_tile = fixtureDef(shape=polygonShape(vertices=placeholder_quad))

        # Action is a 3-vector bounded by [-1, 0, 0] and [1, 1, 1].
        action_low = np.array([-1, 0, 0])
        action_high = np.array([1, 1, 1])
        self.action_space = spaces.Box(action_low, action_high, dtype=np.float32)

        # Observation is an unbounded 11-element float vector (not an image).
        self.observation_space = spaces.Box(low=-np.inf,
                                            high=np.inf,
                                            shape=(11, ),
                                            dtype=np.float32)

    def seed(self, seed=None):
        """Create the random generator and return the seed as a hex string.

        The generator from ``seeding.np_random`` is stored as
        ``self.np_random``; the seed it reports is returned via ``hex()``.
        """
        generator, used_seed = seeding.np_random(seed)
        self.np_random = generator
        return hex(used_seed)

    def _destroy(self):
        """Remove all road tile bodies from the world and destroy the car.

        Does nothing when ``self.road`` is None or empty.
        """
        if self.road:
            for tile in self.road:
                self.world.DestroyBody(tile)
            self.road = []
            self.car.destroy()

    def _create_track(self):
        """Generate a random closed track and build its road tiles.

        Random checkpoints are placed around the origin, a path is traced
        through them in steps of TRACK_DETAIL_STEP, one closed lap is cut out
        of that path, and a static sensor body is created in ``self.world``
        for every point of the lap. Tile polygons (plus red/white border
        polygons on hard turns) are appended to ``self.road_poly``, which must
        already exist when this method is called.

        Returns:
            True on success, with ``self.road`` and ``self.track`` populated.
            False when no closed lap was found in the traced path or when the
            two ends of the lap are too far apart; reset() retries in that
            case.
        """
        CHECKPOINTS = 12

        # Create checkpoints
        # Each entry is (angle, x, y). The angle gets a random offset of up to
        # one sector and the radius is drawn between TRACK_RAD/3 and TRACK_RAD.
        checkpoints = []
        for c in range(CHECKPOINTS):
            alpha = 2 * math.pi * c / CHECKPOINTS + self.np_random.uniform(
                0, 2 * math.pi * 1 / CHECKPOINTS)
            rad = self.np_random.uniform(TRACK_RAD / 3, TRACK_RAD)
            # The first and last checkpoints are fixed (no random offset,
            # radius 1.5 * TRACK_RAD).
            if c == 0:
                alpha = 0
                rad = 1.5 * TRACK_RAD
            if c == CHECKPOINTS - 1:
                alpha = 2 * math.pi * c / CHECKPOINTS
                # Angle used below to detect a pass through the start line.
                self.start_alpha = 2 * math.pi * (-0.5) / CHECKPOINTS
                rad = 1.5 * TRACK_RAD
            checkpoints.append(
                (alpha, rad * math.cos(alpha), rad * math.sin(alpha)))

        #print "\n".join(str(h) for h in checkpoints)
        #self.road_poly = [([(tx,ty) for a,tx,ty in checkpoints], (0.7,0.7,0.9))] # uncomment this to see checkpoints

        self.road = []

        # Go from one checkpoint to another to create track
        # (x, y) is the current point, beta the current direction angle.
        x, y, beta = 1.5 * TRACK_RAD, 0, 0
        dest_i = 0
        laps = 0
        track = []
        no_freeze = 2500  # hard cap on the number of traced points
        visited_other_side = False
        while True:
            # Polar angle of the current point around the origin.
            alpha = math.atan2(y, x)
            # A lap is counted when the angle turns positive again after
            # having been negative.
            if visited_other_side and alpha > 0:
                laps += 1
                visited_other_side = False
            if alpha < 0:
                visited_other_side = True
                alpha += 2 * math.pi
            while True:  # Find destination from checkpoints
                failed = True
                while True:
                    dest_alpha, dest_x, dest_y = checkpoints[dest_i %
                                                             len(checkpoints)]
                    if alpha <= dest_alpha:
                        failed = False
                        break
                    dest_i += 1
                    # Went through the whole list without finding a target.
                    if dest_i % len(checkpoints) == 0:
                        break
                if not failed:
                    break
                # Wrap the angle by one turn and search again.
                alpha -= 2 * math.pi
                continue
            # r1 is the unit vector at angle beta; p1 is r1 rotated by 90
            # degrees and is the direction the point actually advances in.
            r1x = math.cos(beta)
            r1y = math.sin(beta)
            p1x = -r1y
            p1y = r1x
            dest_dx = dest_x - x  # vector towards destination
            dest_dy = dest_y - y
            proj = r1x * dest_dx + r1y * dest_dy  # destination vector projected on rad
            # Keep beta within 1.5*pi of alpha.
            while beta - alpha > 1.5 * math.pi:
                beta -= 2 * math.pi
            while beta - alpha < -1.5 * math.pi:
                beta += 2 * math.pi
            prev_beta = beta
            proj *= SCALE
            # Turn towards the destination, by at most TRACK_TURN_RATE per step.
            if proj > 0.3:
                beta -= min(TRACK_TURN_RATE, abs(0.001 * proj))
            if proj < -0.3:
                beta += min(TRACK_TURN_RATE, abs(0.001 * proj))
            x += p1x * TRACK_DETAIL_STEP
            y += p1y * TRACK_DETAIL_STEP
            # Stored point: (polar angle, mean of old and new beta, x, y).
            track.append((alpha, prev_beta * 0.5 + beta * 0.5, x, y))
            if laps > 4:
                break
            no_freeze -= 1
            if no_freeze == 0:
                break
        #print "\n".join([str(t) for t in enumerate(track)])

        # Find closed loop range i1..i2, first loop should be ignored, second is OK
        # The path is scanned backwards: i2 is the last pass through
        # start_alpha and i1 the pass before it.
        i1, i2 = -1, -1
        i = len(track)
        while True:
            i -= 1
            if i == 0:
                return False  # Failed
            pass_through_start = track[i][0] > self.start_alpha and track[
                i - 1][0] <= self.start_alpha
            if pass_through_start and i2 == -1:
                i2 = i
            elif pass_through_start and i1 == -1:
                i1 = i
                break
        if self.verbose == 1:
            print("Track generation: %i..%i -> %i-tiles track" %
                  (i1, i2, i2 - i1))
        assert i1 != -1
        assert i2 != -1

        # Keep only the points of that one lap.
        track = track[i1:i2 - 1]

        first_beta = track[0][1]
        first_perp_x = math.cos(first_beta)
        first_perp_y = math.sin(first_beta)
        # Length of perpendicular jump to put together head and tail
        well_glued_together = np.sqrt(
            np.square(first_perp_x * (track[0][2] - track[-1][2])) +
            np.square(first_perp_y * (track[0][3] - track[-1][3])))
        # Reject the lap when head and tail are more than one step apart.
        if well_glued_together > TRACK_DETAIL_STEP:
            return False

        # Red-white border on hard turns
        # A point is flagged when each of the BORDER_MIN_COUNT most recent
        # beta changes exceeds 0.2 * TRACK_TURN_RATE and all have the same
        # sign. Negative indices wrap around to the end of the lap.
        border = [False] * len(track)
        for i in range(len(track)):
            good = True
            oneside = 0
            for neg in range(BORDER_MIN_COUNT):
                beta1 = track[i - neg - 0][1]
                beta2 = track[i - neg - 1][1]
                good &= abs(beta1 - beta2) > TRACK_TURN_RATE * 0.2
                oneside += np.sign(beta1 - beta2)
            good &= abs(oneside) == BORDER_MIN_COUNT
            border[i] = good
        # Spread each flag backwards over the points that led up to it.
        for i in range(len(track)):
            for neg in range(BORDER_MIN_COUNT):
                border[i - neg] |= border[i]

        # Create tiles
        # Each tile is the quad between point i and point i-1; for i == 0 the
        # previous point is the last one, which closes the loop.
        for i in range(len(track)):
            alpha1, beta1, x1, y1 = track[i]
            alpha2, beta2, x2, y2 = track[i - 1]
            # Corners lie TRACK_WIDTH to either side of each point along the
            # (cos beta, sin beta) direction.
            road1_l = (x1 - TRACK_WIDTH * math.cos(beta1),
                       y1 - TRACK_WIDTH * math.sin(beta1))
            road1_r = (x1 + TRACK_WIDTH * math.cos(beta1),
                       y1 + TRACK_WIDTH * math.sin(beta1))
            road2_l = (x2 - TRACK_WIDTH * math.cos(beta2),
                       y2 - TRACK_WIDTH * math.sin(beta2))
            road2_r = (x2 + TRACK_WIDTH * math.cos(beta2),
                       y2 + TRACK_WIDTH * math.sin(beta2))
            vertices = [road1_l, road1_r, road2_r, road2_l]
            # The shared fixture definition is re-pointed at this tile's
            # corners before the body is created from it.
            self.fd_tile.shape.vertices = vertices
            t = self.world.CreateStaticBody(fixtures=self.fd_tile)
            t.userData = t
            # NOTE(review): c is only used by the commented-out colour line
            # below, so it is currently unused.
            c = 0.01 * (i % 3)
            #t.color = [ROAD_COLOR[0] + c, ROAD_COLOR[1] + c, ROAD_COLOR[2] + c]
            # Every tile refers to the same ROAD_COLOR object.
            t.color = ROAD_COLOR
            t.road_visited = False
            t.road_friction = ROAD_FRICTION
            # The fixture is marked as a sensor.
            t.fixtures[0].sensor = True
            self.road_poly.append(([road1_l, road1_r, road2_r,
                                    road2_l], t.color))
            self.road.append(t)
            if border[i]:
                # The sign of the beta change selects the side of the road;
                # the strip covers TRACK_WIDTH..TRACK_WIDTH+BORDER on that
                # side and alternates white and red from tile to tile.
                side = np.sign(beta2 - beta1)
                b1_l = (x1 + side * TRACK_WIDTH * math.cos(beta1),
                        y1 + side * TRACK_WIDTH * math.sin(beta1))
                b1_r = (x1 + side * (TRACK_WIDTH + BORDER) * math.cos(beta1),
                        y1 + side * (TRACK_WIDTH + BORDER) * math.sin(beta1))
                b2_l = (x2 + side * TRACK_WIDTH * math.cos(beta2),
                        y2 + side * TRACK_WIDTH * math.sin(beta2))
                b2_r = (x2 + side * (TRACK_WIDTH + BORDER) * math.cos(beta2),
                        y2 + side * (TRACK_WIDTH + BORDER) * math.sin(beta2))
                self.road_poly.append(([b1_l, b1_r, b2_r,
                                        b2_l], (1, 1, 1) if i % 2 == 0 else
                                       (1, 0, 0)))
        self.track = track
        return True

    def reset(self):
        """Start a new episode on a freshly generated track.

        Destroys the previous track and car, clears the reward and time
        counters, regenerates a track until generation succeeds, places a new
        car at the first track point and returns whatever ``step(None)``
        returns.
        """
        self._destroy()
        self.reward = self.prev_reward = 0.0
        self.tile_visited_count = 0
        self.t = 0.0
        self.road_poly = []

        # Track generation can fail; keep trying until it reports success.
        while not self._create_track():
            if self.verbose == 1:
                print(
                    "retry to generate track (normal if there are not many of this messages)"
                )

        first_point = self.track[0]
        self.car = Car(self.world, *first_point[1:4])

        initial_state = self.step(None)
        return initial_state

    def step(self, action):
        """Advance the simulation by ``self.dt`` and return the vehicle state.

        Args:
            action: indexable (steer, gas, brake), or None to step without
                applying controls (used by reset()).

        Returns:
            A copy of the 11-element state vector: hull position (2), hull
            angle shifted by pi/2 and wrapped to [0, 2*pi), hull linear
            velocity (2), hull angular velocity, joint angle of wheel 0 and
            the angular velocity of each of the four wheels.

        Only the state is returned. The accumulated reward is kept in
        ``self.reward`` / ``self.prev_reward``; no per-step reward or
        termination flag is reported.
        """
        if action is not None:
            self.action = np.array(action)
            self.car.steer(-action[0])
            self.car.gas(action[1])
            self.car.brake(action[2])

        self.car.step(self.dt)
        self.world.Step(self.dt, 6 * 30, 2 * 30)
        self.t += self.dt
        # Rendered every step; the returned image is not used here.
        self.render("state_pixels")

        # Update vehicle state
        self.state[0:2] = self.car.hull.position
        self.state[2] = (self.car.hull.angle + np.pi / 2) % (2 * np.pi)
        self.state[3:5] = self.car.hull.linearVelocity
        self.state[5] = self.car.hull.angularVelocity
        self.state[6] = self.car.wheels[0].joint.angle
        self.state[7] = self.car.wheels[0].omega
        self.state[8] = self.car.wheels[1].omega
        self.state[9] = self.car.wheels[2].omega
        self.state[10] = self.car.wheels[3].omega

        if action is not None:  # First step without action, called from reset()
            # Constant time penalty per step.
            self.reward -= 0.1
            # We actually don't want to count fuel spent, we want car to be faster.
            self.car.fuel_spent = 0.0
            self.prev_reward = self.reward

        # self.state is overwritten in place on every step, so hand out a copy
        # to keep previously returned observations from changing.
        return self.state.copy()

    def render(self, mode='human'):
        """Render the current frame.

        Args:
            mode: 'human' draws into the window and flips it; 'rgb_array' and
                'state_pixels' read the frame back from a VIDEO_W x VIDEO_H or
                STATE_W x STATE_H viewport respectively.

        Returns:
            ``self.viewer.isopen`` for 'human'; an (H, W, 3) uint8 array for
            the other two modes; None when reset() has not been called yet.
        """
        assert mode in ['human', 'state_pixels', 'rgb_array']
        if self.viewer is None:
            from gym.envs.classic_control import rendering
            self.viewer = rendering.Viewer(WINDOW_W, WINDOW_H)

            # HUD captions as (text, font size, x, y), in drawing order.
            title_y = WINDOW_H / 12
            caption_y = WINDOW_H / 100 * 3
            label_specs = [
                ('Input', 15, WINDOW_W / 16 * 3, title_y),
                ('S', 12, WINDOW_W / 64 * 7, caption_y),
                ('T', 12, WINDOW_W / 64 * 14, caption_y),
                ('B', 12, WINDOW_W / 64 * 17, caption_y),
                ('Linear Velocity', 15, WINDOW_W / 2, title_y),
                ('FL FR', 12, WINDOW_W / 16 * 7, caption_y),
                ('RL RR', 12, WINDOW_W / 2, caption_y),
                ('C', 12, WINDOW_W / 64 * 37, caption_y),
                ('Angular Velocity', 15, WINDOW_W / 16 * 13, title_y),
                ('C', 12, WINDOW_W / 16 * 13, caption_y),
            ]
            # Rebuild the list instead of appending to it, so a viewer
            # recreated after close() does not leave duplicate labels.
            self.labels = [
                pyglet.text.Label(text,
                                  font_size=font_size,
                                  x=label_x,
                                  y=label_y,
                                  anchor_x='center',
                                  anchor_y='center',
                                  color=(255, 255, 255, 255))
                for text, font_size, label_x, label_y in label_specs
            ]

            self.transform = rendering.Transform()

        if "t" not in self.__dict__: return  # reset() not called yet

        # Zoom grows with t**5 from 1 and is clipped to [1, ZOOM * SCALE].
        zoom = np.clip((ZOOM * SCALE - 1) * np.power(self.t, 5) + 1, 1,
                       ZOOM * SCALE)
        scroll_x = self.car.hull.position[0]
        scroll_y = self.car.hull.position[1]
        angle = -self.car.hull.angle
        vel = self.car.hull.linearVelocity
        # Above a small speed the view follows the velocity direction instead
        # of the hull angle.
        if np.linalg.norm(vel) > 0.5:
            angle = math.atan2(vel[0], vel[1])
        self.transform.set_scale(zoom, zoom)
        self.transform.set_translation(
            WINDOW_W / 2 - (scroll_x * zoom * math.cos(angle) -
                            scroll_y * zoom * math.sin(angle)),
            WINDOW_H / 4 - (scroll_x * zoom * math.sin(angle) +
                            scroll_y * zoom * math.cos(angle)))
        self.transform.set_rotation(angle)

        self.car.draw(self.viewer, mode != "state_pixels")

        win = self.viewer.window
        win.switch_to()
        win.dispatch_events()

        win.clear()
        t = self.transform
        if mode == 'rgb_array':
            VP_W = VIDEO_W
            VP_H = VIDEO_H
        elif mode == 'state_pixels':
            VP_W = STATE_W
            VP_H = STATE_H
        else:
            pixel_scale = 1
            if hasattr(win.context, '_nscontext'):
                pixel_scale = win.context._nscontext.view().backingScaleFactor(
                )  # pylint: disable=protected-access
            VP_W = int(pixel_scale * WINDOW_W)
            VP_H = int(pixel_scale * WINDOW_H)

        gl.glViewport(0, 0, VP_W, VP_H)
        t.enable()
        self.render_road()
        for geom in self.viewer.onetime_geoms:
            geom.render()
        self.viewer.onetime_geoms = []
        t.disable()
        self.render_indicators(WINDOW_W, WINDOW_H)

        if mode == 'human':
            win.flip()
            return self.viewer.isopen

        image_data = pyglet.image.get_buffer_manager().get_color_buffer(
        ).get_image_data()
        # np.fromstring's binary mode is deprecated; frombuffer reads the same
        # bytes, and copy() keeps the result writable as before.
        arr = np.frombuffer(image_data.get_data(), dtype=np.uint8).copy()
        arr = arr.reshape(VP_H, VP_W, 4)
        # Flip the row order and drop the fourth channel.
        arr = arr[::-1, :, 0:3]

        return arr

    def close(self):
        """Close the viewer, if one was created, and forget it."""
        viewer = self.viewer
        if viewer is None:
            return
        viewer.close()
        self.viewer = None

    def render_road(self):
        """Draw the playfield background, its checker squares and the road.

        Everything is emitted as quads inside one glBegin/glEnd pair: one
        large grey square covering the playfield, a grid of darker squares on
        every other cell, then each polygon stored in ``self.road_poly``. The
        polygon at index 2 is drawn white instead of in its stored colour.
        """
        vertex = gl.glVertex3f
        gl.glBegin(gl.GL_QUADS)

        # Playfield square.
        gl.glColor4f(0.75, 0.75, 0.75, 1.0)
        playfield_corners = (
            (-PLAYFIELD, +PLAYFIELD),
            (+PLAYFIELD, +PLAYFIELD),
            (+PLAYFIELD, -PLAYFIELD),
            (-PLAYFIELD, -PLAYFIELD),
        )
        for px, py in playfield_corners:
            vertex(px, py, 0)

        # Darker squares of side PLAYFIELD / 20 on every second grid cell.
        gl.glColor4f(0.65, 0.65, 0.65, 1.0)
        cell = PLAYFIELD / 20.0
        for gx in range(-20, 20, 2):
            for gy in range(-20, 20, 2):
                x0 = cell * gx
                y0 = cell * gy
                x1 = x0 + cell
                y1 = y0 + cell
                vertex(x1, y0, 0)
                vertex(x0, y0, 0)
                vertex(x0, y1, 0)
                vertex(x1, y1, 0)

        # Road tiles and border strips.
        for index, (corners, rgb) in enumerate(self.road_poly):
            if index != 2:
                gl.glColor4f(rgb[0], rgb[1], rgb[2], 1)
            else:
                gl.glColor4f(1, 1, 1, 1)
            for corner in corners:
                vertex(corner[0], corner[1], 0)
        gl.glEnd()

    def render_indicators(self, W, H):
        """Draw the HUD strip, its gauge bars and the text labels.

        Bars are sized in units of W/100 and H/100 and emitted as quads inside
        one glBegin/glEnd pair; the labels in ``self.labels`` are drawn last.
        """
        unit_w = W / 100
        unit_h = H / 100

        def quad(rgba, corners):
            # One colour (with alpha) followed by the four corners, in order.
            gl.glColor4f(rgba[0], rgba[1], rgba[2], rgba[3])
            for vx, vy in corners:
                gl.glVertex3f(vx, vy, 0)

        def ver_ind(place, val, color):
            # Bar three units wide centred on `place`, rising from the bottom.
            left = place - 1.5 * unit_w
            right = place + 1.5 * unit_w
            top = unit_h * val
            quad(color, [(left, top), (right, top), (right, 0), (left, 0)])

        def hor_ind(place, val, color):
            # Bar between 1 and 5 units high, extending sideways from `place`.
            tip = place + unit_w * val
            quad(color, [(place, 5 * unit_h), (tip, 5 * unit_h),
                         (tip, unit_h), (place, unit_h)])

        gl.glBegin(gl.GL_QUADS)

        # Background strip over the bottom tenth of the height.
        quad((0, 0, 0, 0.2), [(W, 0), (W, H / 10), (0, H / 10), (0, 0)])

        true_speed = np.linalg.norm(self.car.hull.linearVelocity)

        # Last action: component 0 as a horizontal bar, 1 and 2 as vertical.
        hor_ind(W / 64 * 7, 7 * self.action[0], (1, 1, 0, 0.7))
        ver_ind(W / 64 * 14, 6 * self.action[1], (0, 1, 0, 0.7))
        ver_ind(W / 64 * 17, 6 * self.action[2], (1, 0, 0, 0.7))

        # Angular velocity of each of the four wheels.
        wheel_bars = (
            (W / 16 * 7 - 1.5 * unit_w, 0, (0, 0.7, 1, 0.7)),
            (W / 16 * 7 + 1.5 * unit_w, 1, (0, 0.7, 1, 0.7)),
            (W / 2 - 1.5 * unit_w, 2, (0, 0.5, 1, 0.7)),
            (W / 2 + 1.5 * unit_w, 3, (0, 0.5, 1, 0.8)),
        )
        for place, wheel_index, color in wheel_bars:
            ver_ind(place, 0.025 * self.car.wheels[wheel_index].omega, color)

        # Hull speed, then hull angular velocity.
        ver_ind(W / 64 * 37, 0.05 * true_speed, (0, 0, 1, 0.7))
        hor_ind(W / 16 * 13, -1 * self.car.hull.angularVelocity,
                (0.5, 0, 1, 0.7))
        gl.glEnd()

        for label in self.labels:
            label.draw()
Exemplo n.º 5
0
class CarRacing(gym.Env, EzPickle):
    metadata = {
        "render.modes": ["human", "rgb_array", "state_pixels"],
        "video.frames_per_second": FPS,
    }

    def __init__(self, verbose=1):
        """Create the physics world, bookkeeping fields and gym spaces.

        verbose: stored on the instance; other methods print progress
            messages when it equals 1.
        """
        EzPickle.__init__(self)
        self.seed()

        # The listener is stored on the instance as well as being handed to the world.
        self.contactListener_keepref = FrictionDetector(self)
        self.world = Box2D.b2World(
            (0, 0), contactListener=self.contactListener_keepref
        )

        # Rendering objects, road and car are created later by render()/reset().
        self.viewer = None
        self.invisible_state_window = None
        self.invisible_video_window = None
        self.road = None
        self.car = None

        self.reward = 0.0
        self.prev_reward = 0.0
        self.verbose = verbose

        # Reusable fixture definition; _create_track() overwrites its vertices per tile.
        placeholder_square = [(0, 0), (1, 0), (1, -1), (0, -1)]
        self.fd_tile = fixtureDef(shape=polygonShape(vertices=placeholder_square))

        # steer, gas, brake
        action_low = np.array([-1, 0, 0]).astype(np.float32)
        action_high = np.array([+1, +1, +1]).astype(np.float32)
        self.action_space = spaces.Box(action_low, action_high)

        self.observation_space = spaces.Box(
            low=0, high=255, shape=(STATE_H, STATE_W, 3), dtype=np.uint8
        )

    def seed(self, seed=None):
        """Replace ``self.np_random`` using ``seed`` and return the seed used, in a list."""
        generator, used_seed = seeding.np_random(seed)
        self.np_random = generator
        return [used_seed]

    def _destroy(self):
        """Remove every road tile body from the world and destroy the car.

        Does nothing when ``self.road`` is ``None`` or empty.
        """
        if self.road:
            for tile in self.road:
                self.world.DestroyBody(tile)
            self.road = []
            self.car.destroy()

    def _create_track(self):
        """Generate a random closed track and build its road tiles.

        Steps: draw 12 checkpoints around the origin, walk a point from
        checkpoint to checkpoint recording (alpha, beta, x, y) samples, cut
        one closed lap out of the samples, flag hard turns for a red/white
        border, and create one static sensor body per track segment.

        Side effects: sets ``self.start_alpha``, resets ``self.road`` and
        fills it with the tile bodies, appends drawable polygons to
        ``self.road_poly`` (which must already exist; this method does not
        create it) and stores the lap samples in ``self.track``.

        Returns:
            True on success.  False when no closed lap was found in the
            samples or when the lap's two ends do not meet closely enough;
            in that case no tiles have been created.
        """
        CHECKPOINTS = 12

        # Create checkpoints
        checkpoints = []
        for c in range(CHECKPOINTS):
            noise = self.np_random.uniform(0, 2 * math.pi * 1 / CHECKPOINTS)
            alpha = 2 * math.pi * c / CHECKPOINTS + noise
            rad = self.np_random.uniform(TRACK_RAD / 3, TRACK_RAD)

            # The first and last checkpoints get a fixed angle and radius
            # instead of the random values drawn above.
            if c == 0:
                alpha = 0
                rad = 1.5 * TRACK_RAD
            if c == CHECKPOINTS - 1:
                alpha = 2 * math.pi * c / CHECKPOINTS
                self.start_alpha = 2 * math.pi * (-0.5) / CHECKPOINTS
                rad = 1.5 * TRACK_RAD

            # Each checkpoint is stored as (angle, x, y).
            checkpoints.append((alpha, rad * math.cos(alpha), rad * math.sin(alpha)))
        self.road = []

        # Go from one checkpoint to another to create track
        x, y, beta = 1.5 * TRACK_RAD, 0, 0
        dest_i = 0
        laps = 0
        track = []
        no_freeze = 2500  # hard cap on the number of samples generated
        visited_other_side = False
        while True:
            # Polar angle of the current point; negative angles are shifted by 2*pi.
            alpha = math.atan2(y, x)
            # A lap is counted each time the angle turns positive again after
            # having been negative.
            if visited_other_side and alpha > 0:
                laps += 1
                visited_other_side = False
            if alpha < 0:
                visited_other_side = True
                alpha += 2 * math.pi

            while True:  # Find destination from checkpoints
                failed = True

                # Advance dest_i until a checkpoint at or beyond alpha is found,
                # giving up once dest_i wraps to a multiple of the list length.
                while True:
                    dest_alpha, dest_x, dest_y = checkpoints[dest_i % len(checkpoints)]
                    if alpha <= dest_alpha:
                        failed = False
                        break
                    dest_i += 1
                    if dest_i % len(checkpoints) == 0:
                        break

                if not failed:
                    break

                # No checkpoint ahead at this angle: lower alpha by a full turn and retry.
                alpha -= 2 * math.pi
                continue

            # r1 is the unit vector at angle beta; p1 is r1 rotated by 90 degrees
            # and is the direction the point actually moves in.
            r1x = math.cos(beta)
            r1y = math.sin(beta)
            p1x = -r1y
            p1y = r1x
            dest_dx = dest_x - x  # vector towards destination
            dest_dy = dest_y - y
            # destination vector projected on rad:
            proj = r1x * dest_dx + r1y * dest_dy
            # Keep beta within 1.5*pi of alpha by adding/removing full turns.
            while beta - alpha > 1.5 * math.pi:
                beta -= 2 * math.pi
            while beta - alpha < -1.5 * math.pi:
                beta += 2 * math.pi
            prev_beta = beta
            proj *= SCALE
            # Turn by at most TRACK_TURN_RATE per step; the sign of proj picks
            # the direction, and |proj| <= 0.3 leaves beta unchanged.
            if proj > 0.3:
                beta -= min(TRACK_TURN_RATE, abs(0.001 * proj))
            if proj < -0.3:
                beta += min(TRACK_TURN_RATE, abs(0.001 * proj))
            x += p1x * TRACK_DETAIL_STEP
            y += p1y * TRACK_DETAIL_STEP
            # Sample: (alpha, mean of beta before and after the turn, x, y).
            track.append((alpha, prev_beta * 0.5 + beta * 0.5, x, y))
            if laps > 4:
                break
            no_freeze -= 1
            if no_freeze == 0:
                break

        # Find closed loop range i1..i2, first loop should be ignored, second is OK
        i1, i2 = -1, -1
        i = len(track)
        # Scan backwards: the first crossing of start_alpha found becomes i2,
        # the next one becomes i1.
        while True:
            i -= 1
            if i == 0:
                return False  # Failed
            pass_through_start = (
                track[i][0] > self.start_alpha and track[i - 1][0] <= self.start_alpha
            )
            if pass_through_start and i2 == -1:
                i2 = i
            elif pass_through_start and i1 == -1:
                i1 = i
                break
        if self.verbose == 1:
            print("Track generation: %i..%i -> %i-tiles track" % (i1, i2, i2 - i1))
        assert i1 != -1
        assert i2 != -1

        track = track[i1 : i2 - 1]

        first_beta = track[0][1]
        first_perp_x = math.cos(first_beta)
        first_perp_y = math.sin(first_beta)
        # Length of perpendicular jump to put together head and tail
        well_glued_together = np.sqrt(
            np.square(first_perp_x * (track[0][2] - track[-1][2]))
            + np.square(first_perp_y * (track[0][3] - track[-1][3]))
        )
        # Reject the lap when its two ends are further apart than one step.
        if well_glued_together > TRACK_DETAIL_STEP:
            return False

        # Red-white border on hard turns
        border = [False] * len(track)
        for i in range(len(track)):
            good = True
            oneside = 0
            # A sample qualifies when each of the BORDER_MIN_COUNT preceding
            # beta changes exceeds 0.2 * TRACK_TURN_RATE in size and all have
            # the same sign.  Negative indices wrap to the end of the lap.
            for neg in range(BORDER_MIN_COUNT):
                beta1 = track[i - neg - 0][1]
                beta2 = track[i - neg - 1][1]
                good &= abs(beta1 - beta2) > TRACK_TURN_RATE * 0.2
                oneside += np.sign(beta1 - beta2)
            good &= abs(oneside) == BORDER_MIN_COUNT
            border[i] = good
        # Extend each flag backwards over the samples that contributed to it.
        for i in range(len(track)):
            for neg in range(BORDER_MIN_COUNT):
                border[i - neg] |= border[i]

        # Create tiles
        for i in range(len(track)):
            alpha1, beta1, x1, y1 = track[i]
            # For i == 0 this is the last sample, which closes the loop.
            alpha2, beta2, x2, y2 = track[i - 1]
            # Corners lie TRACK_WIDTH to either side of each sample along its beta direction.
            road1_l = (
                x1 - TRACK_WIDTH * math.cos(beta1),
                y1 - TRACK_WIDTH * math.sin(beta1),
            )
            road1_r = (
                x1 + TRACK_WIDTH * math.cos(beta1),
                y1 + TRACK_WIDTH * math.sin(beta1),
            )
            road2_l = (
                x2 - TRACK_WIDTH * math.cos(beta2),
                y2 - TRACK_WIDTH * math.sin(beta2),
            )
            road2_r = (
                x2 + TRACK_WIDTH * math.cos(beta2),
                y2 + TRACK_WIDTH * math.sin(beta2),
            )
            vertices = [road1_l, road1_r, road2_r, road2_l]
            # The shared fixture definition is re-pointed at this tile's quad.
            self.fd_tile.shape.vertices = vertices
            t = self.world.CreateStaticBody(fixtures=self.fd_tile)
            t.userData = t
            # Colour offset cycles over three values with the tile index.
            c = 0.01 * (i % 3)
            t.color = [ROAD_COLOR[0] + c, ROAD_COLOR[1] + c, ROAD_COLOR[2] + c]
            t.road_visited = False
            t.road_friction = 1.0
            t.fixtures[0].sensor = True
            self.road_poly.append(([road1_l, road1_r, road2_r, road2_l], t.color))
            self.road.append(t)
            if border[i]:
                # The border strip goes on the side given by the sign of the beta change.
                side = np.sign(beta2 - beta1)
                b1_l = (
                    x1 + side * TRACK_WIDTH * math.cos(beta1),
                    y1 + side * TRACK_WIDTH * math.sin(beta1),
                )
                b1_r = (
                    x1 + side * (TRACK_WIDTH + BORDER) * math.cos(beta1),
                    y1 + side * (TRACK_WIDTH + BORDER) * math.sin(beta1),
                )
                b2_l = (
                    x2 + side * TRACK_WIDTH * math.cos(beta2),
                    y2 + side * TRACK_WIDTH * math.sin(beta2),
                )
                b2_r = (
                    x2 + side * (TRACK_WIDTH + BORDER) * math.cos(beta2),
                    y2 + side * (TRACK_WIDTH + BORDER) * math.sin(beta2),
                )
                # Border segments alternate between white and red by tile index.
                self.road_poly.append(
                    ([b1_l, b1_r, b2_r, b2_l], (1, 1, 1) if i % 2 == 0 else (1, 0, 0))
                )
        self.track = track
        return True

    def reset(self):
        """Start a new episode on a freshly generated track.

        Destroys the previous road and car, zeroes the reward, tile and time
        counters, empties ``self.road_poly`` and calls ``_create_track()``
        until it succeeds.  The car is then created from the beta, x and y
        values of the first track sample.

        Returns:
            The observation produced by an initial ``step(None)``.
        """
        self._destroy()
        self.reward = 0.0
        self.prev_reward = 0.0
        self.tile_visited_count = 0
        self.t = 0.0
        self.road_poly = []

        # _create_track() returns False when the generated loop is unusable; keep trying.
        while not self._create_track():
            if self.verbose == 1:
                # The trailing space keeps "many" and "instances" apart when
                # the two adjacent literals are joined.
                print(
                    "retry to generate track (normal if there are not many "
                    "instances of this message)"
                )
        self.car = Car(self.world, *self.track[0][1:4])

        return self.step(None)[0]

    def step(self, action):
        """Advance the simulation by one frame of ``1 / FPS`` seconds.

        action: indexable with three entries applied as steer (negated), gas
            and brake, or ``None`` to advance without touching the controls
            (``reset()`` does this for the first frame).

        Returns:
            ``(state, step_reward, done, {})`` where ``state`` is the
            ``"state_pixels"`` render.  With ``action=None`` the reward is 0
            and ``done`` is False.  Otherwise the accumulated reward drops by
            0.1 and ``step_reward`` is the change since the previous step;
            ``done`` is set when every track tile has been visited, and
            leaving the playfield sets ``done`` with a step reward of -100.
        """
        acting = action is not None
        if acting:
            self.car.steer(-action[0])
            self.car.gas(action[1])
            self.car.brake(action[2])

        self.car.step(1.0 / FPS)
        self.world.Step(1.0 / FPS, 6 * 30, 2 * 30)
        self.t += 1.0 / FPS

        self.state = self.render("state_pixels")

        if not acting:
            # First step without action, called from reset()
            return self.state, 0, False, {}

        self.reward -= 0.1
        # Fuel use is deliberately not penalised (the car should simply be
        # fast); the counter is only cleared.
        self.car.fuel_spent = 0.0
        step_reward = self.reward - self.prev_reward
        self.prev_reward = self.reward

        done = self.tile_visited_count == len(self.track)

        pos_x, pos_y = self.car.hull.position
        if abs(pos_x) > PLAYFIELD or abs(pos_y) > PLAYFIELD:
            done = True
            step_reward = -100

        return self.state, step_reward, done, {}

    def render(self, mode="human"):
        """Draw the current frame and return it in the form ``mode`` asks for.

        The viewer, score label and transform are created on first use.  The
        view is scaled, rotated and translated around the car, the road and
        the viewer's one-time geometries are drawn, and the indicators are
        drawn on top.

        mode: ``"human"`` flips the window and returns ``self.viewer.isopen``.
            ``"rgb_array"`` and ``"state_pixels"`` read the colour buffer back
            and return a uint8 array of shape ``(VP_H, VP_W, 3)``, rows
            reversed, where the viewport size is ``VIDEO_*`` or ``STATE_*``
            respectively.

        Returns:
            See ``mode``; ``None`` when ``reset()`` has not been called yet.
        """
        assert mode in ["human", "state_pixels", "rgb_array"]
        if self.viewer is None:
            from gym.envs.classic_control import rendering

            self.viewer = rendering.Viewer(WINDOW_W, WINDOW_H)
            self.score_label = pyglet.text.Label(
                "0000",
                font_size=36,
                x=20,
                y=WINDOW_H * 2.5 / 40.00,
                anchor_x="left",
                anchor_y="center",
                color=(255, 255, 255, 255),
            )
            self.transform = rendering.Transform()

        if "t" not in self.__dict__:
            return  # reset() not called yet

        # Animate zoom first second:
        zoom = 0.1 * SCALE * max(1 - self.t, 0) + ZOOM * SCALE * min(self.t, 1)
        scroll_x = self.car.hull.position[0]
        scroll_y = self.car.hull.position[1]
        angle = -self.car.hull.angle
        vel = self.car.hull.linearVelocity
        # Above a small speed the view follows the velocity direction rather
        # than the hull angle.
        if np.linalg.norm(vel) > 0.5:
            angle = math.atan2(vel[0], vel[1])
        self.transform.set_scale(zoom, zoom)
        self.transform.set_translation(
            WINDOW_W / 2
            - (scroll_x * zoom * math.cos(angle) - scroll_y * zoom * math.sin(angle)),
            WINDOW_H / 4
            - (scroll_x * zoom * math.sin(angle) + scroll_y * zoom * math.cos(angle)),
        )
        self.transform.set_rotation(angle)

        self.car.draw(self.viewer, mode != "state_pixels")

        win = self.viewer.window
        win.switch_to()
        win.dispatch_events()

        win.clear()
        t = self.transform
        if mode == "rgb_array":
            VP_W = VIDEO_W
            VP_H = VIDEO_H
        elif mode == "state_pixels":
            VP_W = STATE_W
            VP_H = STATE_H
        else:
            pixel_scale = 1
            if hasattr(win.context, "_nscontext"):
                pixel_scale = (
                    win.context._nscontext.view().backingScaleFactor()
                )  # pylint: disable=protected-access
            VP_W = int(pixel_scale * WINDOW_W)
            VP_H = int(pixel_scale * WINDOW_H)

        gl.glViewport(0, 0, VP_W, VP_H)
        t.enable()
        self.render_road()
        for geom in self.viewer.onetime_geoms:
            geom.render()
        self.viewer.onetime_geoms = []
        t.disable()
        self.render_indicators(WINDOW_W, WINDOW_H)

        if mode == "human":
            win.flip()
            return self.viewer.isopen

        image_data = (
            pyglet.image.get_buffer_manager().get_color_buffer().get_image_data()
        )
        # np.fromstring's binary mode (sep="") is deprecated; np.frombuffer
        # reads the same bytes.  The copy keeps the result writable, as the
        # array returned by fromstring was.
        arr = np.frombuffer(image_data.get_data(), dtype=np.uint8).copy()
        arr = arr.reshape(VP_H, VP_W, 4)
        # Reverse the row order and drop the fourth channel.
        arr = arr[::-1, :, 0:3]

        return arr

    def close(self):
        """Close the viewer, if one was created, and drop the reference to it."""
        viewer = self.viewer
        if viewer is None:
            return
        viewer.close()
        self.viewer = None

    def render_road(self):
        """Draw the playfield, the grass pattern and every road polygon.

        All quads go into one vertex list (x, y, 0 positions plus RGBA
        colours) that is drawn once and deleted: first a square spanning
        +/-PLAYFIELD, then a lighter square on every other cell of a grid
        with cell size ``PLAYFIELD / 20``, then the polygons stored in
        ``self.road_poly`` with alpha 1.
        """
        field_rgba = [0.4, 0.8, 0.4, 1.0]
        grass_rgba = [0.4, 0.9, 0.4, 1.0]

        rgba = field_rgba * 4
        coords = [
            +PLAYFIELD, +PLAYFIELD, 0,
            +PLAYFIELD, -PLAYFIELD, 0,
            -PLAYFIELD, -PLAYFIELD, 0,
            -PLAYFIELD, +PLAYFIELD, 0,
        ]

        k = PLAYFIELD / 20.0
        for gx in range(-20, 20, 2):
            for gy in range(-20, 20, 2):
                rgba.extend(grass_rgba * 4)
                coords.extend(
                    [
                        k * gx + k, k * gy + 0, 0,
                        k * gx + 0, k * gy + 0, 0,
                        k * gx + 0, k * gy + k, 0,
                        k * gx + k, k * gy + k, 0,
                    ]
                )

        for poly, color in self.road_poly:
            rgba.extend([color[0], color[1], color[2], 1] * len(poly))
            coords.extend(value for p in poly for value in (p[0], p[1], 0))

        batch = pyglet.graphics.vertex_list(
            len(coords) // 3, ("v3f", coords), ("c4f", rgba)
        )
        batch.draw(gl.GL_QUADS)
        batch.delete()

    def render_indicators(self, W, H):
        """Draw the indicator panel, its bars and the score label.

        The panel is a black quad five rows high, a row being ``H / 40`` and
        a column ``W / 40``.  Vertical bars show the hull speed and the four
        wheel ``omega`` values; horizontal bars show the joint angle of
        wheel 0 and the hull's angular velocity.  All quads are drawn from
        one vertex list, then ``self.score_label`` is updated with the
        current reward and drawn.
        """
        col = W / 40.0
        row = H / 40.0
        rgba = []
        coords = []

        def add_quad(color, corners):
            # Four vertices at z = 0, all with the same opaque colour.
            rgba.extend([color[0], color[1], color[2], 1] * 4)
            for cx, cy in corners:
                coords.extend([cx, cy, 0])

        def vertical_ind(place, val, color):
            top = row + row * val
            add_quad(color, (
                (place * col, top),
                ((place + 1) * col, top),
                ((place + 1) * col, row),
                ((place + 0) * col, row),
            ))

        def horiz_ind(place, val, color):
            add_quad(color, (
                ((place + 0) * col, 4 * row),
                ((place + val) * col, 4 * row),
                ((place + val) * col, 2 * row),
                ((place + 0) * col, 2 * row),
            ))

        # Background panel.
        add_quad((0, 0, 0), ((W, 0), (W, 5 * row), (0, 5 * row), (0, 0)))

        hull = self.car.hull
        wheels = self.car.wheels
        true_speed = np.sqrt(
            np.square(hull.linearVelocity[0]) + np.square(hull.linearVelocity[1])
        )

        vertical_ind(5, 0.02 * true_speed, (1, 1, 1))
        # ABS sensors
        vertical_ind(7, 0.01 * wheels[0].omega, (0.0, 0, 1))
        vertical_ind(8, 0.01 * wheels[1].omega, (0.0, 0, 1))
        vertical_ind(9, 0.01 * wheels[2].omega, (0.2, 0, 1))
        vertical_ind(10, 0.01 * wheels[3].omega, (0.2, 0, 1))
        horiz_ind(20, -10.0 * wheels[0].joint.angle, (0, 1, 0))
        horiz_ind(30, -0.8 * hull.angularVelocity, (1, 0, 0))

        batch = pyglet.graphics.vertex_list(
            len(coords) // 3, ("v3f", coords), ("c4f", rgba)
        )
        batch.draw(gl.GL_QUADS)
        batch.delete()

        self.score_label.text = "%04i" % self.reward
        self.score_label.draw()
Exemplo n.º 6
0
class CarRacing(gym.Env, EzPickle):
    """
    ### Description
    The easiest continuous control task to learn from pixels - a top-down
    racing environment. Discrete control is reasonable in this environment as
    well; on/off discretization is fine.

    The game is solved when the agent consistently gets 900+ points.
    The generated track is random every episode.

    Some indicators are shown at the bottom of the window along with the
    state RGB buffer. From left to right: true speed, four ABS sensors,
    steering wheel position, and gyroscope.
    To play yourself (it's rather fast for humans), type:
    ```
    python gym/envs/box2d/car_racing.py
    ```
    Remember: it's a powerful rear-wheel drive car - don't press the accelerator
    and turn at the same time.

    ### Action Space
    There are 3 actions: steering (-1 is full left, +1 is full right), gas,
    and braking.

    ### Observation Space
    State consists of 96x96 pixels.

    ### Rewards
    The reward is -0.1 every frame and +1000/N for every track tile visited,
    where N is the total number of tiles visited in the track. For example,
    if you have finished in 732 frames, your reward is
    1000 - 0.1*732 = 926.8 points.

    ### Starting State
    The car starts at rest in the center of the road.

    ### Episode Termination
    The episode finishes when all of the tiles are visited. The car can also go
    outside of the playfield - that is, far off the track, in which case it will
    receive -100 reward and die.

    ### Arguments
    There are no arguments supported in constructing the environment.

    ### Version History
    - v0: Current version

    ### References
    - Chris Campbell (2014), http://www.iforce2d.net/b2dtut/top-down-car.

    ### Credits
    Created by Oleg Klimov
    """

    metadata = {
        "render_modes": ["human", "rgb_array", "state_pixels"],
        "render_fps": FPS,
    }

    def __init__(self, verbose=1, lap_complete_percent=0.95):
        """Initialise pygame, the physics world, bookkeeping fields and spaces.

        verbose: stored on the instance; other methods print progress
            messages when it equals 1.
        lap_complete_percent: passed through to ``FrictionDetector``.
        """
        EzPickle.__init__(self)
        pygame.init()

        # The listener is stored on the instance as well as being handed to the world.
        listener = FrictionDetector(self, lap_complete_percent)
        self.contactListener_keepref = listener
        self.world = Box2D.b2World((0, 0), contactListener=listener)

        # Display objects are created by render(); road and car by reset().
        self.screen = None
        self.clock = None
        self.isopen = True
        self.invisible_state_window = None
        self.invisible_video_window = None
        self.road = None
        self.car = None

        self.reward = 0.0
        self.prev_reward = 0.0
        self.verbose = verbose
        self.new_lap = False

        # Reusable fixture definition; _create_track() overwrites its vertices per tile.
        placeholder_square = [(0, 0), (1, 0), (1, -1), (0, -1)]
        self.fd_tile = fixtureDef(shape=polygonShape(vertices=placeholder_square))

        # This will throw a warning in tests/envs/test_envs in utils/env_checker.py
        # as the space is not symmetric or normalised; that is not possible here,
        # so ignore it.
        # steer, gas, brake
        action_low = np.array([-1, 0, 0]).astype(np.float32)
        action_high = np.array([+1, +1, +1]).astype(np.float32)
        self.action_space = spaces.Box(action_low, action_high)

        self.observation_space = spaces.Box(
            low=0, high=255, shape=(STATE_H, STATE_W, 3), dtype=np.uint8
        )

    def _destroy(self):
        """Destroy all road tile bodies and the car; no-op without a road."""
        tiles = self.road
        if not tiles:
            return
        for tile in tiles:
            self.world.DestroyBody(tile)
        self.road = []
        self.car.destroy()

    def _create_track(self):
        """Generate a random closed track and build its road tiles.

        Steps: draw 12 checkpoints around the origin, walk a point from
        checkpoint to checkpoint recording (alpha, beta, x, y) samples, cut
        one closed lap out of the samples, flag hard turns for a red/white
        border, and create one static sensor body per track segment.

        Side effects: sets ``self.start_alpha``, resets ``self.road`` and
        fills it with the tile bodies, appends drawable polygons to
        ``self.road_poly`` (which must already exist; this method does not
        create it) and stores the lap samples in ``self.track``.

        Returns:
            True on success.  False when no closed lap was found in the
            samples or when the lap's two ends do not meet closely enough;
            in that case no tiles have been created.
        """
        CHECKPOINTS = 12

        # Create checkpoints
        checkpoints = []
        for c in range(CHECKPOINTS):
            noise = self.np_random.uniform(0, 2 * math.pi * 1 / CHECKPOINTS)
            alpha = 2 * math.pi * c / CHECKPOINTS + noise
            rad = self.np_random.uniform(TRACK_RAD / 3, TRACK_RAD)

            # The first and last checkpoints get a fixed angle and radius
            # instead of the random values drawn above.
            if c == 0:
                alpha = 0
                rad = 1.5 * TRACK_RAD
            if c == CHECKPOINTS - 1:
                alpha = 2 * math.pi * c / CHECKPOINTS
                self.start_alpha = 2 * math.pi * (-0.5) / CHECKPOINTS
                rad = 1.5 * TRACK_RAD

            # Each checkpoint is stored as (angle, x, y).
            checkpoints.append(
                (alpha, rad * math.cos(alpha), rad * math.sin(alpha)))
        self.road = []

        # Go from one checkpoint to another to create track
        x, y, beta = 1.5 * TRACK_RAD, 0, 0
        dest_i = 0
        laps = 0
        track = []
        no_freeze = 2500  # hard cap on the number of samples generated
        visited_other_side = False
        while True:
            # Polar angle of the current point; negative angles are shifted by 2*pi.
            alpha = math.atan2(y, x)
            # A lap is counted each time the angle turns positive again after
            # having been negative.
            if visited_other_side and alpha > 0:
                laps += 1
                visited_other_side = False
            if alpha < 0:
                visited_other_side = True
                alpha += 2 * math.pi

            while True:  # Find destination from checkpoints
                failed = True

                # Advance dest_i until a checkpoint at or beyond alpha is found,
                # giving up once dest_i wraps to a multiple of the list length.
                while True:
                    dest_alpha, dest_x, dest_y = checkpoints[dest_i %
                                                             len(checkpoints)]
                    if alpha <= dest_alpha:
                        failed = False
                        break
                    dest_i += 1
                    if dest_i % len(checkpoints) == 0:
                        break

                if not failed:
                    break

                # No checkpoint ahead at this angle: lower alpha by a full turn and retry.
                alpha -= 2 * math.pi
                continue

            # r1 is the unit vector at angle beta; p1 is r1 rotated by 90 degrees
            # and is the direction the point actually moves in.
            r1x = math.cos(beta)
            r1y = math.sin(beta)
            p1x = -r1y
            p1y = r1x
            dest_dx = dest_x - x  # vector towards destination
            dest_dy = dest_y - y
            # destination vector projected on rad:
            proj = r1x * dest_dx + r1y * dest_dy
            # Keep beta within 1.5*pi of alpha by adding/removing full turns.
            while beta - alpha > 1.5 * math.pi:
                beta -= 2 * math.pi
            while beta - alpha < -1.5 * math.pi:
                beta += 2 * math.pi
            prev_beta = beta
            proj *= SCALE
            # Turn by at most TRACK_TURN_RATE per step; the sign of proj picks
            # the direction, and |proj| <= 0.3 leaves beta unchanged.
            if proj > 0.3:
                beta -= min(TRACK_TURN_RATE, abs(0.001 * proj))
            if proj < -0.3:
                beta += min(TRACK_TURN_RATE, abs(0.001 * proj))
            x += p1x * TRACK_DETAIL_STEP
            y += p1y * TRACK_DETAIL_STEP
            # Sample: (alpha, mean of beta before and after the turn, x, y).
            track.append((alpha, prev_beta * 0.5 + beta * 0.5, x, y))
            if laps > 4:
                break
            no_freeze -= 1
            if no_freeze == 0:
                break

        # Find closed loop range i1..i2, first loop should be ignored, second is OK
        i1, i2 = -1, -1
        i = len(track)
        # Scan backwards: the first crossing of start_alpha found becomes i2,
        # the next one becomes i1.
        while True:
            i -= 1
            if i == 0:
                return False  # Failed
            pass_through_start = (track[i][0] > self.start_alpha
                                  and track[i - 1][0] <= self.start_alpha)
            if pass_through_start and i2 == -1:
                i2 = i
            elif pass_through_start and i1 == -1:
                i1 = i
                break
        if self.verbose == 1:
            print("Track generation: %i..%i -> %i-tiles track" %
                  (i1, i2, i2 - i1))
        assert i1 != -1
        assert i2 != -1

        track = track[i1:i2 - 1]

        first_beta = track[0][1]
        first_perp_x = math.cos(first_beta)
        first_perp_y = math.sin(first_beta)
        # Length of perpendicular jump to put together head and tail
        well_glued_together = np.sqrt(
            np.square(first_perp_x * (track[0][2] - track[-1][2])) +
            np.square(first_perp_y * (track[0][3] - track[-1][3])))
        # Reject the lap when its two ends are further apart than one step.
        if well_glued_together > TRACK_DETAIL_STEP:
            return False

        # Red-white border on hard turns
        border = [False] * len(track)
        for i in range(len(track)):
            good = True
            oneside = 0
            # A sample qualifies when each of the BORDER_MIN_COUNT preceding
            # beta changes exceeds 0.2 * TRACK_TURN_RATE in size and all have
            # the same sign.  Negative indices wrap to the end of the lap.
            for neg in range(BORDER_MIN_COUNT):
                beta1 = track[i - neg - 0][1]
                beta2 = track[i - neg - 1][1]
                good &= abs(beta1 - beta2) > TRACK_TURN_RATE * 0.2
                oneside += np.sign(beta1 - beta2)
            good &= abs(oneside) == BORDER_MIN_COUNT
            border[i] = good
        # Extend each flag backwards over the samples that contributed to it.
        for i in range(len(track)):
            for neg in range(BORDER_MIN_COUNT):
                border[i - neg] |= border[i]

        # Create tiles
        for i in range(len(track)):
            alpha1, beta1, x1, y1 = track[i]
            # For i == 0 this is the last sample, which closes the loop.
            alpha2, beta2, x2, y2 = track[i - 1]
            # Corners lie TRACK_WIDTH to either side of each sample along its beta direction.
            road1_l = (
                x1 - TRACK_WIDTH * math.cos(beta1),
                y1 - TRACK_WIDTH * math.sin(beta1),
            )
            road1_r = (
                x1 + TRACK_WIDTH * math.cos(beta1),
                y1 + TRACK_WIDTH * math.sin(beta1),
            )
            road2_l = (
                x2 - TRACK_WIDTH * math.cos(beta2),
                y2 - TRACK_WIDTH * math.sin(beta2),
            )
            road2_r = (
                x2 + TRACK_WIDTH * math.cos(beta2),
                y2 + TRACK_WIDTH * math.sin(beta2),
            )
            vertices = [road1_l, road1_r, road2_r, road2_l]
            # The shared fixture definition is re-pointed at this tile's quad.
            self.fd_tile.shape.vertices = vertices
            t = self.world.CreateStaticBody(fixtures=self.fd_tile)
            t.userData = t
            # Colour offset cycles over three values with the tile index.
            c = 0.01 * (i % 3)
            t.color = [ROAD_COLOR[0] + c, ROAD_COLOR[1] + c, ROAD_COLOR[2] + c]
            t.road_visited = False
            t.road_friction = 1.0
            # Position of this tile within the lap.
            t.idx = i
            t.fixtures[0].sensor = True
            self.road_poly.append(([road1_l, road1_r, road2_r,
                                    road2_l], t.color))
            self.road.append(t)
            if border[i]:
                # The border strip goes on the side given by the sign of the beta change.
                side = np.sign(beta2 - beta1)
                b1_l = (
                    x1 + side * TRACK_WIDTH * math.cos(beta1),
                    y1 + side * TRACK_WIDTH * math.sin(beta1),
                )
                b1_r = (
                    x1 + side * (TRACK_WIDTH + BORDER) * math.cos(beta1),
                    y1 + side * (TRACK_WIDTH + BORDER) * math.sin(beta1),
                )
                b2_l = (
                    x2 + side * TRACK_WIDTH * math.cos(beta2),
                    y2 + side * TRACK_WIDTH * math.sin(beta2),
                )
                b2_r = (
                    x2 + side * (TRACK_WIDTH + BORDER) * math.cos(beta2),
                    y2 + side * (TRACK_WIDTH + BORDER) * math.sin(beta2),
                )
                # Border segments alternate between white and red by tile index.
                self.road_poly.append(([b1_l, b1_r, b2_r,
                                        b2_l], (1, 1, 1) if i % 2 == 0 else
                                       (1, 0, 0)))
        self.track = track
        return True

    def reset(
        self,
        *,
        seed: Optional[int] = None,
        return_info: bool = False,
        options: Optional[dict] = None,
    ):
        """Start a new episode on a freshly generated track.

        Forwards ``seed`` to the parent ``reset``, destroys the previous road
        and car, zeroes the reward, tile, time and lap state, empties
        ``self.road_poly`` and calls ``_create_track()`` until it succeeds.
        The car is then created from the beta, x and y values of the first
        track sample.  ``options`` is accepted but not used here.

        Returns:
            The observation from an initial ``step(None)``; when
            ``return_info`` is true, a ``(observation, {})`` tuple instead.
        """
        super().reset(seed=seed)
        self._destroy()
        self.reward = 0.0
        self.prev_reward = 0.0
        self.tile_visited_count = 0
        self.t = 0.0
        self.new_lap = False
        self.road_poly = []

        # _create_track() returns False when the generated loop is unusable; keep trying.
        while not self._create_track():
            if self.verbose == 1:
                # The trailing space keeps "many" and "instances" apart when
                # the two adjacent literals are joined.
                print("retry to generate track (normal if there are not many "
                      "instances of this message)")
        self.car = Car(self.world, *self.track[0][1:4])

        observation = self.step(None)[0]
        if return_info:
            return observation, {}
        return observation

    def step(self, action):
        """Advance the simulation by one frame of ``1 / FPS`` seconds.

        action: indexable with three entries applied as steer (negated), gas
            and brake, or ``None`` to advance without touching the controls
            (``reset()`` does this for the first frame).

        Returns:
            ``(state, step_reward, done, {})`` where ``state`` is the
            ``"state_pixels"`` render.  With ``action=None`` the reward is 0
            and ``done`` is False.  Otherwise the accumulated reward drops by
            0.1 and ``step_reward`` is the change since the previous step;
            ``done`` is set when every tile has been visited or
            ``self.new_lap`` is set, and leaving the playfield sets ``done``
            with a step reward of -100.
        """
        acting = action is not None
        if acting:
            self.car.steer(-action[0])
            self.car.gas(action[1])
            self.car.brake(action[2])

        self.car.step(1.0 / FPS)
        self.world.Step(1.0 / FPS, 6 * 30, 2 * 30)
        self.t += 1.0 / FPS

        self.state = self.render("state_pixels")

        if not acting:
            # First step without action, called from reset()
            return self.state, 0, False, {}

        self.reward -= 0.1
        # Fuel use is deliberately not penalised (the car should simply be
        # fast); the counter is only cleared.
        self.car.fuel_spent = 0.0
        step_reward = self.reward - self.prev_reward
        self.prev_reward = self.reward

        done = False
        all_tiles_visited = self.tile_visited_count == len(self.track)
        if all_tiles_visited or self.new_lap:
            done = True

        pos_x, pos_y = self.car.hull.position
        if abs(pos_x) > PLAYFIELD or abs(pos_y) > PLAYFIELD:
            done = True
            step_reward = -100

        return self.state, step_reward, done, {}

    def render(self, mode="human"):
        """Draw the current frame onto ``self.surf`` and deliver it per ``mode``.

        The display window is created on the first ``"human"`` call and the
        clock on the first call of any mode.  The road and car are drawn with
        a zoom/rotation/translation centred on the car, the surface is
        flipped vertically, and the indicators and score text are drawn on
        top.

        mode: ``"human"`` blits the surface to the window, ticks the clock at
            ``metadata["render_fps"]`` and returns ``self.isopen``;
            ``"rgb_array"`` and ``"state_pixels"`` return the surface as an
            image array scaled to ``VIDEO_*`` or ``STATE_*`` size.

        Returns:
            See ``mode``; ``None`` when ``reset()`` has not been called yet.
        """
        assert mode in ["human", "state_pixels", "rgb_array"]
        window_size = (WINDOW_W, WINDOW_H)

        if mode == "human" and self.screen is None:
            pygame.display.init()
            self.screen = pygame.display.set_mode(window_size)
        if self.clock is None:
            self.clock = pygame.time.Clock()

        if "t" not in self.__dict__:
            return  # reset() not called yet

        self.surf = pygame.Surface(window_size)

        # View transform: rotate by the negated hull angle, zoom in over the
        # first second, and keep the car at (W/2, H/4).
        hull = self.car.hull
        angle = -hull.angle
        zoom = 0.1 * SCALE * max(1 - self.t, 0) + ZOOM * SCALE * min(self.t, 1)
        scroll_x = -(hull.position[0] + PLAYFIELD) * zoom
        scroll_y = -(hull.position[1] + PLAYFIELD) * zoom
        rotated = pygame.math.Vector2((scroll_x, scroll_y)).rotate_rad(angle)
        trans = (WINDOW_W / 2 + rotated[0], WINDOW_H / 4 + rotated[1])

        self.render_road(zoom, trans, angle)
        self.car.draw(self.surf, zoom, trans, angle, mode != "state_pixels")

        self.surf = pygame.transform.flip(self.surf, False, True)

        # Stats overlay: indicator bars plus the score in the corner.
        self.render_indicators(WINDOW_W, WINDOW_H)

        score_font = pygame.font.Font(pygame.font.get_default_font(), 42)
        score = score_font.render(
            "%04i" % self.reward, True, (255, 255, 255), (0, 0, 0)
        )
        score_rect = score.get_rect()
        score_rect.center = (60, WINDOW_H - WINDOW_H * 2.5 / 40.0)
        self.surf.blit(score, score_rect)

        if mode == "human":
            pygame.event.pump()
            self.clock.tick(self.metadata["render_fps"])
            self.screen.fill(0)
            self.screen.blit(self.surf, (0, 0))
            pygame.display.flip()

        if mode == "rgb_array":
            return self._create_image_array(self.surf, (VIDEO_W, VIDEO_H))
        if mode == "state_pixels":
            return self._create_image_array(self.surf, (STATE_W, STATE_H))
        return self.isopen

    def render_road(self, zoom, translation, angle):
        """Draw the playfield, the grass pattern and the road onto ``self.surf``.

        zoom, translation, angle: view transform, forwarded unchanged to
            ``draw_colored_polygon`` for every polygon.

        Drawing order: one square of side ``2 * PLAYFIELD``, then a lighter
        square on every other cell of a grid with cell size
        ``PLAYFIELD / 20``, then each polygon in ``self.road_poly`` shifted
        by ``+PLAYFIELD`` on both axes, with its 0..1 colour components
        scaled to 0..255 integers.
        """
        bounds = PLAYFIELD
        field = [
            (2 * bounds, 2 * bounds),
            (2 * bounds, 0),
            (0, 0),
            (0, 2 * bounds),
        ]
        self.draw_colored_polygon(self.surf, field, (102, 204, 102), zoom,
                                  translation, angle)

        # Grass squares are drawn as they are generated; no intermediate list
        # is needed.
        k = bounds / (20.0)
        for x in range(0, 40, 2):
            for y in range(0, 40, 2):
                square = [
                    (k * x + k, k * y + 0),
                    (k * x + 0, k * y + 0),
                    (k * x + 0, k * y + k),
                    (k * x + k, k * y + k),
                ]
                self.draw_colored_polygon(self.surf, square, (102, 230, 102),
                                          zoom, translation, angle)

        for poly, color in self.road_poly:
            # converting to pixel coordinates
            poly = [(p[0] + PLAYFIELD, p[1] + PLAYFIELD) for p in poly]
            color = [int(c * 255) for c in color]
            self.draw_colored_polygon(self.surf, poly, color, zoom,
                                      translation, angle)

    def render_indicators(self, W, H):
        """Draw the indicator panel and its bars onto ``self.surf``.

        The panel is a black polygon five rows high along the ``y = H`` edge,
        a row being ``H / 40`` and a column ``W / 40``.  Vertical bars show
        the hull speed and the four wheel ``omega`` values; horizontal bars
        show the joint angle of wheel 0 and the hull's angular velocity.  A
        bar is skipped when the magnitude of its value is at most 1e-4.
        """
        col = W / 40.0
        row = H / 40.0

        panel = [(W, H), (W, H - 5 * row), (0, H - 5 * row), (0, H)]
        pygame.draw.polygon(self.surf, color=(0, 0, 0), points=panel)

        def vertical_ind(place, val):
            tip = H - (row + row * val)
            return [
                (place * col, tip),
                ((place + 1) * col, tip),
                ((place + 1) * col, H - row),
                ((place + 0) * col, H - row),
            ]

        def horiz_ind(place, val):
            return [
                ((place + 0) * col, H - 4 * row),
                ((place + val) * col, H - 4 * row),
                ((place + val) * col, H - 2 * row),
                ((place + 0) * col, H - 2 * row),
            ]

        hull = self.car.hull
        wheels = self.car.wheels
        true_speed = np.sqrt(
            np.square(hull.linearVelocity[0]) +
            np.square(hull.linearVelocity[1]))
        steer_angle = wheels[0].joint.angle
        spin = hull.angularVelocity

        # (raw value used for the visibility check, polygon, colour)
        gauges = [
            (true_speed, vertical_ind(5, 0.02 * true_speed), (255, 255, 255)),
            # ABS sensors
            (wheels[0].omega, vertical_ind(7, 0.01 * wheels[0].omega),
             (0, 0, 255)),
            (wheels[1].omega, vertical_ind(8, 0.01 * wheels[1].omega),
             (0, 0, 255)),
            (wheels[2].omega, vertical_ind(9, 0.01 * wheels[2].omega),
             (51, 0, 255)),
            (wheels[3].omega, vertical_ind(10, 0.01 * wheels[3].omega),
             (51, 0, 255)),
            (steer_angle, horiz_ind(20, -10.0 * steer_angle), (0, 255, 0)),
            (spin, horiz_ind(30, -0.8 * spin), (255, 0, 0)),
        ]

        for value, points, color in gauges:
            # Only draw a bar when its value is above the threshold.
            if abs(value) > 1e-4:
                pygame.draw.polygon(self.surf, points=points, color=color)

    def draw_colored_polygon(self, surface, poly, color, zoom, translation,
                             angle):
        """Draw ``poly`` on ``surface`` after applying the view transform.

        surface: target pygame surface.  The polygon is drawn on this
            argument rather than unconditionally on ``self.surf``.
        poly: sequence of (x, y) points.
        color: colour passed to the gfxdraw calls.
        zoom, translation, angle: each point is rotated by ``angle`` radians,
            scaled by ``zoom`` and then offset by ``translation``.

        The outline is drawn with ``aapolygon`` and the interior with
        ``filled_polygon``.
        """
        rotated = [pygame.math.Vector2(c).rotate_rad(angle) for c in poly]
        placed = [(c[0] * zoom + translation[0], c[1] * zoom + translation[1])
                  for c in rotated]
        gfxdraw.aapolygon(surface, placed, color)
        gfxdraw.filled_polygon(surface, placed, color)

    def _create_image_array(self, screen, size):
        """Return ``screen`` smooth-scaled to ``size`` as an array with its first two axes swapped."""
        scaled = pygame.transform.smoothscale(screen, size)
        pixels = np.array(pygame.surfarray.pixels3d(scaled))
        return np.transpose(pixels, axes=(1, 0, 2))

    def close(self):
        """Shut pygame down; if a window was opened, also quit the display and mark the env closed."""
        pygame.quit()
        if self.screen is None:
            return
        pygame.display.quit()
        self.isopen = False
Exemplo n.º 7
0
class CarRacing(gym.Env, EzPickle):
    metadata = {
        "render.modes": ["human", "rgb_array", "state_pixels"],
        "video.frames_per_second": FPS,
    }

    def __init__(self, verbose=1, obstacles=False):
        """Set up the physics world, spaces and bookkeeping for the environment.

        Args:
            verbose: When equal to 1, track generation prints progress messages.
            obstacles: When true, obstacles are generated on reset, checked for
                collisions every step and drawn by ``render``.
        """
        # EzPickle rebuilds the object by calling the constructor with the
        # arguments recorded here, so they must be forwarded; otherwise a
        # pickled/copied environment silently falls back to the defaults.
        EzPickle.__init__(self, verbose, obstacles)
        self.SI = SI(env=self,
                     car_shape=(4, 8),
                     image_shape=(STATE_W, STATE_H),
                     render_distance=40,
                     road_width=40 / 6,
                     fill=True,
                     interpolate=True,
                     obstacles=obstacles)
        self.seed()
        # Keep a reference on self so the listener outlives this call.
        self.contactListener_keepref = FrictionDetector(self)
        self.world = Box2D.b2World(
            (0, 0), contactListener=self.contactListener_keepref)
        self.viewer = None
        self.invisible_state_window = None
        self.invisible_video_window = None
        self.road = None
        self.car = None
        self.reward = 0.0
        self.prev_reward = 0.0
        self.verbose = verbose
        self.steps = 0
        self.n_obstacles = 10
        self.obstacles = obstacles
        self.dim_obstacles = (0.5, 0.5)
        # collision() flags a hit when its smallest measured distance drops
        # below this value.
        self.collision_threshold = 0.1
        self.COLLISION = False
        # Template fixture; _create_track overwrites its vertices for every
        # tile.  The initial shape is the unit square
        # [(0, 0) | (1, 0)]
        # [(0,-1) | (1,-1)]
        self.fd_tile = fixtureDef(
            shape=polygonShape(vertices=[(0, 0), (1, 0), (1, -1), (0, -1)])
        )
        self.action_space = spaces.Box(np.array([-1, 0, 0]),
                                       np.array([+1, +1, +1]),
                                       dtype=np.float32)  # steer, gas, brake
        self.observation_space = spaces.Box(low=0,
                                            high=255,
                                            shape=(STATE_H, STATE_W, 3),
                                            dtype=np.uint8)
        # 2-D rotation of the point (x, y) by `angle` radians; returns [x', y'].
        self.R = lambda x, y, angle: [
            x * np.cos(angle) - y * np.sin(angle),
            y * np.cos(angle) + x * np.sin(angle)
        ]

    def _create_obstacles(self):
        "This function randomly generates obstacles along the track for the vehicle to avoid"

        obstacle_interval = np.floor(
            (len(self.track) - 1) / (self.n_obstacles)
        )  # after how many track vertices must a obstacle appear.
        count = 0
        self.obstacles_pos = np.zeros((self.n_obstacles, 4, 2))

        for i in range(len(self.track)):
            if i % obstacle_interval == 0 and count < self.n_obstacles and i > 1:
                count += 1
                alpha1, beta1, x1, y1 = self.track[i]
                alpha2, beta2, x2, y2 = self.track[i - 1]

                sign = 1 if np.random.random() < 0.5 else -1

                road1_l = (x1 - sign * self.dim_obstacles[0] * math.cos(beta1),
                           y1 - sign * self.dim_obstacles[0] * math.sin(beta1))

                road1_r = (x1 + sign * self.dim_obstacles[1] * math.cos(beta1),
                           y1 + sign * self.dim_obstacles[1] * math.sin(beta1))

                road2_l = (x2 - sign * self.dim_obstacles[0] * math.cos(beta2),
                           y2 - sign * self.dim_obstacles[0] * math.sin(beta2))

                road2_r = (x2 + sign * self.dim_obstacles[1] * math.cos(beta2),
                           y2 + sign * self.dim_obstacles[1] * math.sin(beta2))

                self.obstacle_poly.extend([
                    road1_l[0], road1_l[1], 0, road1_r[0], road1_r[1], 0,
                    road2_r[0], road2_r[1], 0, road2_l[0], road2_l[1], 0
                ])

                self.obstacles_pos[count - 1, :, 0] = np.array(
                    [road1_l[0], road1_r[0], road2_r[0], road2_l[0]])

                self.obstacles_pos[count - 1, :, 1] = np.array(
                    [road1_l[1], road1_r[1], road2_r[1], road2_l[1]])

        if len(self.obstacle_poly) // 3 == 4 * self.n_obstacles:
            return True
        else:
            print('There was a problem generating the obstacle course')
            return False

    def _create_track(self) -> bool:
        """Generate a closed track and create one static Box2D sensor body per tile.

        Twelve checkpoints are placed around the origin at randomised angles
        and radii.  A point is then walked from checkpoint to checkpoint in
        steps of ``TRACK_DETAIL_STEP`` while its heading ``beta`` is steered
        towards the current destination.  The last closed loop of that walk
        becomes the track.

        Side effects:
            Resets ``self.road`` and fills it with the tile bodies, appends the
            tile (and red-white border) polygons to ``self.road_poly``, sets
            ``self.start_alpha`` and, on success, ``self.track`` (a list of
            ``(alpha, beta, x, y)`` tuples).

        Returns:
            True on success; False when no closed loop was found or when the
            ends of the loop do not meet closely enough.  The caller is
            expected to retry.
        """

        CHECKPOINTS = 12
        # Create checkpoints as (angle, x, y) tuples
        checkpoints = []
        for c in range(CHECKPOINTS):
            noise = self.np_random.uniform(0, 2 * math.pi * 1 / CHECKPOINTS)
            alpha = 2 * math.pi * c / CHECKPOINTS + noise
            rad = self.np_random.uniform(TRACK_RAD / 3, TRACK_RAD)
            # The first and last checkpoints are fixed (no noise, radius
            # 1.5 * TRACK_RAD).
            if c == 0:
                alpha = 0
                rad = 1.5 * TRACK_RAD
            if c == CHECKPOINTS - 1:
                alpha = 2 * math.pi * c / CHECKPOINTS
                self.start_alpha = 2 * math.pi * (-0.5) / CHECKPOINTS
                rad = 1.5 * TRACK_RAD
            checkpoints.append(
                (alpha, rad * math.cos(alpha), rad * math.sin(alpha)))
        self.road = []

        # Go from one checkpoint to another to create track
        x, y, beta = 1.5 * TRACK_RAD, 0, 0  # Start on the positive x-axis at radius 1.5 * TRACK_RAD with heading 0
        dest_i = 0
        laps = 0  # Counts completed laps; the walk stops once this exceeds 4
        track = []
        no_freeze = 2500  # Hard cap on the number of walk iterations
        visited_other_side = False  # Set once alpha has been negative; the next positive alpha then counts as a lap

        while True:
            alpha = math.atan2(y, x)
            if visited_other_side and alpha > 0:
                laps += 1
                visited_other_side = False
            if alpha < 0:
                visited_other_side = True
                alpha += 2 * math.pi

            while True:  # Find destination from checkpoints
                failed = True
                while True:
                    dest_alpha, dest_x, dest_y = checkpoints[dest_i %
                                                             len(checkpoints)]
                    if alpha <= dest_alpha:
                        failed = False
                        break
                    dest_i += 1
                    if dest_i % len(checkpoints) == 0:
                        break

                if not failed:
                    break

                # Every checkpoint is behind us: wrap alpha and search again.
                alpha -= 2 * math.pi
                continue

            # r1 is the unit vector at angle beta; p1 is r1 rotated by +90
            # degrees and is the direction the walk actually advances in.
            r1x = math.cos(beta)
            r1y = math.sin(beta)
            p1x = -r1y
            p1y = r1x
            dest_dx = dest_x - x  # vector towards destination
            dest_dy = dest_y - y
            # destination vector projected on rad:
            proj = r1x * dest_dx + r1y * dest_dy
            # Keep beta within 1.5 * pi of alpha.
            while beta - alpha > 1.5 * math.pi:
                beta -= 2 * math.pi
            while beta - alpha < -1.5 * math.pi:
                beta += 2 * math.pi
            prev_beta = beta
            proj *= SCALE
            # Turn towards the destination, by at most TRACK_TURN_RATE per step.
            if proj > 0.3:
                beta -= min(TRACK_TURN_RATE, abs(0.001 * proj))
            if proj < -0.3:
                beta += min(TRACK_TURN_RATE, abs(0.001 * proj))
            x += p1x * TRACK_DETAIL_STEP
            y += p1y * TRACK_DETAIL_STEP
            # Store the average of the heading before and after the turn.
            track.append((alpha, prev_beta * 0.5 + beta * 0.5, x, y))
            if laps > 4:
                break
            no_freeze -= 1
            if no_freeze == 0:
                break

        # Find closed loop range i1..i2, first loop should be ignored, second is OK
        # (the scan runs backwards from the end of the walk).
        i1, i2 = -1, -1
        i = len(track)
        while True:
            i -= 1
            if i == 0:
                return False  # Failed
            pass_through_start = (track[i][0] > self.start_alpha
                                  and track[i - 1][0] <= self.start_alpha)
            if pass_through_start and i2 == -1:
                i2 = i
            elif pass_through_start and i1 == -1:
                i1 = i
                break
        if self.verbose == 1:
            print("Track generation: %i..%i -> %i-tiles track" %
                  (i1, i2, i2 - i1))
        assert i1 != -1
        assert i2 != -1

        track = track[i1:i2 - 1]

        first_beta = track[0][1]
        first_perp_x = math.cos(first_beta)
        first_perp_y = math.sin(first_beta)
        # Length of perpendicular jump to put together head and tail
        well_glued_together = np.sqrt(
            np.square(first_perp_x * (track[0][2] - track[-1][2])) +
            np.square(first_perp_y * (track[0][3] - track[-1][3])))
        if well_glued_together > TRACK_DETAIL_STEP:
            return False

        # Red-white border on hard turns: a tile qualifies when the last
        # BORDER_MIN_COUNT heading changes are all large enough and all turn
        # the same way.
        border = [False] * len(track)
        for i in range(len(track)):
            good = True
            oneside = 0
            for neg in range(BORDER_MIN_COUNT):
                beta1 = track[i - neg - 0][1]
                beta2 = track[i - neg - 1][1]
                good &= abs(beta1 - beta2) > TRACK_TURN_RATE * 0.2
                oneside += np.sign(beta1 - beta2)
            good &= abs(oneside) == BORDER_MIN_COUNT
            border[i] = good
        # Extend each border backwards over the tiles that led up to it.
        for i in range(len(track)):
            for neg in range(BORDER_MIN_COUNT):
                border[i - neg] |= border[i]

        # Create tiles: each tile is the quad between track[i - 1] and track[i]
        # (index -1 wraps around, which closes the loop).
        for i in range(len(track)):
            alpha1, beta1, x1, y1 = track[i]
            alpha2, beta2, x2, y2 = track[i - 1]
            road1_l = (
                x1 - TRACK_WIDTH * math.cos(beta1),
                y1 - TRACK_WIDTH * math.sin(beta1),
            )
            road1_r = (
                x1 + TRACK_WIDTH * math.cos(beta1),
                y1 + TRACK_WIDTH * math.sin(beta1),
            )
            road2_l = (
                x2 - TRACK_WIDTH * math.cos(beta2),
                y2 - TRACK_WIDTH * math.sin(beta2),
            )
            road2_r = (
                x2 + TRACK_WIDTH * math.cos(beta2),
                y2 + TRACK_WIDTH * math.sin(beta2),
            )
            vertices = [road1_l, road1_r, road2_r, road2_l]
            self.fd_tile.shape.vertices = vertices
            t = self.world.CreateStaticBody(
                fixtures=self.fd_tile
            )  # Static body built from the shared fixture, whose vertices were just set to this tile's quad
            t.userData = t
            c = 0.01 * (i % 3
                        )  # Small brightness offset that cycles every three tiles
            t.color = [ROAD_COLOR[0] + c, ROAD_COLOR[1] + c, ROAD_COLOR[2] + c]
            t.road_visited = False
            t.road_friction = 1.0  # Per-tile friction value stored on the body
            t.fixtures[0].sensor = True
            self.road_poly.append(([road1_l, road1_r, road2_r,
                                    road2_l], t.color))
            self.road.append(t)
            if border[i]:
                # The sign of the heading change picks the side of the road
                # that gets the border strip.
                side = np.sign(beta2 - beta1)
                b1_l = (
                    x1 + side * TRACK_WIDTH * math.cos(beta1),
                    y1 + side * TRACK_WIDTH * math.sin(beta1),
                )
                b1_r = (
                    x1 + side * (TRACK_WIDTH + BORDER) * math.cos(beta1),
                    y1 + side * (TRACK_WIDTH + BORDER) * math.sin(beta1),
                )
                b2_l = (
                    x2 + side * TRACK_WIDTH * math.cos(beta2),
                    y2 + side * TRACK_WIDTH * math.sin(beta2),
                )
                b2_r = (
                    x2 + side * (TRACK_WIDTH + BORDER) * math.cos(beta2),
                    y2 + side * (TRACK_WIDTH + BORDER) * math.sin(beta2),
                )
                # Alternate white and red strips.
                self.road_poly.append(([b1_l, b1_r, b2_r,
                                        b2_l], (1, 1, 1) if i % 2 == 0 else
                                       (1, 0, 0)))
        self.track = track
        return True

    def seed(self, seed=None):
        """Replace ``self.np_random`` via ``seeding.np_random`` and return the seed used, in a list."""
        rng, seed_used = seeding.np_random(seed)
        self.np_random = rng
        return [seed_used]

    def _destroy(self):
        if not self.road:
            return
        for t in self.road:
            self.world.DestroyBody(t)
        self.road = []
        self.car.destroy()

    def reset(self):
        """Start a new episode: rebuild track (and obstacles), spawn the car, return the first state.

        Track generation is retried until ``_create_track`` succeeds and, when
        obstacles are enabled, ``_create_obstacles`` succeeds as well.

        Returns:
            The state produced by an initial ``step(None)``.
        """
        self._destroy()
        self.reward = 0.0
        self.prev_reward = 0.0
        self.tile_visited_count = 0
        self.t = 0.0
        self.steps = 0

        while True:
            # Every attempt starts with empty render lists so that nothing
            # from a discarded attempt is drawn or counted later.
            self.road_poly = []
            self.obstacle_poly = []

            success = self._create_track()
            if success and self.obstacles:
                success = self._create_obstacles()
            if success:
                break

            # A track may have been built before obstacle placement failed:
            # remove its tile bodies from the world before trying again.
            for tile in self.road or []:
                self.world.DestroyBody(tile)
            self.road = []

            if self.verbose == 1:
                print("retry to generate track (normal if there are not many "
                      "instances of this message)")
        # The car starts at the first track point: (beta, x, y).
        self.car = Car(self.world, *self.track[0][1:4])

        return self.step(None)[0]

    def step(self, action):
        """Advance the simulation by one frame.

        Args:
            action: ``(steer, gas, brake)`` or ``None``.  ``None`` is used by
                ``reset()``: the world still advances, but no reward is
                computed and the episode cannot end.

        Returns:
            ``(state, step_reward, done, info)`` where ``info`` is an empty dict.
        """
        self.steps += 1
        acting = action is not None
        if acting:
            self.car.steer(-action[0])
            self.car.gas(action[1])
            self.car.brake(action[2])

        dt = 1.0 / FPS
        self.car.step(dt)
        self.world.Step(dt, 6 * 30, 2 * 30)
        self.t += dt
        # The state comes from the SI helper rather than render("state_pixels").
        self.state = self.SI.generate_image()

        if self.obstacles:
            self.collision()
        else:
            self.COLLISION = False

        if not acting:  # first step without action, called from reset()
            return self.state, 0, False, {}

        # Constant time penalty; fuel is deliberately not charged.
        self.reward -= 0.1
        self.car.fuel_spent = 0.0
        step_reward = self.reward - self.prev_reward
        self.prev_reward = self.reward

        done = False
        if self.tile_visited_count == len(self.track):
            done = True
        x, y = self.car.hull.position
        # Leaving the playfield or hitting an obstacle ends the episode.
        if abs(x) > PLAYFIELD or abs(y) > PLAYFIELD or self.COLLISION:
            done = True
            step_reward = -100

        return self.state, step_reward, done, {}

    def render(self, mode="human"):
        """Draw the current frame.

        Args:
            mode: ``"human"`` draws to the window, ``"rgb_array"`` and
                ``"state_pixels"`` read the frame back at video / state
                resolution.

        Returns:
            ``self.viewer.isopen`` in human mode, otherwise an
            ``(height, width, 3)`` uint8 array.  ``None`` when ``reset()`` has
            not been called yet.
        """
        assert mode in ["human", "state_pixels", "rgb_array"]
        if self.viewer is None:
            self.viewer = rendering.Viewer(WINDOW_W, WINDOW_H)
            self.score_label = pyglet.text.Label(
                "0000",
                font_size=36,
                x=20,
                y=WINDOW_H * 2.5 / 40.00,
                anchor_x="left",
                anchor_y="center",
                color=(255, 255, 255, 255),
            )

            self.transform = rendering.Transform()

        if "t" not in self.__dict__:
            return  # reset() not called yet

        # Animate zoom first second:
        zoom = 0.1 * SCALE * max(1 - self.t, 0) + ZOOM * SCALE * min(self.t, 1)
        scroll_x = self.car.hull.position[0]
        scroll_y = self.car.hull.position[1]
        angle = -self.car.hull.angle
        vel = self.car.hull.linearVelocity
        # Once the car moves, orient the view along its velocity instead of
        # its hull.
        if np.linalg.norm(vel) > 0.5:
            angle = math.atan2(vel[0], vel[1])
        self.transform.set_scale(zoom, zoom)
        self.transform.set_translation(
            WINDOW_W / 2 - (scroll_x * zoom * math.cos(angle) -
                            scroll_y * zoom * math.sin(angle)),
            WINDOW_H / 4 - (scroll_x * zoom * math.sin(angle) +
                            scroll_y * zoom * math.cos(angle)),
        )
        self.transform.set_rotation(angle)
        self.car.draw(self.viewer, mode != "state_pixels")

        win = self.viewer.window
        win.switch_to()
        win.dispatch_events()
        win.clear()

        # Viewport size depends on what the frame is used for.
        if mode == "rgb_array":
            VP_W = VIDEO_W
            VP_H = VIDEO_H
        elif mode == "state_pixels":
            VP_W = STATE_W
            VP_H = STATE_H
        else:
            pixel_scale = 1
            if hasattr(win.context, "_nscontext"):
                pixel_scale = (
                    win.context._nscontext.view().backingScaleFactor())  # pylint: disable=protected-access
            VP_W = int(pixel_scale * WINDOW_W)
            VP_H = int(pixel_scale * WINDOW_H)

        gl.glViewport(0, 0, VP_W, VP_H)
        t = self.transform
        t.enable()

        self.render_road()
        if self.obstacles:
            self.render_obstacles()
            self.render_collision()

        for geom in self.viewer.onetime_geoms:
            geom.render()
        self.viewer.onetime_geoms = []
        t.disable()

        self.render_indicators(WINDOW_W, WINDOW_H)
        if mode == "human":
            win.flip()
            return self.viewer.isopen

        image_data = (pyglet.image.get_buffer_manager().get_color_buffer().
                      get_image_data())
        # np.fromstring in binary mode is deprecated; frombuffer is the
        # documented replacement.  The copy keeps the result writable, as
        # fromstring's was.
        arr = np.frombuffer(image_data.get_data(), dtype=np.uint8).copy()
        arr = arr.reshape(VP_H, VP_W, 4)
        # Flip vertically and drop the alpha channel.
        arr = arr[::-1, :, 0:3]

        return arr

    def close(self):
        if self.viewer is not None:
            self.viewer.close()
            self.viewer = None

    def render_road(self):
        """Draw the playfield background, the grass checker pattern and all road polygons.

        Vertices and colours are accumulated in two parallel flat lists
        (three floats per vertex, four floats per colour) and drawn as quads
        in a single call.
        """
        colors = [0.4, 0.8, 0.4, 1.0] * 4
        polygons_ = [
            +PLAYFIELD, +PLAYFIELD, 0,
            +PLAYFIELD, -PLAYFIELD, 0,
            -PLAYFIELD, -PLAYFIELD, 0,
            -PLAYFIELD, +PLAYFIELD, 0,
        ]

        k = PLAYFIELD / 20.0
        grass_quad_colors = [0.4, 0.9, 0.4, 1.0] * 4
        for x in range(-20, 20, 2):
            for y in range(-20, 20, 2):
                # Add the colours next to the vertices so the two lists can
                # never get out of step (the old code added colours for
                # 20 * 20 quads while generating only 10 * 10).
                colors.extend(grass_quad_colors)
                polygons_.extend([
                    k * x + k, k * y + 0, 0,
                    k * x + 0, k * y + 0, 0,
                    k * x + 0, k * y + k, 0,
                    k * x + k, k * y + k, 0,
                ])

        # Each entry is (list of corner points, (r, g, b)).
        for poly, color in self.road_poly:
            colors.extend([color[0], color[1], color[2], 1] * len(poly))
            for p in poly:
                polygons_.extend([p[0], p[1], 0])

        # Three floats per vertex, hence the integer division by 3.
        vl = pyglet.graphics.vertex_list(
            len(polygons_) // 3,
            ("v3f", polygons_),
            ("c4f", colors),
        )
        try:
            vl.draw(gl.GL_QUADS)
        finally:
            # A new list is built every frame; release it again.
            vl.delete()

    def render_indicators(self, W, H):
        """Draw the black status bar with speed, wheel, steering and rotation gauges plus the score.

        Args:
            W: Width used to lay out the bar (one gauge slot is ``W / 40``).
            H: Height used to lay out the bar (one unit is ``H / 40``).
        """
        s = W / 40.0
        h = H / 40.0
        # Background bar across the full width, five units high.
        colors = [0, 0, 0, 1] * 4
        polygons = [W, 0, 0, W, 5 * h, 0, 0, 5 * h, 0, 0, 0, 0]

        def vertical_ind(place, val, color):
            """Queue a bar in slot `place` that grows upwards by `val` units."""
            colors.extend([color[0], color[1], color[2], 1] * 4)
            polygons.extend([
                place * s, h + h * val, 0,
                (place + 1) * s, h + h * val, 0,
                (place + 1) * s, h, 0,
                (place + 0) * s, h, 0,
            ])

        def horiz_ind(place, val, color):
            """Queue a bar starting at slot `place` that extends sideways by `val` slots."""
            colors.extend([color[0], color[1], color[2], 1] * 4)
            polygons.extend([
                (place + 0) * s, 4 * h, 0,
                (place + val) * s, 4 * h, 0,
                (place + val) * s, 2 * h, 0,
                (place + 0) * s, 2 * h, 0,
            ])

        true_speed = np.sqrt(
            np.square(self.car.hull.linearVelocity[0]) +
            np.square(self.car.hull.linearVelocity[1]))

        vertical_ind(5, 0.02 * true_speed, (1, 1, 1))
        vertical_ind(7, 0.01 * self.car.wheels[0].omega,
                     (0.0, 0, 1))  # ABS sensors
        vertical_ind(8, 0.01 * self.car.wheels[1].omega, (0.0, 0, 1))
        vertical_ind(9, 0.01 * self.car.wheels[2].omega, (0.2, 0, 1))
        vertical_ind(10, 0.01 * self.car.wheels[3].omega, (0.2, 0, 1))
        horiz_ind(20, -10.0 * self.car.wheels[0].joint.angle, (0, 1, 0))
        horiz_ind(30, -0.8 * self.car.hull.angularVelocity, (1, 0, 0))
        vl = pyglet.graphics.vertex_list(
            len(polygons) // 3,
            ("v3f", polygons),
            ("c4f", colors),
        )
        try:
            vl.draw(gl.GL_QUADS)
        finally:
            # A new list is built every frame; release it again.
            vl.delete()
        self.score_label.text = "%04i" % self.reward
        self.score_label.draw()

    def render_obstacles(self):
        """Draw every obstacle stored in ``self.obstacle_poly`` as a red quad."""
        # obstacle_poly holds x, y, z per vertex, so divide by 3.
        n_vertices = len(self.obstacle_poly) // 3
        if n_vertices == 0:
            return
        # One RGB triple per vertex.  Deriving the count from the vertex list
        # keeps colours and vertices in step whatever n_obstacles says.
        vertex_colors = [255, 5, 5] * n_vertices

        quads = pyglet.graphics.vertex_list(
            n_vertices, ('v3f', self.obstacle_poly), ('c3B', vertex_colors))
        try:
            quads.draw(gl.GL_QUADS)
        finally:
            # A new list is built every frame; release it again.
            quads.delete()

    def render_collision(self):
        """Draw a white quad over the car while ``self.COLLISION`` is set.

        The quad has its corners at ``(+-2, +-3)`` in the car's frame, rotated
        by the hull angle and moved to the hull position.
        """
        if not self.COLLISION:
            return

        x, y = self.car.hull.position
        heading = self.car.hull.angle

        vertices = []
        for dx, dy in ((2, 3), (-2, 3), (-2, -3), (2, -3)):
            rx, ry = self.R(dx, dy, heading)  # rotate each corner only once
            vertices.extend([rx + x, ry + y, 0])
        white = [255] * 12  # RGB for each of the four vertices

        quad = pyglet.graphics.vertex_list(4, ('v3f', vertices), ('c3B', white))
        try:
            quad.draw(gl.GL_QUADS)
        finally:
            # A new list is built every frame; release it again.
            quad.delete()

    def collision(self):
        " This function determines whether a collision with an obstacle has occurred or not "
        " The vehicle cannot reverse, so we are only interested in the front two components of the vehicles hull"

        x, y = self.car.hull.position
        t_angle = self.car.hull.angle

        x1 = self.R(2, 3, t_angle)[0] + x
        y1 = self.R(2, 3, t_angle)[1] + y

        x2 = self.R(-2, 3, t_angle)[0] + x
        y2 = self.R(-2, 3, t_angle)[1] + y

        x3 = self.R(-2, -3, t_angle)[0] + x
        y3 = self.R(-2, -3, t_angle)[1] + y

        x4 = self.R(2, -3, t_angle)[0] + x
        y4 = self.R(2, -3, t_angle)[1] + y

        midpoints_upper = [
            (self.obstacles_pos[:, 2, 0] + self.obstacles_pos[:, 3, 0]) / 2,
            (self.obstacles_pos[:, 2, 1] + self.obstacles_pos[:, 3, 1]) / 2
        ]
        midpoints_lower = [
            (self.obstacles_pos[:, 2, 0] + self.obstacles_pos[:, 3, 0]) / 2,
            (self.obstacles_pos[:, 2, 1] + self.obstacles_pos[:, 3, 1]) / 2
        ]

        distance_1 = np.sqrt((x1 - self.obstacles_pos[:, :, 0])**2 +
                             (y1 - self.obstacles_pos[:, :, 1])**2)
        distance_2 = np.sqrt((x2 - self.obstacles_pos[:, :, 0])**2 +
                             (y2 - self.obstacles_pos[:, :, 1])**2)
        distance_3 = np.sqrt((x3 - self.obstacles_pos[:, :, 0])**2 +
                             (y3 - self.obstacles_pos[:, :, 1])**2)
        distance_4 = np.sqrt((x4 - self.obstacles_pos[:, :, 0])**2 +
                             (y4 - self.obstacles_pos[:, :, 1])**2)

        u_midpoint_1 = np.sqrt((x1 - midpoints_upper[0])**2 +
                               (y1 - midpoints_upper[1])**2) * 0.5
        u_midpoint_2 = np.sqrt((x2 - midpoints_upper[0])**2 +
                               (y2 - midpoints_upper[1])**2) * 0.5
        u_midpoint_3 = np.sqrt((x3 - midpoints_upper[0])**2 +
                               (y3 - midpoints_upper[1])**2) * 0.5
        u_midpoint_4 = np.sqrt((x4 - midpoints_upper[0])**2 +
                               (y4 - midpoints_upper[1])**2) * 0.5

        l_midpoint_1 = np.sqrt((x1 - midpoints_lower[0])**2 +
                               (y1 - midpoints_lower[1])**2) * 0.5
        l_midpoint_2 = np.sqrt((x2 - midpoints_lower[0])**2 +
                               (y2 - midpoints_lower[1])**2) * 0.5
        l_midpoint_3 = np.sqrt((x3 - midpoints_lower[0])**2 +
                               (y3 - midpoints_lower[1])**2) * 0.5
        l_midpoint_4 = np.sqrt((x4 - midpoints_lower[0])**2 +
                               (y4 - midpoints_lower[1])**2) * 0.5

        smallest_distance = np.min([
            np.min(distance_1),
            np.min(distance_2),
            np.min(distance_3),
            np.min(distance_4),
            np.min(u_midpoint_1),
            np.min(u_midpoint_2),
            np.min(u_midpoint_3),
            np.min(u_midpoint_4),
            np.min(l_midpoint_1),
            np.min(l_midpoint_2),
            np.min(l_midpoint_3),
            np.min(l_midpoint_4)
        ])

        if smallest_distance < self.collision_threshold:
            self.COLLISION = True
        else:
            self.COLLISION = False
Exemplo n.º 8
0
class CarRacing(gym.Env, EzPickle):
    metadata = {
        'render.modes': ['human', 'rgb_array', 'state_pixels'],
        'video.frames_per_second': FPS
    }

    def __init__(self):
        """Create the physics world, the default aim pose and the action/observation spaces."""
        EzPickle.__init__(self)
        self.seed()

        # Physics: the listener is stored on self so a reference to it is kept.
        listener = FrictionDetector(self)
        self.contactListener_keepref = listener
        self.world = Box2D.b2World((0, 0), contactListener=listener)

        # Nothing is rendered or built until reset() / render() are called.
        self.viewer = None
        self.road = None
        self.car = None

        # Reward bookkeeping.
        self.reward = 0.0
        self.prev_reward = 0.0

        # Pose passed to Car() on reset, and the default target pose.
        self.init_state = (0.0, 0.0, 0.0)
        self.aim_x = 5.0
        self.aim_y = 10.0
        self.aim_th = 0.0

        # Actions are (steer, gas, brake).
        action_low = np.array([-1, 0, 0])
        action_high = np.array([+1, +1, +1])
        self.action_space = spaces.Box(action_low,
                                       action_high,
                                       dtype=np.float32)
        self.observation_space = spaces.Box(low=0,
                                            high=255,
                                            shape=(STATE_H, STATE_W, 3),
                                            dtype=np.uint8)

    def seed(self, seed=None):
        """Replace ``self.np_random`` via ``seeding.np_random`` and return the seed used, in a list."""
        rng, seed_used = seeding.np_random(seed)
        self.np_random = rng
        return [seed_used]

    def random_aim(self):
        self.aim_x = np.random.rand() * 10
        self.aim_y = np.random.rand() * 10
        self.aim_th = np.random.rand()

    def _destroy(self):
        if not self.road: return
        for t in self.road:
            self.world.DestroyBody(t)
        self.road = []
        self.car.destroy()

    def _create_track(self):
        """Create the single target tile and register it as the whole "road".

        The tile is a ``CAR_W`` x ``CAR_L`` rectangle rotated by
        ``self.aim_th`` and centred on ``(self.aim_x, self.aim_y)``.  It is
        added to the world as a static sensor body and its outline is
        appended to ``self.road_poly``.

        Returns:
            Always True.
        """
        self.road = []

        # Rectangle corners around the origin, one row per corner.
        outline = np.array([
            [-CAR_W / 2, -CAR_L / 2],
            [CAR_W / 2, -CAR_L / 2],
            [CAR_W / 2, CAR_L / 2],
            [-CAR_W / 2, CAR_L / 2],
        ])

        theta = self.aim_th
        rotation = np.array([[np.cos(theta), -np.sin(theta)],
                             [np.sin(theta), np.cos(theta)]])
        centre = np.array([[self.aim_x], [self.aim_y]])

        # Rotate as column vectors, shift, then return to one row per corner.
        aim = (np.dot(rotation, outline.T) + centre).T

        shape = polygonShape(vertices=aim.tolist())
        tile = self.world.CreateStaticBody(fixtures=fixtureDef(shape=shape))
        tile.userData = tile
        tile.color = [AIM_COLOR[i] for i in range(3)]
        tile.road_visited = False
        tile.road_friction = 1.0
        tile.fixtures[0].sensor = True

        self.road_poly.append((aim.tolist(), tile.color))
        self.road.append(tile)

        return True

    def reset(self):
        """Start a new episode with a fresh random target and return the first state.

        Returns:
            The state produced by an initial ``step(None)``.
        """
        self._destroy()
        self.random_aim()

        # Per-episode bookkeeping.
        self.reward = self.prev_reward = 0.0
        self.tile_visited_count = 0
        self.t = 0.0
        self.road_poly = []
        self.human_render = False

        # Retry until the track builder reports success.
        while not self._create_track():
            print(
                "retry to generate track (normal if there are not many of this messages)"
            )

        self.car = Car(self.world, *self.init_state)
        first_state = self.step(None)[0]
        return first_state

    def step(self, action):
        """Advance the simulation by one frame.

        Args:
            action: ``(steer, gas, brake)`` or ``None``.  ``None`` is used by
                ``reset()``: the world still advances, but no reward is
                computed and the episode cannot end.

        Returns:
            ``(state, step_reward, done, info)`` where ``info`` is an empty dict.
        """
        acting = action is not None
        if acting:
            self.car.steer(-action[0])
            self.car.gas(action[1])
            self.car.brake(action[2])

        dt = 1.0 / FPS
        self.car.step(dt)
        self.world.Step(dt, 6 * 30, 2 * 30)
        self.t += dt

        self.state = self.render("state_pixels")

        if not acting:  # first step without action, called from reset()
            return self.state, 0, False, {}

        # Constant time penalty; fuel is deliberately not charged.
        self.reward -= 0.1
        self.car.fuel_spent = 0.0
        step_reward = self.reward - self.prev_reward
        self.prev_reward = self.reward

        done = False
        x, y = self.car.hull.position
        # Leaving the playfield ends the episode with a fixed penalty.
        if abs(x) > PLAYFIELD or abs(y) > PLAYFIELD:
            done = True
            step_reward = -100

        return self.state, step_reward, done, {}

    def render(self, mode='human'):
        """Draw the current frame.

        Args:
            mode: ``'human'`` draws to the window; ``'rgb_array'`` and
                ``'state_pixels'`` read the frame back at video / state
                resolution.

        Returns:
            An ``(height, width, 3)`` uint8 array for ``'rgb_array'`` and
            ``'state_pixels'``; ``None`` for ``'human'`` or when ``reset()``
            has not been called yet.
        """
        if self.viewer is None:
            from gym.envs.classic_control import rendering
            self.viewer = rendering.Viewer(WINDOW_W, WINDOW_H)
            self.score_label = pyglet.text.Label('0000',
                                                 font_size=36,
                                                 x=20,
                                                 y=WINDOW_H * 2.5 / 40.00,
                                                 anchor_x='left',
                                                 anchor_y='center',
                                                 color=(255, 255, 255, 255))
            self.transform = rendering.Transform()

        if "t" not in self.__dict__:
            return  # reset() not called yet

        # Animate zoom first second
        zoom = 0.1 * SCALE * max(1 - self.t, 0) + ZOOM * SCALE * min(
            self.t, 1)
        scroll_x = self.car.hull.position[0]
        scroll_y = self.car.hull.position[1]
        angle = -self.car.hull.angle
        vel = self.car.hull.linearVelocity
        # Once the car moves, orient the view along its velocity instead of
        # its hull.
        if np.linalg.norm(vel) > 0.5:
            angle = math.atan2(vel[0], vel[1])
        self.transform.set_scale(zoom, zoom)
        self.transform.set_translation(
            WINDOW_W / 2 - (scroll_x * zoom * math.cos(angle) -
                            scroll_y * zoom * math.sin(angle)),
            WINDOW_H / 4 - (scroll_x * zoom * math.sin(angle) +
                            scroll_y * zoom * math.cos(angle)))
        self.transform.set_rotation(angle)

        self.car.draw(self.viewer, mode != "state_pixels")

        arr = None
        win = self.viewer.window
        if mode != 'state_pixels':
            win.switch_to()
            win.dispatch_events()
        if mode == "rgb_array" or mode == "state_pixels":
            win.clear()
            t = self.transform
            if mode == 'rgb_array':
                VP_W = VIDEO_W
                VP_H = VIDEO_H
            else:
                VP_W = STATE_W
                VP_H = STATE_H
            gl.glViewport(0, 0, VP_W, VP_H)
            t.enable()
            self.render_road()
            for geom in self.viewer.onetime_geoms:
                geom.render()
            t.disable()
            self.render_indicators(WINDOW_W,
                                   WINDOW_H)  # TODO: find why 2x needed
            image_data = pyglet.image.get_buffer_manager().get_color_buffer(
            ).get_image_data()
            # np.fromstring in binary mode is deprecated; frombuffer is the
            # documented replacement.  The copy keeps the result writable, as
            # fromstring's was.
            arr = np.frombuffer(image_data.data, dtype=np.uint8).copy()
            arr = arr.reshape(VP_H, VP_W, 4)
            # Flip vertically and drop the alpha channel.
            arr = arr[::-1, :, 0:3]

        # Agent can call or not call env.render() itself when recording video.
        if mode == "rgb_array" and not self.human_render:
            win.flip()

        if mode == 'human':
            self.human_render = True
            win.clear()
            t = self.transform
            gl.glViewport(0, 0, WINDOW_W, WINDOW_H)
            t.enable()
            self.render_road()
            for geom in self.viewer.onetime_geoms:
                geom.render()
            t.disable()
            self.render_indicators(WINDOW_W, WINDOW_H)
            win.flip()

        self.viewer.onetime_geoms = []
        return arr

    def close(self):
        if self.viewer is not None:
            self.viewer.close()
            self.viewer = None

    def render_road(self):
        """Draw the playfield square in ``ROAD_COLOR`` and then every polygon in ``self.road_poly``."""
        gl.glBegin(gl.GL_QUADS)

        # Background: one quad covering the whole playfield.
        gl.glColor4f(*ROAD_COLOR, 1.0)
        background = (
            (-PLAYFIELD, +PLAYFIELD),
            (+PLAYFIELD, +PLAYFIELD),
            (+PLAYFIELD, -PLAYFIELD),
            (-PLAYFIELD, -PLAYFIELD),
        )
        for corner in background:
            gl.glVertex3f(corner[0], corner[1], 0)

        # Each entry is (list of corner points, (r, g, b)).
        for outline, rgb in self.road_poly:
            gl.glColor4f(rgb[0], rgb[1], rgb[2], 1)
            for point in outline:
                gl.glVertex3f(point[0], point[1], 0)

        gl.glEnd()

    def render_indicators(self, W, H):
        """Draw the black status bar with speed, wheel, steering and rotation gauges plus the score.

        Args:
            W: Width used to lay out the bar (one gauge slot is ``W / 40``).
            H: Height used to lay out the bar (one unit is ``H / 40``).
        """
        s = W / 40.0
        h = H / 40.0

        def quad(color, x_from, x_to, y_first, y_second):
            """Emit one quad: two vertices at y_first, then two at y_second."""
            gl.glColor4f(color[0], color[1], color[2], 1)
            gl.glVertex3f(x_from, y_first, 0)
            gl.glVertex3f(x_to, y_first, 0)
            gl.glVertex3f(x_to, y_second, 0)
            gl.glVertex3f(x_from, y_second, 0)

        gl.glBegin(gl.GL_QUADS)

        # Background bar across the full width, five units high.
        gl.glColor4f(0, 0, 0, 1)
        gl.glVertex3f(W, 0, 0)
        gl.glVertex3f(W, 5 * h, 0)
        gl.glVertex3f(0, 5 * h, 0)
        gl.glVertex3f(0, 0, 0)

        car = self.car
        true_speed = np.sqrt(
            np.square(car.hull.linearVelocity[0]) +
            np.square(car.hull.linearVelocity[1]))

        # Vertical gauges: (slot, height in units, colour).
        vertical_gauges = [
            (5, 0.02 * true_speed, (1, 1, 1)),
            (7, 0.01 * car.wheels[0].omega, (0.0, 0, 1)),  # ABS sensors
            (8, 0.01 * car.wheels[1].omega, (0.0, 0, 1)),
            (9, 0.01 * car.wheels[2].omega, (0.2, 0, 1)),
            (10, 0.01 * car.wheels[3].omega, (0.2, 0, 1)),
        ]
        for place, val, color in vertical_gauges:
            quad(color, (place + 0) * s, (place + 1) * s, h + h * val, h)

        # Horizontal gauges: (start slot, length in slots, colour).
        horizontal_gauges = [
            (20, -10.0 * car.wheels[0].joint.angle, (0, 1, 0)),
            (30, -0.8 * car.hull.angularVelocity, (1, 0, 0)),
        ]
        for place, val, color in horizontal_gauges:
            quad(color, (place + 0) * s, (place + val) * s, 4 * h, 2 * h)

        gl.glEnd()

        self.score_label.text = "%04i" % self.reward
        self.score_label.draw()
Exemplo n.º 9
0
class CarRacingSoft(gym.Env, EzPickle):
    metadata = {'render.modes': ['human'], 'video.frames_per_second': FPS}

    color_black = np.array([0., 0., 0.])
    color_white = np.array([1., 1., 1.])
    color_red = np.array([1., 0., 0.])
    color_green = np.array([0., 1., 0.])
    color_grass_dark = np.array([0.4, 0.8, 0.4])
    color_grass_light = np.array([0.4, 0.9, 0.4])
    color_abs_light = np.array([0., 0., 1.])
    color_abs_dark = np.array([0.2, 0., 1.])

    def __init__(self, frame_skip, verbose=False):
        """Create the environment, its physics world and its Gym spaces.

        Args:
            frame_skip: number of physics frames simulated per ``step()``
                call; must be at least 1.
            verbose: when true, track generation prints progress messages.

        Raises:
            ValueError: if ``frame_skip`` is smaller than 1.
        """
        # Record the constructor arguments. EzPickle rebuilds the object by
        # calling the constructor again with whatever was recorded here, so
        # recording nothing would make unpickling fail on the required
        # ``frame_skip`` argument.
        EzPickle.__init__(self, frame_skip, verbose)

        if frame_skip < 1:
            raise ValueError("The value of frame_skip must be at least 1")

        self.seed()
        # Keep a reference on self so the listener outlives this call.
        self.contactListener_keepref = FrictionDetector(self)
        self.world = Box2D.b2World(
            (0, 0), contactListener=self.contactListener_keepref)
        self.viewer = None
        self.invisible_state_window = None
        self.invisible_video_window = None
        self.road = None
        self.car = None
        self.reward = 0.0
        self.prev_reward = 0.0
        self.verbose = verbose
        # Reusable fixture definition; its vertices are overwritten per tile.
        self.fd_tile = fixtureDef(shape=polygonShape(
            vertices=[(0, 0), (1, 0), (1, -1), (0, -1)]))

        self.action_space = spaces.Box(np.array([-1, 0, 0], dtype=np.float32),
                                       np.array([+1, +1, +1],
                                                dtype=np.float32),
                                       dtype=np.float32)  # steer, gas, brake

        # NOTE(review): the frame is filled with colours in [0, 1] and only
        # scaled by 255 in render(), so ``high=255`` looks wider than the
        # values actually produced - confirm before relying on the bounds.
        self.observation_space = spaces.Box(low=0,
                                            high=255,
                                            shape=(STATE_H, STATE_W, 3),
                                            dtype=np.float32)
        # Software-rendered frame; also what step() returns as observation.
        self.state = np.zeros([STATE_H, STATE_W, 3], dtype=np.float32)
        self.frame_skip = frame_skip

    def seed(self, seed=None):
        """Re-create ``self.np_random`` from ``seed`` and return ``[seed_used]``."""
        rng, seed_used = seeding.np_random(seed)
        self.np_random = rng
        return [seed_used]

    def _destroy(self):
        if not self.road:
            return
        for t in self.road:
            self.world.DestroyBody(t)
        self.road = []
        self.car.destroy()

    def _create_track(self):
        """Generate a random closed track and build its road tiles.

        Random checkpoints are placed around the origin, a path is traced
        from checkpoint to checkpoint, one closed lap is cut out of that
        path, and one static sensor body per segment is created in
        ``self.world``.

        Side effects: sets ``self.start_alpha`` and ``self.road``; on success
        also sets ``self.track`` and appends drawable ``(polygon, color)``
        pairs to ``self.road_poly`` (which must already exist; ``reset()``
        creates it).

        Returns:
            bool: ``True`` on success. ``False`` when no closed lap could be
            found in the traced path or when the two ends of the lap do not
            meet closely enough; ``reset()`` retries in that case.
        """
        CHECKPOINTS = 12

        # Create checkpoints
        # Each checkpoint is (angle, x, y); angle and radius are randomised
        # except for the first and last checkpoint, which are pinned.
        checkpoints = []
        for c in range(CHECKPOINTS):
            alpha = 2 * math.pi * c / CHECKPOINTS + self.np_random.uniform(
                0, 2 * math.pi * 1 / CHECKPOINTS)
            rad = self.np_random.uniform(TRACK_RAD / 3, TRACK_RAD)
            if c == 0:
                alpha = 0
                rad = 1.5 * TRACK_RAD
            if c == CHECKPOINTS - 1:
                alpha = 2 * math.pi * c / CHECKPOINTS
                # Angle used below to detect where a lap starts and ends.
                self.start_alpha = 2 * math.pi * (-0.5) / CHECKPOINTS
                rad = 1.5 * TRACK_RAD
            checkpoints.append(
                (alpha, rad * math.cos(alpha), rad * math.sin(alpha)))
        self.road = []

        # Go from one checkpoint to another to create track
        # (x, y) is the current position, beta the current heading angle.
        x, y, beta = 1.5 * TRACK_RAD, 0, 0
        dest_i = 0
        laps = 0
        track = []
        no_freeze = 2500  # hard cap on iterations so the loop always ends
        visited_other_side = False
        while True:
            alpha = math.atan2(y, x)
            # A lap is counted when the polar angle becomes positive again
            # after having been negative.
            if visited_other_side and alpha > 0:
                laps += 1
                visited_other_side = False
            if alpha < 0:
                visited_other_side = True
                alpha += 2 * math.pi
            while True:  # Find destination from checkpoints
                failed = True
                while True:
                    dest_alpha, dest_x, dest_y = checkpoints[dest_i %
                                                             len(checkpoints)]
                    if alpha <= dest_alpha:
                        failed = False
                        break
                    dest_i += 1
                    if dest_i % len(checkpoints) == 0:
                        break
                if not failed:
                    break
                # Wrapped past the last checkpoint: retry one turn lower.
                alpha -= 2 * math.pi
                continue
            r1x = math.cos(beta)
            r1y = math.sin(beta)
            # (p1x, p1y) is (r1x, r1y) rotated by 90 degrees; the position
            # advances along it below.
            p1x = -r1y
            p1y = r1x
            dest_dx = dest_x - x  # vector towards destination
            dest_dy = dest_y - y
            proj = r1x * dest_dx + r1y * dest_dy  # destination vector projected on rad
            # Keep beta within 1.5*pi of alpha.
            while beta - alpha > 1.5 * math.pi:
                beta -= 2 * math.pi
            while beta - alpha < -1.5 * math.pi:
                beta += 2 * math.pi
            prev_beta = beta
            proj *= SCALE
            # Turn towards the destination, limited to TRACK_TURN_RATE per step.
            if proj > 0.3:
                beta -= min(TRACK_TURN_RATE, abs(0.001 * proj))
            if proj < -0.3:
                beta += min(TRACK_TURN_RATE, abs(0.001 * proj))
            x += p1x * TRACK_DETAIL_STEP
            y += p1y * TRACK_DETAIL_STEP
            # Store the mean of the heading before and after this step.
            track.append((alpha, prev_beta * 0.5 + beta * 0.5, x, y))
            if laps > 4:
                break
            no_freeze -= 1
            if no_freeze == 0:
                break

        # Find closed loop range i1..i2, first loop should be ignored, second is OK
        # Scan backwards for two crossings of start_alpha: i2 is found first
        # (later in the path), then i1.
        i1, i2 = -1, -1
        i = len(track)
        while True:
            i -= 1
            if i == 0:
                return False  # Failed
            pass_through_start = track[i][0] > self.start_alpha and track[
                i - 1][0] <= self.start_alpha
            if pass_through_start and i2 == -1:
                i2 = i
            elif pass_through_start and i1 == -1:
                i1 = i
                break
        if self.verbose:
            print("Track generation: %i..%i -> %i-tiles track" %
                  (i1, i2, i2 - i1))
        assert i1 != -1
        assert i2 != -1

        track = track[i1:i2 - 1]

        first_beta = track[0][1]
        first_perp_x = math.cos(first_beta)
        first_perp_y = math.sin(first_beta)
        # Length of perpendicular jump to put together head and tail
        well_glued_together = np.sqrt(
            np.square(first_perp_x * (track[0][2] - track[-1][2])) +
            np.square(first_perp_y * (track[0][3] - track[-1][3])))
        if well_glued_together > TRACK_DETAIL_STEP:
            return False

        # Red-white border on hard turns
        # A point qualifies when the last BORDER_MIN_COUNT heading changes
        # all exceed the threshold and all have the same sign.
        border = [False] * len(track)
        for i in range(len(track)):
            good = True
            oneside = 0
            for neg in range(BORDER_MIN_COUNT):
                beta1 = track[i - neg - 0][1]
                beta2 = track[i - neg - 1][1]
                good &= abs(beta1 - beta2) > TRACK_TURN_RATE * 0.2
                oneside += np.sign(beta1 - beta2)
            good &= abs(oneside) == BORDER_MIN_COUNT
            border[i] = good
        # Extend each flag backwards over the points that contributed to it.
        for i in range(len(track)):
            for neg in range(BORDER_MIN_COUNT):
                border[i - neg] |= border[i]

        # Create tiles
        # Tile i joins point i with point i - 1 (index -1 wraps to the last
        # point, which closes the loop).
        for i in range(len(track)):
            alpha1, beta1, x1, y1 = track[i]
            alpha2, beta2, x2, y2 = track[i - 1]
            road1_l = (x1 - TRACK_WIDTH * math.cos(beta1),
                       y1 - TRACK_WIDTH * math.sin(beta1))
            road1_r = (x1 + TRACK_WIDTH * math.cos(beta1),
                       y1 + TRACK_WIDTH * math.sin(beta1))
            road2_l = (x2 - TRACK_WIDTH * math.cos(beta2),
                       y2 - TRACK_WIDTH * math.sin(beta2))
            road2_r = (x2 + TRACK_WIDTH * math.cos(beta2),
                       y2 + TRACK_WIDTH * math.sin(beta2))
            vertices = [road1_l, road1_r, road2_r, road2_l]
            # The shared fixture definition is re-pointed at this tile's quad.
            self.fd_tile.shape.vertices = vertices
            t = self.world.CreateStaticBody(fixtures=self.fd_tile)
            t.userData = t
            # Small colour variation cycling over three consecutive tiles.
            c = 0.01 * (i % 3)
            t.color = [ROAD_COLOR[0] + c, ROAD_COLOR[1] + c, ROAD_COLOR[2] + c]
            t.road_visited = False
            t.road_friction = 1.0
            t.fixtures[0].sensor = True
            self.road_poly.append(([road1_l, road1_r, road2_r,
                                    road2_l], t.color))
            self.road.append(t)
            if border[i]:
                # The sign of the heading change selects the side of the road
                # that gets the border strip.
                side = np.sign(beta2 - beta1)
                b1_l = (x1 + side * TRACK_WIDTH * math.cos(beta1),
                        y1 + side * TRACK_WIDTH * math.sin(beta1))
                b1_r = (x1 + side * (TRACK_WIDTH + BORDER) * math.cos(beta1),
                        y1 + side * (TRACK_WIDTH + BORDER) * math.sin(beta1))
                b2_l = (x2 + side * TRACK_WIDTH * math.cos(beta2),
                        y2 + side * TRACK_WIDTH * math.sin(beta2))
                b2_r = (x2 + side * (TRACK_WIDTH + BORDER) * math.cos(beta2),
                        y2 + side * (TRACK_WIDTH + BORDER) * math.sin(beta2))
                # Alternate white and red segments along the border.
                self.road_poly.append(([b1_l, b1_r, b2_r,
                                        b2_l], (1, 1, 1) if i % 2 == 0 else
                                       (1, 0, 0)))
        self.track = track
        return True

    def reset(self):
        """Start a new episode on a freshly generated track.

        Destroys the previous road and car, clears the episode counters,
        retries track generation until it succeeds, places a new car on the
        first track point and returns the first observation (obtained from a
        ``step(None)`` call).
        """
        self._destroy()

        # Per-episode bookkeeping
        self.reward = self.prev_reward = 0.0
        self.tile_visited_count = 0
        self.t = 0.0
        self.frames = 0
        self.road_poly = []
        self.human_render = False

        # Track generation may fail; keep trying until it succeeds.
        while not self._create_track():
            if self.verbose:
                print(
                    "retry to generate track (normal if there are not many instances of this message)"
                )

        start_pose = self.track[0][1:4]
        self.car = Car(self.world, *start_pose)

        first_observation = self.step(None)[0]
        return first_observation

    def step(self, action):
        """Advance the simulation by up to ``frame_skip`` physics frames.

        Args:
            action: ``(steer, gas, brake)`` applied on every simulated frame,
                or ``None`` (used by ``reset()``) to simulate a single frame
                without controls and without reward bookkeeping.

        Returns:
            tuple: ``(observation, total_reward, done, info)`` where
            ``observation`` is a copy of the freshly drawn ``self.state``,
            ``total_reward`` is the sum of the per-frame rewards, ``done``
            tells whether the episode ended and ``info`` is an empty dict.
        """
        dt = 1.0 / FPS
        total_reward = 0
        for _ in range(self.frame_skip):
            if action is not None:
                self.car.steer(-action[0])
                self.car.gas(action[1])
                self.car.brake(action[2])

            self.car.step(dt)
            self.world.Step(dt, 6 * 30, 2 * 30)
            self.t += dt

            step_reward = 0
            done = False
            if action is not None:  # First step without action, called from reset()
                # Constant time penalty per simulated frame.
                self.reward -= 0.1
                # We actually don't want to count fuel spent, we want car to be faster.
                # self.reward -=  10 * self.car.fuel_spent / ENGINE_POWER
                self.car.fuel_spent = 0.0
                step_reward = self.reward - self.prev_reward
                self.prev_reward = self.reward
                if self.tile_visited_count == len(self.track):
                    done = True
                x, y = self.car.hull.position
                if abs(x) > PLAYFIELD or abs(y) > PLAYFIELD:
                    # Left the playfield: end the episode with a penalty.
                    done = True
                    step_reward = -100

            total_reward += step_reward
            self.frames += 1
            if self.frames > 1000:
                done = True  # frame limit for one episode
            if done or action is None:
                break

        self._draw()

        # Debug readout of the indicator bars, decoded from the rendered
        # frame. It used to be printed unconditionally on every step; it is
        # now emitted only in verbose mode.
        # NOTE(review): the hard-coded rows/columns presumably match the
        # default STATE_W/STATE_H layout of _render_indicators - confirm.
        if self.verbose:
            speed = sum(self.state[85:, 2, 0])

            abs1 = sum(self.state[85:, 9, 2])
            abs2 = sum(self.state[85:, 14, 2])
            abs3 = sum(self.state[85:, 19, 2])
            abs4 = sum(self.state[85:, 24, 2])

            steering_input_left = sum(self.state[90, 37:48, 1])
            steering_input_right = sum(self.state[90, 47:58, 1])
            steering = steering_input_right - steering_input_left

            rotation_left = sum(self.state[90, 59:72, 0])
            rotation_right = sum(self.state[90, 72:85, 0])
            rotation = rotation_right - rotation_left

            print(
                f"speed:{speed}\tabs:\t{abs1}\t{abs2}\t{abs3}\t{abs4}\tsteering:{steering}\trotation:{rotation}"
            )

        return np.copy(self.state), total_reward, done, {}

    def render(self, mode='human', close=False):
        if close:
            if self.viewer is not None:
                self.viewer.close()
                self.viewer = None
            return

        if self.viewer is None:
            from gym.envs.classic_control import rendering
            self.viewer = rendering.SimpleImageViewer()

        self.viewer.imshow((self.state.repeat(RENDER_UPSCALE, axis=0).repeat(
            RENDER_UPSCALE, axis=1) * 255).astype(np.uint8))

    def _draw(self):
        # Simple 2D affine transformation class
        class Transform():
            def __init__(self, *values):
                self.matrix = values if len(values) else [
                    1., 0., 0., 0., 1., 0., 0., 0., 1.
                ]

            @staticmethod
            def translation(x, y):
                return Transform(1.0, 0.0, x, 0.0, 1.0, y, 0.0, 0.0, 1.0)

            @staticmethod
            def scale(x, y):
                return Transform(x, 0.0, 0.0, 0.0, y, 0.0, 0.0, 0.0, 1.0)

            @staticmethod
            def rotation(angle):
                cos, sin = math.cos(angle), math.sin(angle)
                return Transform(cos, -sin, 0.0, sin, cos, 0.0, 0.0, 0.0, 1.0)

            def apply_and_swap(self, point):
                sa, sb, sc, sd, se, sf, _, _, _ = self.matrix
                x, y = point
                return (x * sd + y * se + sf, x * sa + y * sb + sc)

            def __mul__(self, other):
                sa, sb, sc, sd, se, sf, _, _, _ = self.matrix
                oa, ob, oc, od, oe, of, _, _, _ = other.matrix
                return Transform(sa * oa + sb * od, sa * ob + sb * oe,
                                 sa * oc + sb * of + sc, sd * oa + se * od,
                                 sd * ob + se * oe, sd * oc + se * of + sf,
                                 0.0, 0.0, 1.0)

            def __imul__(self, other):
                return self.__mul__(other)

        class Renderer():
            def __init__(self, env):
                self.env = env

            def draw_polygon(self, path, color):
                self.env._fill_polygon(path, self.env.state, color)

        if "t" not in self.__dict__: return  # reset() not called yet

        zoom = 0.1 * SCALE * max(1 - self.t, 0) + ZOOM * SCALE * min(
            self.t, 1)  # Animate zoom first second
        scroll_x = self.car.hull.position[0]
        scroll_y = self.car.hull.position[1]
        angle = -self.car.hull.angle
        vel = self.car.hull.linearVelocity
        if np.linalg.norm(vel) > 0.5:
            angle = math.atan2(vel[0], vel[1])
        self.transform = Transform.translation(STATE_W / 2, STATE_H * 3 / 4)
        self.transform *= Transform.scale(STATE_W / 1000, STATE_H / 1000)
        self.transform *= Transform.scale(zoom, -zoom)
        self.transform *= Transform.rotation(angle)
        self.transform *= Transform.translation(-scroll_x, -scroll_y)

        # Clear
        self.state[:, :, :] = self.color_black

        # Draw road, car and indicators
        self._render_road(scroll_x, scroll_y, zoom)
        self.car.draw(Renderer(self), False)
        self._render_indicators()

    def _render_road(self, scroll_x, scroll_y, zoom):
        """Draw the grass background, the grass pattern and the road polygons.

        Grass squares and road polygons whose squared distance from
        ``(scroll_x, scroll_y)`` reaches ``2000000 / zoom**2`` are skipped.

        Args:
            scroll_x: world x coordinate the view is centred on.
            scroll_y: world y coordinate the view is centred on.
            zoom: current zoom factor.
        """
        canvas = self.state
        field = [(-PLAYFIELD, +PLAYFIELD), (+PLAYFIELD, +PLAYFIELD),
                 (+PLAYFIELD, -PLAYFIELD), (-PLAYFIELD, -PLAYFIELD)]
        self._fill_polygon(field, canvas, self.color_grass_dark)

        k = PLAYFIELD / 20.0
        mindist = 2000000 / (zoom**2)

        # Light squares on every second cell of a 40x40 grid.
        for col in range(-20, 20, 2):
            kx = k * col
            dist_x = (kx - scroll_x)**2
            if dist_x >= mindist:
                continue
            for row in range(-20, 20, 2):
                ky = k * row
                if dist_x + (ky - scroll_y)**2 < mindist:
                    square = [(kx + k, ky + 0), (kx + 0, ky + 0),
                              (kx + 0, ky + k), (kx + k, ky + k)]
                    self._fill_polygon(square, canvas,
                                       self.color_grass_light)

        # Road tiles and borders; the first vertex decides the culling.
        for poly, color in self.road_poly:
            anchor = poly[0]
            dist = (anchor[0] - scroll_x)**2 + (anchor[1] - scroll_y)**2
            if dist < mindist:
                self._fill_polygon(poly, canvas, color)

    def _render_indicators(self):
        """Draw the indicator strip at the bottom of ``self.state``.

        Uses image coordinates directly (no world transform): a black
        background five grid rows high, vertical bars for speed and the four
        wheel angular velocities, and horizontal bars for the steering joint
        angle and the hull angular velocity.
        """
        col_w = STATE_W / 40
        row_h = STATE_H / 40

        def panel_quad(corners, color):
            # All indicator shapes are given in untransformed image space.
            self._fill_polygon(corners,
                               self.state,
                               color,
                               transform=False)

        def vertical_ind(place, val, color):
            # Bar two grid columns wide, growing upwards from one row above
            # the bottom edge.
            left = (place + 0) * col_w
            right = (place + 2) * col_w
            top = STATE_H - row_h - row_h * val
            base = STATE_H - row_h
            panel_quad([(left, top), (right, top), (right, base),
                        (left, base)], color)

        def horiz_ind(place, val, color):
            # Bar extending ``val`` grid columns sideways from ``place``.
            start = (place + 0) * col_w
            end = (place + val) * col_w
            upper = STATE_H - 4 * row_h
            lower = STATE_H - 1.5 * row_h
            panel_quad([(start, upper), (end, upper), (end, lower),
                        (start, lower)], color)

        # Background of the strip
        panel_quad([(0, STATE_H), (STATE_W, STATE_H),
                    (STATE_W, STATE_H - 5 * row_h),
                    (0, STATE_H - 5 * row_h)], self.color_black)

        hull = self.car.hull
        wheels = self.car.wheels
        true_speed = np.sqrt(
            np.square(hull.linearVelocity[0]) +
            np.square(hull.linearVelocity[1]))
        vertical_ind(1, 0.02 * true_speed, self.color_white)
        # ABS sensors: one bar per wheel
        wheel_bars = ((4, 0, self.color_abs_light),
                      (6, 1, self.color_abs_light),
                      (8, 2, self.color_abs_dark),
                      (10, 3, self.color_abs_dark))
        for place, wheel_idx, color in wheel_bars:
            vertical_ind(place, 0.01 * wheels[wheel_idx].omega, color)
        horiz_ind(20, -10.0 * wheels[0].joint.angle, self.color_green)
        horiz_ind(30, -0.8 * hull.angularVelocity, self.color_red)

    # Adapted from https://github.com/luispedro/mahotas/blob/master/mahotas/polygon.py
    def _fill_polygon(self, polygon, canvas, color, transform=True):
        '''
        fill_polygon([(y0,x0), (y1,x1),...], canvas, color=1)
        Draw a filled polygon in canvas
        Parameters
        ----------
        polygon : list of pairs
            a list of (y,x) points
        canvas : ndarray
            where to draw, will be modified in place
        color : integer, optional
            which colour to use (default: 1)
        '''
        # algorithm adapted from: http://www.alienryderflex.com/polygon_fill/
        if not len(polygon):
            return

        if transform:
            polygon = [
                self.transform.apply_and_swap(point) for point in polygon
            ]
        else:
            polygon = [(float(y), float(x)) for x, y in polygon]

        min_y = max(int(min(y for y, x in polygon)), 0)
        if min_y >= canvas.shape[0]: return
        max_y = min(max(int(max(y + 1 for y, x in polygon)), 0),
                    canvas.shape[0])
        if max_y <= 0: return
        if min(x for y, x in polygon) >= canvas.shape[1]: return
        if max(x for y, x in polygon) < 0: return
        for y in range(min_y, max_y):
            nodes = []
            j = -1
            for i, p in enumerate(polygon):
                pj = polygon[j]
                if p[0] < y and pj[0] >= y or pj[0] < y and p[0] >= y:
                    dy = pj[0] - p[0]
                    if dy:
                        nodes.append((p[1] + (y - p[0]) / (pj[0] - p[0]) *
                                      (pj[1] - p[1])))
                    elif p[0] == y:
                        nodes.append(p[1])
                j = i
            nodes.sort()
            for n, nn in zip(nodes[::2], nodes[1::2]):
                canvas[y,
                       max(int(n), 0):min(max(int(nn), 0), canvas.shape[1]
                                          )] = color
Exemplo n.º 10
0
class CarRacing3(gym.Env, EzPickle):
    metadata = {
        'render.modes': ['human', 'rgb_array', 'state_pixels'],
        'state_pixels frame size': [STATE_H, STATE_W],
        'render frame size': [WINDOW_H, WINDOW_W],
        'FPS, 1/timebase': FPS,
        #'discretization': DISCRETE,
        'Zoom_level': ZOOM,
        'Flight start': ZOOM_START,
        'show track on 1st frame': TRACK_FIRST,
    }

    def __init__(self, seed=None, **kwargs):
        """Create the environment and apply its configuration.

        Args:
            seed: seed for the environment's random number generator, or
                ``None`` to let the seeding helper choose one.
            **kwargs: configuration options forwarded to ``_set_config``.
        """
        # Record the constructor arguments: EzPickle re-creates the object by
        # calling the constructor with what was recorded here, so recording
        # nothing would restore a pickled/copied env with the default seed
        # and configuration instead of its own.
        EzPickle.__init__(self, seed, **kwargs)
        self._seed(seed)
        # Keep a reference on self so the listener outlives this call.
        self.contactListener_keepref = FrictionDetector(self)
        self.world = Box2D.b2World(
            (0, 0), contactListener=self.contactListener_keepref)
        self.viewer = None
        self.road = None
        self.car = None
        self.newtile = False
        self.ep_return = 0.0
        self.action_taken = +np.inf
        # Reusable fixture definition; its vertices are overwritten per tile.
        self.fd_tile = fixtureDef(shape=polygonShape(
            vertices=[(0, 0), (1, 0), (1, -1), (0, -1)]))

        # Config
        self._set_config(**kwargs)
        #self._org_config = deepcopy(kwargs)

    def _set_config(
            self,
            game_color=1,  # State (frame) color option: 0 = RGB, 1 = Grayscale, 2 = Green only
            indicators=True,  # show or not bottom Info Panel
            frames_per_state=4,  # stacked (rolling history) Frames on each state [1-inf], latest observation always on first Frame
            skip_frames=3,  # number of consecutive Frames to skip between state saves [0-4]
            discre=ACT,  # Action discretization function, format [[steer0, throttle0, brake0], [steer1, ...], ...]. None for continuous
            use_track=3,  # number of times to use the same Track, [1-100]. More than 20 high risk of overfitting!!
            episodes_per_track=5,  # number of evenly distributed starting points on each track [1-20]. Every time you call reset(), the env automatically starts at the next point
            tr_complexity=12,  # generated Track geometric Complexity, [6-20]
            tr_width=45,  # relative Track Width, [30-50]
            patience=2.0,  # max time in secs without Progress, [0.5-20]
            off_track=1.0,  # max time in secs Driving on Grass, [0.0-5]
            f_reward=CONT_REWARD,  # Reward Function coefficients, refer to Docu for details
            num_obstacles=5,  # Obstacle objects placed on track [0-10]
            end_on_contact=False,  # Stop Episode on contact with obstacle, not recommended for starting-phase of training
            obst_location=0,  # array pre-setting obstacle Location, in %track. Negative value means track's left-hand side. 0 for random location
            oily_patch=False,  # use all obstacles as Low-friction road (oily patch)
            verbose=2):
        """Validate the configuration options and store them on the env.

        Numeric options are clipped to the ranges given next to each
        parameter. Incomplete ``obst_location`` or ``f_reward`` values fall
        back to random placement / ``CONT_REWARD`` with a printed warning.
        Also builds ``self.action_space`` (continuous when ``discre`` is
        ``None``, discrete otherwise) and ``self.observation_space``.
        """
        # Verbosity
        self.verbose = verbose

        # Obstacle parameters
        self.num_obstacles = np.clip(num_obstacles, 0, 10)
        self.end_on_contact = end_on_contact
        self.oily_patch = oily_patch
        # A scalar (the default 0) asks for random placement. Only sequences
        # are length-checked; testing the dimensionality avoids the
        # ambiguous truth value that ``obst_location != 0`` yields for
        # arrays. The length is compared against the clipped obstacle count,
        # which is the number of locations actually used.
        has_explicit_locations = np.ndim(obst_location) > 0
        if has_explicit_locations and len(obst_location) < self.num_obstacles:
            print("#####################################")
            print("Warning: incomplete obstacle location")
            print("Defaulting to random placement")
            self.obst_location = 0  #None
        else:
            self.obst_location = np.array(obst_location)

        # Reward coefficients verification
        if len(f_reward) < len(CONT_REWARD):
            print("####################################")
            print("Warning: incomplete reward function")
            print("Defaulting to predefined function!!!")
            self.f_reward = CONT_REWARD
        else:
            self.f_reward = f_reward

        # Times to use same track, up to 100 times. More than 20 high risk of overfitting!!
        self.repeat_track = np.clip(use_track, 1, 100)
        self.track_use = +np.inf

        # Number of episodes on same track, with evenly distributed starting points,
        # not more than 20 episodes
        self.episodes_per_track = np.clip(episodes_per_track, 1, 20)

        # Track generation complexity
        self.complexity = np.clip(tr_complexity, 6, 20)

        # Track width
        self.tr_width = np.clip(tr_width, 30, 50) / SCALE

        # Max time without progress
        self.patience = np.clip(patience, 0.5, 20)
        # Max time off-track
        self.off_track = np.clip(off_track, 0, 5)

        # Show or not bottom info panel
        self.indicators = indicators

        # Grayscale and acceptable frames
        self.grayscale = game_color
        if not self.grayscale and frames_per_state > 1:
            print("####################################")
            print("Warning: making frames_per_state = 1")
            print("No support for several frames in RGB")
            frames_per_state = 1
            skip_frames = 0

        # Frames to be skipped from state (max 4)
        self.skip_frames = np.clip(skip_frames + 1, 1, 5)

        # Frames per state
        self.frames_per_state = frames_per_state if frames_per_state > 0 else 1
        if self.frames_per_state > 1:
            # Indices into the rolling frame buffer, one every
            # ``skip_frames`` frames, with the newest slot moved to the front.
            lst = list(
                range(0, self.frames_per_state * self.skip_frames,
                      self.skip_frames))
            self._update_index = [lst[-1]] + lst[:-1]

        # Gym spaces, observation and action
        self.discre = discre
        # Identity test: ``== None`` is evaluated element-wise when the
        # discretization table is an ndarray and cannot be used in ``if``.
        if discre is None:
            self.action_space = spaces.Box(
                np.array([-0.4, 0, 0]),
                np.array([+0.4, +1, +1]),
                dtype=np.float32)  # steer, gas, brake
        else:
            self.action_space = spaces.Discrete(len(discre))

        if game_color:
            # One channel per stacked frame
            self.observation_space = spaces.Box(low=0,
                                                high=255,
                                                shape=(STATE_H, STATE_W,
                                                       self.frames_per_state),
                                                dtype=np.uint8)
        else:
            self.observation_space = spaces.Box(low=0,
                                                high=255,
                                                shape=(STATE_H, STATE_W, 3),
                                                dtype=np.uint8)

    def _update_state(self, new_frame):
        if self.frames_per_state > 1:
            self.int_state[:, :, -1] = new_frame
            self.state = self.int_state[:, :, self._update_index]
            self.int_state = np.roll(self.int_state, 1, 2)
        else:
            self.state = new_frame

    def _seed(self, seed=None):
        """Re-create ``self.np_random`` from ``seed`` and return ``[seed_used]``."""
        rng, seed_used = seeding.np_random(seed)
        self.np_random = rng
        return [seed_used]

    def _create_track(self):
        """Generate a random closed track, pick obstacle tiles, build tiles.

        Random checkpoints (``self.complexity`` of them) are placed around
        the origin, a path is traced from checkpoint to checkpoint and one
        closed lap is cut out of it. Obstacle tile ids are then either drawn
        at random or derived from ``self.obst_location``, and
        ``_create_tiles`` is called to build the road.

        All random draws go through ``self.np_random`` so that a seeded
        environment reproduces both the track and the obstacle placement.

        Side effects: sets ``self.start_alpha``; on success also sets
        ``self.border``, ``self.track``, ``self.waypoints`` and
        ``self.obstacle_tiles_ids``.

        Returns:
            bool: ``True`` on success, ``False`` when no closed lap could be
            found in the traced path or when the two ends of the lap do not
            meet closely enough.
        """
        # Create checkpoints
        # Each checkpoint is (angle, x, y); angle and radius are randomised
        # except for the first and last checkpoint, which are pinned.
        CHECKPOINTS = self.complexity
        checkpoints = []
        for c in range(CHECKPOINTS):
            alpha = 2 * math.pi * c / CHECKPOINTS + self.np_random.uniform(
                0, 2 * math.pi * 1 / CHECKPOINTS)
            rad = self.np_random.uniform(TRACK_RAD / 3, TRACK_RAD)
            if c == 0:
                alpha = 0
                rad = 1.0 * TRACK_RAD
            if c == CHECKPOINTS - 1:
                alpha = 2 * math.pi * c / CHECKPOINTS
                # Angle used below to detect where a lap starts and ends.
                self.start_alpha = 2 * math.pi * (-0.5) / CHECKPOINTS
                rad = 1.0 * TRACK_RAD
            checkpoints.append(
                (alpha, rad * math.cos(alpha), rad * math.sin(alpha)))

        # print "\n".join(str(h) for h in checkpoints)
        # self.road_poly = [ (    # uncomment this to see checkpoints
        #    [ (tx,ty) for a,tx,ty in checkpoints ],
        #    (0.7,0.7,0.9) ) ]

        # Go from one checkpoint to another to create track
        # (x, y) is the current position, beta the current heading angle.
        x, y, beta = 1.0 * TRACK_RAD, 0, 0
        dest_i = 0
        laps = 0
        track = []
        waypoint = []
        no_freeze = 2500  # hard cap on iterations so the loop always ends
        visited_other_side = False
        while True:
            alpha = math.atan2(y, x)
            # A lap is counted when the polar angle becomes positive again
            # after having been negative.
            if visited_other_side and alpha > 0:
                laps += 1
                visited_other_side = False
            if alpha < 0:
                visited_other_side = True
                alpha += 2 * math.pi
            while True:  # Find destination from checkpoints
                failed = True
                while True:
                    dest_alpha, dest_x, dest_y = checkpoints[dest_i %
                                                             len(checkpoints)]
                    if alpha <= dest_alpha:
                        failed = False
                        break
                    dest_i += 1
                    if dest_i % len(checkpoints) == 0:
                        break
                if not failed:
                    break
                # Wrapped past the last checkpoint: retry one turn lower.
                alpha -= 2 * math.pi
                continue
            r1x = math.cos(beta)
            r1y = math.sin(beta)
            # (p1x, p1y) is (r1x, r1y) rotated by 90 degrees; the position
            # advances along it below.
            p1x = -r1y
            p1y = r1x
            dest_dx = dest_x - x  # vector towards destination
            dest_dy = dest_y - y
            proj = r1x * dest_dx + r1y * dest_dy  # destination vector projected on rad
            # Keep beta within 1.5*pi of alpha.
            while beta - alpha > 1.5 * math.pi:
                beta -= 2 * math.pi
            while beta - alpha < -1.5 * math.pi:
                beta += 2 * math.pi
            prev_beta = beta
            proj *= SCALE
            # Turn towards the destination, limited to TRACK_TURN_RATE per step.
            if proj > 0.3:
                beta -= min(TRACK_TURN_RATE, abs(0.001 * proj))
            if proj < -0.3:
                beta += min(TRACK_TURN_RATE, abs(0.001 * proj))
            x += p1x * TRACK_DETAIL_STEP
            y += p1y * TRACK_DETAIL_STEP
            # Store the mean of the heading before and after this step.
            track.append((alpha, prev_beta * 0.5 + beta * 0.5, x, y))
            waypoint.append([x, y])
            if laps > 4:
                break
            no_freeze -= 1
            if no_freeze == 0:
                break
        # print "\n".join([str(t) for t in enumerate(track)])

        # Find closed loop range i1..i2, first loop should be ignored, second is OK
        # Scan backwards for two crossings of start_alpha: i2 is found first
        # (later in the path), then i1.
        i1, i2 = -1, -1
        i = len(track)
        while True:
            i -= 1
            if i == 0:
                return False  # Failed
            pass_through_start = track[i][0] > self.start_alpha and track[
                i - 1][0] <= self.start_alpha
            if pass_through_start and i2 == -1:
                i2 = i
            elif pass_through_start and i1 == -1:
                i1 = i
                break
        if self.verbose > 0:
            print("Track generation: %i..%i -> %i-tiles track, complex %i" %
                  (i1, i2, i2 - i1, self.complexity))
        assert i1 != -1
        assert i2 != -1

        track = track[i1:i2 - 1]
        waypoint = waypoint[i1:i2 - 1]

        first_beta = track[0][1]
        first_perp_x = math.cos(first_beta)
        first_perp_y = math.sin(first_beta)
        # Length of perpendicular jump to put together head and tail
        well_glued_together = np.sqrt(
            np.square(first_perp_x * (track[0][2] - track[-1][2])) +
            np.square(first_perp_y * (track[0][3] - track[-1][3])))
        if well_glued_together > TRACK_DETAIL_STEP:
            return False  # Failed

        # Red-white border on hard turns, pure colors
        # A point qualifies when the last BORDER_MIN_COUNT heading changes
        # all exceed the threshold and all have the same sign.
        border = [False] * len(track)
        for i in range(len(track)):
            good = True
            oneside = 0
            for neg in range(BORDER_MIN_COUNT):
                beta1 = track[i - neg - 0][1]
                beta2 = track[i - neg - 1][1]
                good &= abs(beta1 - beta2) > TRACK_TURN_RATE * 0.2
                oneside += np.sign(beta1 - beta2)
            good &= abs(oneside) == BORDER_MIN_COUNT
            border[i] = good
        # Extend each flag backwards over the points that contributed to it.
        for i in range(len(track)):
            for neg in range(BORDER_MIN_COUNT):
                border[i - neg] |= border[i]

        # Get random tile for obstacles, without replacement
        if np.sum(self.obst_location) == 0:
            # Draw from the env's own generator (not the global np.random)
            # so the placement follows the seed given to the environment.
            candidate_tiles = np.arange(10, len(track) - 6)
            obstacle_tiles_ids = self.np_random.choice(candidate_tiles,
                                                       self.num_obstacles,
                                                       replace=False)
            # Random sign per obstacle; a negative id means the left-hand
            # side of the track.
            signs = self.np_random.choice([-1, 1], size=self.num_obstacles)
            obstacle_tiles_ids = obstacle_tiles_ids * signs
            #obstacle_tiles_ids[0] = 4
        else:
            # Locations are given in percent of the track length.
            obstacle_tiles_ids = np.rint(self.obst_location * len(track) /
                                         100).astype(int)
            obstacle_tiles_ids = obstacle_tiles_ids[0:self.num_obstacles]
        if self.verbose >= 2:
            print(self.num_obstacles, ' obstacles on tiles: ',
                  obstacle_tiles_ids[np.argsort(np.abs(obstacle_tiles_ids))])

        # Store values and call tile generation
        self.border = border
        self.track = track
        self.waypoints = np.asarray(waypoint)
        self.obstacle_tiles_ids = obstacle_tiles_ids
        self._create_tiles(track, border)

        return True  #self.waypoint #True

    def _give_track(self):
        return self.track, self.waypoints, self.obstacles_poly

    def _create_tiles(self, track, border):
        """Rebuild the road tile bodies and their drawing polygons.

        Every body currently held in ``self.road`` is destroyed, then one
        static sensor body is created per track point, spanning from the
        previous point (``track[i - 1]``, so the first tile joins the last
        point) to the current one.

        Args:
            track: sequence of ``(alpha, beta, x, y)`` tuples.
            border: sequence of flags, one per track point; a truthy entry
                adds a border polygon beside that tile.
        """
        # Drop whatever bodies belonged to the previous layout.
        if self.road is not None:
            for body in self.road:
                self.world.DestroyBody(body)
        self.road = []
        self.road_poly = []

        w = self.tr_width
        for i, (_, beta1, x1, y1) in enumerate(track):
            _, beta2, x2, y2 = track[i - 1]
            cos1, sin1 = math.cos(beta1), math.sin(beta1)
            cos2, sin2 = math.cos(beta2), math.sin(beta2)

            # Quad corners: current point (1) and previous point (2),
            # offset by the track width on either side along beta.
            road1_l = (x1 - w * cos1, y1 - w * sin1)
            road1_r = (x1 + w * cos1, y1 + w * sin1)
            road2_l = (x2 - w * cos2, y2 - w * sin2)
            road2_r = (x2 + w * cos2, y2 + w * sin2)
            quad = [road1_l, road1_r, road2_r, road2_l]

            self.fd_tile.shape.vertices = quad
            tile = self.world.CreateStaticBody(fixtures=self.fd_tile)
            tile.userData = tile
            shade = 0.02 * (i % 3)  # cycles through three tile shades
            tile.color = [ROAD_COLOR[k] + shade for k in range(3)]
            tile.road_visited = False
            tile.typename = 'tile'
            tile.road_friction = 1.0
            tile.fixtures[0].sensor = True
            self.road_poly.append((list(quad), tile.color))
            self.road.append(tile)

            if not border[i]:
                continue

            # Border strip on the side given by the sign of the turn.
            side = np.sign(beta2 - beta1)
            near = side * w
            far = side * (w + BORDER)
            b1_l = (x1 + near * cos1, y1 + near * sin1)
            b1_r = (x1 + far * cos1, y1 + far * sin1)
            b2_l = (x2 + near * cos2, y2 + near * sin2)
            b2_r = (x2 + far * cos2, y2 + far * sin2)
            stripe = (1, 1, 1) if i % 2 == 0 else BORDER_COLOR
            self.road_poly.append(([b1_l, b1_r, b2_r, b2_l], stripe))

        # Obstacles are rebuilt together with the tiles.
        if self.num_obstacles:
            self._create_obstacles()

    def _create_obstacles(self):
        """Create one static sensor body per entry of ``self.obstacle_tiles_ids``.

        Each obstacle is a quad of width ``self.tr_width / 2`` running from
        track point ``idx`` to ``idx + obst_len`` (3 points for an oily patch,
        1 otherwise). A negative id places the quad on the ``-beta`` side of
        the centerline, a non-negative id on the ``+beta`` side.

        The track is a closed loop (tiles already join the first point to the
        last one), so point indices wrap around its length instead of raising
        ``IndexError`` when an obstacle sits at, or within ``obst_len`` points
        of, the end of the track.

        Side effects:
            Resets ``self.obstacles_poly`` and appends every new body to
            ``self.road`` so it is destroyed together with the tiles.
        """
        self.obstacles_poly = []
        width = self.tr_width / 2
        obst_len = 3 if self.oily_patch else 1
        n_points = len(self.track)

        # Obstacle ids are 1-based.
        for count, idx in enumerate(self.obstacle_tiles_ids, start=1):
            negative_side = idx < 0  # the sign only encodes the side
            idx = abs(idx) % n_points
            _, beta1, x1, y1 = self.track[idx]
            _, beta2, x2, y2 = self.track[(idx + obst_len) % n_points]

            if negative_side:
                p1 = (x1 - width * math.cos(beta1),
                      y1 - width * math.sin(beta1))
                p2 = (x1, y1)
                p3 = (x2, y2)
                p4 = (x2 - width * math.cos(beta2),
                      y2 - width * math.sin(beta2))
            else:
                p1 = (x1, y1)
                p2 = (x1 + width * math.cos(beta1),
                      y1 + width * math.sin(beta1))
                p3 = (x2 + width * math.cos(beta2),
                      y2 + width * math.sin(beta2))
                p4 = (x2, y2)

            vertices = [p1, p2, p3, p4]

            # Register the body both for physics (self.road) and for drawing
            # (self.obstacles_poly).
            t = self.world.CreateStaticBody(fixtures=fixtureDef(
                shape=polygonShape(vertices=vertices)))
            t.userData = t
            if self.oily_patch:
                t.color = OILY_COLOR
                t.road_friction = 0.2
            else:
                t.color = OBSTACLE_COLOR
                t.road_friction = 1.0
            t.typename = 'obstacle'
            t.road_visited = False
            t.id = count
            t.tile_id = idx
            t.fixtures[0].sensor = True
            self.road.append(t)
            self.obstacles_poly.append((vertices, t.color))

    def _closest_node(self, node, nodes):
        #nodes = np.asarray(nodes)
        deltas = nodes - node
        dist_2 = np.einsum('ij,ij->i', deltas, deltas)
        return np.argmin(dist_2)

    def _closest_dist(self, node, nodes):
        #nodes = np.asarray(nodes)
        deltas = nodes - node
        dist_2 = np.einsum('ij,ij->i', deltas, deltas)
        return np.sqrt(min(dist_2))

    def _render_road(self):
        """Draw the grass background, the road polygons and any obstacles.

        Everything is emitted inside a single ``GL_QUADS`` primitive, in this
        order: one playfield-sized grass quad, a grid of lighter grass
        squares, the polygons in ``self.road_poly`` and finally (when
        ``self.num_obstacles > 0``) the obstacle polygons.
        """
        gl.glBegin(gl.GL_QUADS)

        # Playfield-sized quad in the base grass colour.
        gl.glColor4f(GRASS_COLOR[0], GRASS_COLOR[1], GRASS_COLOR[2], 1.0)
        for corner_x, corner_y in ((-PLAYFIELD, +PLAYFIELD),
                                   (+PLAYFIELD, +PLAYFIELD),
                                   (+PLAYFIELD, -PLAYFIELD),
                                   (-PLAYFIELD, -PLAYFIELD)):
            gl.glVertex3f(corner_x, corner_y, 0)

        # Lighter squares on every other cell of a 40x40 grid.
        gl.glColor4f(GRASS_COLOR[0] - 0, GRASS_COLOR[1] + 0.1,
                     GRASS_COLOR[2] - 0, 1.0)
        cell = PLAYFIELD / 20.0
        for col in range(-20, 20, 2):
            x_lo = cell * col + 0
            x_hi = cell * col + cell
            for row in range(-20, 20, 2):
                y_lo = cell * row + 0
                y_hi = cell * row + cell
                gl.glVertex3f(x_hi, y_lo, 0)
                gl.glVertex3f(x_lo, y_lo, 0)
                gl.glVertex3f(x_lo, y_hi, 0)
                gl.glVertex3f(x_hi, y_hi, 0)

        # Road tiles and border stripes.
        for vertices, rgb in self.road_poly:
            gl.glColor4f(rgb[0], rgb[1], rgb[2], 1)
            for point in vertices:
                gl.glVertex3f(point[0], point[1], 0)

        # Obstacles share the primitive opened above.
        if self.num_obstacles > 0:
            self._render_obstacles()

        gl.glEnd()

    def _render_obstacles(self):
        """Emit the vertices of every polygon in ``self.obstacles_poly``.

        This method opens no GL primitive itself: it must be called between
        ``gl.glBegin`` and ``gl.glEnd``, as ``_render_road`` does.
        """
        for vertices, rgb in self.obstacles_poly:
            red, green, blue = rgb[0], rgb[1], rgb[2]
            gl.glColor4f(red, green, blue, 1)
            for point in vertices:
                gl.glVertex3f(point[0], point[1], 0)

    def _render_indicators(self, W, H):
        """Draw the bottom status bar: three horizontal gauges and the score.

        Args:
            W: width used to lay out the bar (split into four slots).
            H: height used to derive the bar's vertical unit (``H / 40``).
        """
        slot = W / 4  # horizontal slot separation
        unit = H / 40.0  # vertical unit; the black bar is 5 units tall

        def vertical_ind(place, val, color):
            # Vertical gauge growing upwards from one unit; not called at
            # present (it served per-wheel readouts that are disabled).
            left, right = (place + 0) * slot, (place + 1) * slot
            top = unit + unit * val
            gl.glColor4f(color[0], color[1], color[2], 1)
            gl.glVertex3f(left, top, 0)
            gl.glVertex3f(right, top, 0)
            gl.glVertex3f(right, unit, 0)
            gl.glVertex3f(left, unit, 0)

        def horiz_ind(place, val, color):
            # Horizontal gauge between 2 and 4 units high, extending from
            # slot position `place` by `val` slots (negative goes left).
            start, end = (place + 0) * slot, (place + val) * slot
            gl.glColor4f(color[0], color[1], color[2], 1)
            gl.glVertex3f(start, 4 * unit, 0)
            gl.glVertex3f(end, 4 * unit, 0)
            gl.glVertex3f(end, 2 * unit, 0)
            gl.glVertex3f(start, 2 * unit, 0)

        gl.glBegin(gl.GL_QUADS)

        # Black background bar.
        gl.glColor4f(0, 0, 0, 1)
        for bar_x, bar_y in ((W, 0), (W, 5 * unit), (0, 5 * unit), (0, 0)):
            gl.glVertex3f(bar_x, bar_y, 0)

        # Gauges: speed, front-wheel joint angle, clipped angular velocity.
        velocity = self.car.hull.linearVelocity
        true_speed = np.sqrt(np.square(velocity[0]) + np.square(velocity[1]))
        horiz_ind(1.0, 0.015 * true_speed, (1, 1, 1))
        horiz_ind(2.5, -1 * self.car.wheels[0].joint.angle, (0, 1, 0))
        spin = np.clip(-0.03 * self.car.hull.angularVelocity, -0.4, 0.4)
        horiz_ind(3.5, spin, (1, 1, 0))

        gl.glEnd()

        # Episode return so far, as text.
        self.score_label.text = "%02.1f" % self.ep_return
        self.score_label.draw()

    def reset(self):
        """Start a new episode and return its first observation.

        Clears the per-episode counters and buffers, then either generates a
        new track (when the current one has been used
        ``repeat_track * episodes_per_track`` times or more) or rebuilds the
        tiles of the existing track. A new car is placed on the episode's
        start tile and the first observation is obtained via ``step(None)``.

        Returns:
            The first element of ``self.step(None)``, i.e. the observation.
        """
        # Per-episode bookkeeping.
        self.ep_return = 0.0
        self.newtile = False
        self.tile_visited_count = 0
        self.last_touch_with_track = 0
        self.last_new_tile = 0
        self.obst_contact = False
        self.obst_contact_count = 0
        self.obst_contact_list = []
        self.t = 0.0
        self.steps_in_episode = 0
        self.state = np.zeros(self.observation_space.shape)
        # Number of raw frames kept so that `frames_per_state` frames spaced
        # `skip_frames` apart are available.
        self.internal_frames = self.skip_frames * (self.frames_per_state -
                                                   1) + 1
        self.int_state = np.zeros([STATE_H, STATE_W, self.internal_frames])

        if self.track_use >= self.repeat_track * self.episodes_per_track:
            # Track exhausted: generate a new one, giving up after 21 attempts.
            # NOTE(review): if every attempt fails, execution continues with
            # whatever track/episode_start were set before - confirm intended.
            intento = 0  # attempt counter
            while intento < 21:
                success = self._create_track()
                intento += 1
                if success:
                    self.track_use = 0
                    # Evenly spaced start tiles, one per episode on this track.
                    self.episode_start = range(
                        0, len(self.track),
                        int(len(self.track) / self.episodes_per_track))
                    #print(self.episode_start)
                    break
                if self.verbose > 0:
                    print(
                        intento,
                        " retry to generate new track (normal below 10, limit 20)"
                    )
        else:
            # Same track again: only the tile bodies are recreated.
            self._create_tiles(self.track, self.border)

        start_tile = self.episode_start[self.track_use %
                                        self.episodes_per_track]
        #print(start_tile, self.track_use, self.episodes_per_track)

        # Replace the previous car; Car receives elements 1..3 of the track
        # point, i.e. (beta, x, y).
        if self.car is not None:
            self.car.destroy()
        if self.episodes_per_track > 1:
            self.car = Car(self.world, *self.track[start_tile][1:4])
        else:
            self.car = Car(self.world, *self.track[0][1:4])

        # Count this track use only if more than two actions were taken since
        # the last counted reset, so two resets in quick succession are not
        # counted twice.
        if self.action_taken > 2:
            self.track_use += 1
            self.action_taken = 0
        #self.track_use += 1

        return self.step(None)[0]

    def reset_track(self):
        """Force generation of a new track and return a fresh observation.

        ``self.track_use`` is set to infinity so the following ``reset()``
        takes its new-track branch; one more ``step(None)`` is then performed
        and its observation returned.
        """
        self.track_use = np.inf
        self.reset()
        observation = self.step(None)[0]
        return observation

    def step(self, action):
        """Advance the simulation by one frame and compute the reward.

        Args:
            action: ``None`` (used by ``reset()`` to obtain a first
                observation without advancing time), an index into
                ``self.discre`` when that table is set, or otherwise a
                ``(steer, gas, brake)`` triple.

        Returns:
            ``(state, step_reward, done, info)``; ``info`` is an empty dict.
            For ``action is None`` the reward is 0 and ``done`` is False.

        Reward terms read from ``self.f_reward`` by index:
            0 per step, 1 closeness to centerline, 2 speed, 3 steering angle,
            4 new tile, 5 tiles visited per step, 6 track finished,
            7 patience / off-track timeout, 8 out of bounds, 9 step limit,
            10 obstacle contact (per step), 11 episode ended by contact.
        """

        def report_end(reason, penalty):
            # One-line episode summary shared by all termination causes.
            if self.verbose > 0:
                print(
                    self.track_use, reason, self.steps_in_episode,
                    " %advance",
                    int(self.tile_visited_count / len(self.track) * 1000) / 10,
                    " played reward",
                    int(100 * self.ep_return) / 100, " last penalty", penalty)

        # Avoid first step with action=None, called from reset()
        if action is None:
            # Zero-length physics step: settles the car without advancing time.
            self.car.steer(0)
            self.car.step(0)
            self.world.Step(0, 6 * 30, 2 * 30)
        else:
            # `is not None` rather than `== None`: the latter is evaluated
            # elementwise when the lookup table is a NumPy array.
            if self.discre is not None:
                action = self.discre[action]
            # moves the car per action, advances time
            self.car.steer(-action[0])
            self.car.gas(action[1])
            self.car.brake(action[2])
            self.t += 1.0 / FPS
            self.steps_in_episode += 1
            self.action_taken += 1
            self.car.step(1.0 / FPS)
            self.world.Step(1.0 / FPS, 6 * 30, 2 * 30)

        # generates new observation state
        self._update_state(self.render("state_pixels"))

        ## REWARDS
        x, y = self.car.hull.position
        true_speed = np.sqrt(
            np.square(self.car.hull.linearVelocity[0]) +
            np.square(self.car.hull.linearVelocity[1])) / 100
        wheel_angle = abs(self.car.wheels[0].joint.angle) / 0.4
        done = False

        # reward given each step: step taken, distance to centerline,
        # normalized speed, normalized steer angle
        step_reward = self.f_reward[0]

        # reward distance to centerline, proportional to trackwidth
        dist = 1 - self._closest_dist([x, y], self.waypoints) / self.tr_width
        step_reward += self.f_reward[1] * np.clip(dist, -1, 1)

        # reward for speed
        step_reward += self.f_reward[2] * true_speed

        # reward for steer angle
        step_reward += self.f_reward[3] * wheel_angle

        # reward for collision with obstacle
        step_reward += self.f_reward[10] * self.obst_contact

        ## reward given on new tile touched: proportional of advance,
        ## %advance/steps_taken
        if self.newtile:
            step_reward += self.f_reward[4] * 100 / len(self.track)
            # steps_in_episode is still 0 during the step(None) call made by
            # reset(); guard the division (that reward is discarded anyway).
            step_reward += self.f_reward[5] * self.tile_visited_count / max(
                self.steps_in_episode, 1)
            self.newtile = False

        ## calculates reward penalties, showstopper
        # reward for obstacles: obstacle hit (each step), obstacle collided
        # (episode end)
        if self.end_on_contact and self.obst_contact:
            step_reward = self.f_reward[11]
            done = True
            report_end(" ended by collision. Steps", step_reward)
            if self.verbose > 2:
                print(self.obst_contact_count, " collided obstacles: ",
                      self.obst_contact_list)

        # reward given at episode end: all tiles touched (track finished),
        # patience or off-road exceeded, out of bounds, max_steps exceeded.
        # Later checks override the reward set by earlier ones.
        # if too many seconds lacking progress
        if self.t - self.last_new_tile > self.patience:
            step_reward = self.f_reward[7]
            done = True
            report_end(" cut by time without progress. Steps", step_reward)

        # if too many seconds off-track
        if self.t - self.last_touch_with_track > self.off_track:
            step_reward = self.f_reward[7]
            done = True
            report_end(" cut by time off-track. Steps", step_reward)

        # check out-of-bounds car position
        if abs(x) > PLAYFIELD or abs(y) > PLAYFIELD:
            step_reward = self.f_reward[8]
            done = True
            report_end(" out of limits. Steps", step_reward)

        # episode limit, as registered
        if self.steps_in_episode >= 2000:
            step_reward = self.f_reward[9]
            done = True
            report_end(" env max steps reached", step_reward)

        # check touched all tiles, to finish
        if self.tile_visited_count == len(self.track):
            step_reward = self.f_reward[6]
            done = True
            if self.verbose > 0:
                print(self.track_use, " Finalized in Steps",
                      self.steps_in_episode, " with return=total_reward",
                      self.ep_return + step_reward)

        # clear reward if no action intended, from reset
        if action is None:
            step_reward = 0
            done = False

        # internal counting reward, for display
        self.ep_return += step_reward

        return self.state, step_reward, done, {
        }  #{'episode', self.tile_visited_count/len(self.track)}

    def render(self, mode='human'):
        """Render the current frame.

        Args:
            mode: ``'human'`` draws to the window; ``'rgb_array'`` and
                ``'state_pixels'`` read the frame back at ``VIDEO_W x VIDEO_H``
                and ``STATE_W x STATE_H`` respectively.

        Returns:
            ``None`` if ``reset()`` has not been called yet;
            ``self.viewer.isopen`` in ``'human'`` mode; otherwise an array
            with rows flipped vertically, whose channels depend on
            ``self.grayscale``: ``1`` gives a weighted sum of the first three
            channels, ``2`` gives channel 1 only, anything else gives the
            first three channels. In the two grayscale cases the result is
            2-D when ``frames_per_state > 1`` and has a trailing axis of
            length 1 otherwise.
        """
        assert mode in ['human', 'state_pixels', 'rgb_array']
        if self.viewer is None:
            from gym.envs.classic_control import rendering
            self.viewer = rendering.Viewer(WINDOW_W, WINDOW_H)
            self.score_label = pyglet.text.Label(
                '00.0',
                font_size=24,
                x=10,
                y=WINDOW_H * 2.5 / 40.00,  #2.5*H_INDI,
                anchor_x='left',
                anchor_y='center',
                color=(255, 255, 255, 255))
            self.transform = rendering.Transform()

        if "t" not in self.__dict__: return  # reset() not called yet

        if ZOOM_START:  # Animate zoom during first second
            zoom = 0.1 * SCALE * max(1 - self.t, 0) + ZOOM * SCALE * min(
                self.t, 1)
        else:
            zoom = ZOOM * SCALE

        if TRACK_FIRST and self.t == 0:  #shows whole track in first frame; checks first step, from reset()
            self.transform.set_scale(TRACK_ZOOM, TRACK_ZOOM)
            self.transform.set_translation(WINDOW_W / 2, WINDOW_H / 2)
            self.transform.set_rotation(0)
        else:  #every regular step updates the car visualization after action
            scroll_x = self.car.hull.position[0]
            scroll_y = self.car.hull.position[1]
            angle = -self.car.hull.angle
            vel = self.car.hull.linearVelocity
            # Once the car is moving, orient the view along its velocity
            # instead of along the hull.
            if np.linalg.norm(vel) > 0.5:
                angle = math.atan2(vel[0], vel[1])
            self.transform.set_scale(zoom, zoom)
            self.transform.set_translation(
                WINDOW_W / 2 - (scroll_x * zoom * math.cos(angle) -
                                scroll_y * zoom * math.sin(angle)),
                WINDOW_H / 4 - (scroll_x * zoom * math.sin(angle) +
                                scroll_y * zoom * math.cos(angle)))
            self.transform.set_rotation(angle)

        self.car.draw(self.viewer, mode != "state_pixels")
        #car_dynamics.draw particles only when not in state_pixels

        win = self.viewer.window
        win.switch_to()
        win.dispatch_events()

        win.clear()
        t = self.transform
        if mode == 'rgb_array':
            VP_W = VIDEO_W
            VP_H = VIDEO_H
        elif mode == 'state_pixels':
            VP_W = STATE_W
            VP_H = STATE_H
        else:
            pixel_scale = 1
            if hasattr(win.context, '_nscontext'):
                pixel_scale = win.context._nscontext.view().backingScaleFactor(
                )  # pylint: disable=protected-access
            VP_W = int(pixel_scale * WINDOW_W)
            VP_H = int(pixel_scale * WINDOW_H)

        gl.glViewport(0, 0, VP_W, VP_H)
        t.enable()
        self._render_road()
        for geom in self.viewer.onetime_geoms:
            geom.render()
        self.viewer.onetime_geoms = []
        t.disable()

        # plots the indicators (skipped on the whole-track first frame)
        if self.indicators and (not TRACK_FIRST or self.t >= 1.0 / FPS):
            self._render_indicators(WINDOW_W, WINDOW_H)

        if mode == 'human':
            win.flip()
            return self.viewer.isopen

        image_data = pyglet.image.get_buffer_manager().get_color_buffer(
        ).get_image_data()
        # np.fromstring's binary mode (sep='') is deprecated; frombuffer is
        # the documented replacement. The copy keeps the result an
        # independent, writable array as it was before.
        arr = np.frombuffer(image_data.get_data(), dtype=np.uint8).copy()
        arr = arr.reshape(VP_H, VP_W, 4)

        if self.grayscale == 1:
            if self.frames_per_state > 1:
                arr = np.dot(arr[::-1, :, 0:3], [0.299, 0.587, 0.114])
            else:
                arr = np.dot(arr[::-1, :, 0:3],
                             [0.299, 0.587, 0.114]).reshape(VP_H, VP_W, -1)
        elif self.grayscale == 2:
            if self.frames_per_state > 1:
                arr = arr[::-1, :, 1]
            else:
                arr = arr[::-1, :, 1].reshape(VP_H, VP_W, -1)
        else:
            arr = arr[::-1, :, 0:3]

        return arr

    def close(self):
        """Close the viewer, if one was created, and forget it."""
        viewer = self.viewer
        if viewer is None:
            return
        viewer.close()
        self.viewer = None

    def screenshot(self, dest="./", name=None, quality='low'):
        """Save the current view of the environment as JPEG file(s).

        Args:
            dest: directory the file(s) are written to.
            name: base file name without extension; defaults to
                ``"screenshot_<t>"`` built from the simulation time ``self.t``.
            quality: ``'low'`` saves the stored observation ``self.state``,
                one file per stacked frame (suffix ``_frame<i>``) when
                ``frames_per_state > 1``. Any other value (``'medium'``,
                ``'high'``) saves one freshly rendered ``'rgb_array'`` frame.
                ``render()`` only accepts ``'human'``, ``'state_pixels'`` and
                ``'rgb_array'``, so there is no separate HD mode to use.
        """
        if quality == 'low':
            state = self.state
        else:
            state = self.render('rgb_array')
        if state is None:
            return

        if name is None:
            name = "screenshot_%0.3f" % self.t

        # A single image unless the low-quality state stacks several frames.
        if self.frames_per_state == 1 or quality != 'low':
            frames = [("", state)]
        else:
            frames = [("_frame%i" % f, state[:, :, f])
                      for f in range(self.frames_per_state)]

        for frame_str, frame in frames:
            if self.grayscale:
                # Replicate the single channel to get an RGB image.
                frame = np.stack([frame, frame, frame], axis=-1)
            frame = frame.astype(np.uint8)
            im = Image.fromarray(frame)
            im.save("%s/%s%s.jpeg" % (dest, name, frame_str))
Exemplo n.º 11
0
class CarRacing(gym.Env, EzPickle):
    metadata = {
        'render.modes': ['human', 'rgb_array', 'state_pixels'],
        'video.frames_per_second': FPS
    }

    def __init__(self, verbose=1):
        """Set up the physics world, bookkeeping attributes and gym spaces.

        Args:
            verbose: stored as ``self.verbose``; a value of 1 enables the
                track-generation printouts.
        """
        EzPickle.__init__(self)
        self.seed()

        # The listener is also stored on the instance under a "keepref" name.
        listener = FrictionDetector(self)
        self.contactListener_keepref = listener
        self.world = Box2D.b2World((0, 0), contactListener=listener)

        # Rendering handles and simulation objects, created later.
        self.viewer = None
        self.invisible_state_window = None
        self.invisible_video_window = None
        self.road = None
        self.car = None

        self.reward = 0.0
        self.prev_reward = 0.0
        self.verbose = verbose

        # Reusable fixture definition; its vertices are overwritten per tile.
        placeholder = [(0, 0), (1, 0), (1, -1), (0, -1)]
        self.fd_tile = fixtureDef(shape=polygonShape(vertices=placeholder))

        # Action: steer, gas, brake
        action_low = np.array([-1, 0, 0])
        action_high = np.array([+1, +1, +1])
        self.action_space = spaces.Box(action_low,
                                       action_high,
                                       dtype=np.float32)
        self.observation_space = spaces.Box(low=0,
                                            high=255,
                                            shape=(STATE_H, STATE_W, 3),
                                            dtype=np.uint8)

    def seed(self, seed=None):
        """Create ``self.np_random`` from ``seed`` and return the seed used.

        Returns:
            A one-element list holding the seed reported by
            ``seeding.np_random``.
        """
        rng, used_seed = seeding.np_random(seed)
        self.np_random = rng
        return [used_seed]

    def _destroy(self):
        if not self.road:
            return
        for t in self.road:
            self.world.DestroyBody(t)
        self.road = []
        self.car.destroy()

    def _create_track(self):
        """Generate a random closed track and create its tile bodies.

        Steps, in order:
          1. Draw 12 checkpoints at random angle/radius (the first and last
             are pinned at radius ``1.5 * TRACK_RAD``).
          2. Walk in ``TRACK_DETAIL_STEP`` increments, steering towards the
             next checkpoint, recording ``(alpha, beta, x, y)`` points until
             more than 4 laps are done or 2500 points were produced.
          3. Scanning backwards, find two crossings of ``self.start_alpha``
             and keep the points between them as one lap.
          4. Reject the lap if its head and tail do not line up.
          5. Flag tiles on sustained turns for a border and create one static
             sensor body per tile.

        Returns:
            ``True`` on success (``self.track`` is set); ``False`` when no
            closed loop was found or head and tail are too far apart.
        """
        CHECKPOINTS = 12

        # Create checkpoints
        checkpoints = []
        for c in range(CHECKPOINTS):
            alpha = 2 * math.pi * c / CHECKPOINTS + self.np_random.uniform(
                0, 2 * math.pi * 1 / CHECKPOINTS)
            rad = self.np_random.uniform(TRACK_RAD / 3, TRACK_RAD)
            if c == 0:
                alpha = 0
                rad = 1.5 * TRACK_RAD
            if c == CHECKPOINTS - 1:
                alpha = 2 * math.pi * c / CHECKPOINTS
                # Angle used below to detect a pass through the start line.
                self.start_alpha = 2 * math.pi * (-0.5) / CHECKPOINTS
                rad = 1.5 * TRACK_RAD
            checkpoints.append(
                (alpha, rad * math.cos(alpha), rad * math.sin(alpha)))

        # print "\n".join(str(h) for h in checkpoints)
        # self.road_poly = [ (    # uncomment this to see checkpoints
        #    [ (tx,ty) for a,tx,ty in checkpoints ],
        #    (0.7,0.7,0.9) ) ]
        self.road = []

        # Go from one checkpoint to another to create track
        x, y, beta = 1.5 * TRACK_RAD, 0, 0
        dest_i = 0
        laps = 0
        track = []
        no_freeze = 2500  # hard cap on generated points, guards the loop
        visited_other_side = False
        while True:
            alpha = math.atan2(y, x)
            # A lap is counted when alpha becomes positive again after
            # having been negative.
            if visited_other_side and alpha > 0:
                laps += 1
                visited_other_side = False
            if alpha < 0:
                visited_other_side = True
                alpha += 2 * math.pi
            while True:  # Find destination from checkpoints
                failed = True
                while True:
                    dest_alpha, dest_x, dest_y = checkpoints[dest_i %
                                                             len(checkpoints)]
                    if alpha <= dest_alpha:
                        failed = False
                        break
                    dest_i += 1
                    if dest_i % len(checkpoints) == 0:
                        break
                if not failed:
                    break
                # Wrapped past the last checkpoint: retry one turn earlier.
                alpha -= 2 * math.pi
                continue
            r1x = math.cos(beta)
            r1y = math.sin(beta)
            p1x = -r1y
            p1y = r1x
            dest_dx = dest_x - x  # vector towards destination
            dest_dy = dest_y - y
            proj = r1x * dest_dx + r1y * dest_dy  # destination vector projected on rad
            # Keep beta within 1.5*pi of alpha.
            while beta - alpha > 1.5 * math.pi:
                beta -= 2 * math.pi
            while beta - alpha < -1.5 * math.pi:
                beta += 2 * math.pi
            prev_beta = beta
            proj *= SCALE
            # Turn towards the destination, by at most TRACK_TURN_RATE.
            if proj > 0.3:
                beta -= min(TRACK_TURN_RATE, abs(0.001 * proj))
            if proj < -0.3:
                beta += min(TRACK_TURN_RATE, abs(0.001 * proj))
            x += p1x * TRACK_DETAIL_STEP
            y += p1y * TRACK_DETAIL_STEP
            # Stored beta is the average of the heading before and after the turn.
            track.append((alpha, prev_beta * 0.5 + beta * 0.5, x, y))
            if laps > 4:
                break
            no_freeze -= 1
            if no_freeze == 0:
                break
        # print "\n".join([str(t) for t in enumerate(track)])

        # Find closed loop range i1..i2, first loop should be ignored, second is OK
        i1, i2 = -1, -1
        i = len(track)
        while True:
            i -= 1
            if i == 0:
                return False  # Failed
            pass_through_start = track[i][0] > self.start_alpha and track[
                i - 1][0] <= self.start_alpha
            # Scanning backwards: the first crossing found is the end of the
            # lap (i2), the next one its beginning (i1).
            if pass_through_start and i2 == -1:
                i2 = i
            elif pass_through_start and i1 == -1:
                i1 = i
                break
        if self.verbose == 1:
            print("Track generation: %i..%i -> %i-tiles track" %
                  (i1, i2, i2 - i1))
        assert i1 != -1
        assert i2 != -1

        track = track[i1:i2 - 1]

        first_beta = track[0][1]
        first_perp_x = math.cos(first_beta)
        first_perp_y = math.sin(first_beta)
        # Length of perpendicular jump to put together head and tail
        well_glued_together = np.sqrt(
            np.square(first_perp_x * (track[0][2] - track[-1][2])) +
            np.square(first_perp_y * (track[0][3] - track[-1][3])))
        if well_glued_together > TRACK_DETAIL_STEP:
            return False

        # Red-white border on hard turns
        border = [False] * len(track)
        for i in range(len(track)):
            good = True
            oneside = 0
            # A tile qualifies when the last BORDER_MIN_COUNT heading changes
            # all exceed the threshold and all have the same sign.
            for neg in range(BORDER_MIN_COUNT):
                beta1 = track[i - neg - 0][1]
                beta2 = track[i - neg - 1][1]
                good &= abs(beta1 - beta2) > TRACK_TURN_RATE * 0.2
                oneside += np.sign(beta1 - beta2)
            good &= abs(oneside) == BORDER_MIN_COUNT
            border[i] = good
        # Extend each flag backwards over the tiles that produced it.
        for i in range(len(track)):
            for neg in range(BORDER_MIN_COUNT):
                border[i - neg] |= border[i]

        # Create tiles
        for i in range(len(track)):
            alpha1, beta1, x1, y1 = track[i]
            # track[i - 1] wraps for i == 0, joining the first point to the last.
            alpha2, beta2, x2, y2 = track[i - 1]
            road1_l = (x1 - TRACK_WIDTH * math.cos(beta1),
                       y1 - TRACK_WIDTH * math.sin(beta1))
            road1_r = (x1 + TRACK_WIDTH * math.cos(beta1),
                       y1 + TRACK_WIDTH * math.sin(beta1))
            road2_l = (x2 - TRACK_WIDTH * math.cos(beta2),
                       y2 - TRACK_WIDTH * math.sin(beta2))
            road2_r = (x2 + TRACK_WIDTH * math.cos(beta2),
                       y2 + TRACK_WIDTH * math.sin(beta2))
            vertices = [road1_l, road1_r, road2_r, road2_l]
            self.fd_tile.shape.vertices = vertices
            t = self.world.CreateStaticBody(fixtures=self.fd_tile)
            t.userData = t
            c = 0.01 * (i % 3)  # cycles through three tile shades
            t.color = [ROAD_COLOR[0] + c, ROAD_COLOR[1] + c, ROAD_COLOR[2] + c]
            t.road_visited = False
            t.road_friction = 1.0
            t.fixtures[0].sensor = True
            self.road_poly.append(([road1_l, road1_r, road2_r,
                                    road2_l], t.color))
            self.road.append(t)
            if border[i]:
                # Border strip on the side given by the sign of the turn.
                side = np.sign(beta2 - beta1)
                b1_l = (x1 + side * TRACK_WIDTH * math.cos(beta1),
                        y1 + side * TRACK_WIDTH * math.sin(beta1))
                b1_r = (x1 + side * (TRACK_WIDTH + BORDER) * math.cos(beta1),
                        y1 + side * (TRACK_WIDTH + BORDER) * math.sin(beta1))
                b2_l = (x2 + side * TRACK_WIDTH * math.cos(beta2),
                        y2 + side * TRACK_WIDTH * math.sin(beta2))
                b2_r = (x2 + side * (TRACK_WIDTH + BORDER) * math.cos(beta2),
                        y2 + side * (TRACK_WIDTH + BORDER) * math.sin(beta2))
                self.road_poly.append(([b1_l, b1_r, b2_r,
                                        b2_l], (1, 1, 1) if i % 2 == 0 else
                                       (1, 0, 0)))
        self.track = track
        return True

    def reset(self):
        """Start a new episode and return its first observation.

        Destroys the previous episode's bodies, zeroes the reward and time
        counters, generates a fresh track (retrying until generation
        succeeds), places a new car at the first track point and performs one
        action-less step to obtain the initial state.
        """
        self._destroy()
        self.reward = self.prev_reward = 0.0
        self.tile_visited_count = 0
        self.t = 0.0
        self.road_poly = []

        # _create_track() reports failure by returning a falsy value; retry.
        while not self._create_track():
            if self.verbose == 1:
                print(
                    "retry to generate track (normal if there are not many of this messages)"
                )

        # Elements 1..3 of the first track point are passed on to Car.
        start_pose = self.track[0][1:4]
        self.car = Car(self.world, *start_pose)

        initial_state = self.step(None)[0]
        return initial_state

    def step(self, action):
        """Advance the simulation by one frame.

        Args:
            action: indexable ``(steer, gas, brake)`` or ``None``. ``None`` is
                used by ``reset()`` for the first, reward-free step.

        Returns:
            ``(state, step_reward, done, info)`` where ``state`` is the result
            of ``render("state_pixels")`` and ``info`` is an empty dict.
        """
        frame_dt = 1.0 / FPS

        if action is not None:
            self.car.steer(-action[0])
            self.car.gas(action[1])
            self.car.brake(action[2])

        self.car.step(frame_dt)
        self.world.Step(frame_dt, 6 * 30, 2 * 30)
        self.t += frame_dt

        self.state = self.render("state_pixels")

        if action is None:
            # First step without action, called from reset(): no bookkeeping.
            return self.state, 0, False, {}

        self.reward -= 0.1
        # Fuel is deliberately not charged against the reward (the goal is a
        # faster car); the counter is only reset.
        self.car.fuel_spent = 0.0
        step_reward = self.reward - self.prev_reward
        self.prev_reward = self.reward

        done = self.tile_visited_count == len(self.track)

        car_x, car_y = self.car.hull.position
        if abs(car_x) > PLAYFIELD or abs(car_y) > PLAYFIELD:
            # Leaving the playfield ends the episode with a fixed penalty.
            done = True
            step_reward = -100

        return self.state, step_reward, done, {}

    def render(self, mode='human'):
        """Draw the current scene and return the result for ``mode``.

        Args:
            mode: one of ``'human'``, ``'state_pixels'`` or ``'rgb_array'``.

        Returns:
            * ``None`` if ``reset()`` has not been called yet.
            * ``self.viewer.isopen`` for ``'human'`` (the window is flipped).
            * Otherwise the "event frame": the absolute difference between
              the grayscale version of the frame just drawn and the grayscale
              version of the previously captured frame. On the very first
              capture the frame is compared with itself.

        Side effects (non-``'human'`` modes): updates the module-level
        ``previous_road``, ``rgb`` and ``event_frame`` globals and writes the
        event frame to ``image_name.png`` in the current working directory.
        """
        global previous_road, rgb, event_frame

        assert mode in ['human', 'state_pixels', 'rgb_array']
        if self.viewer is None:
            # Imported lazily so the rendering backend is only loaded when a
            # viewer is actually needed.
            from gym.envs.classic_control import rendering
            self.viewer = rendering.Viewer(WINDOW_W, WINDOW_H)
            self.score_label = pyglet.text.Label('0000',
                                                 font_size=36,
                                                 x=20,
                                                 y=WINDOW_H * 2.5 / 40.00,
                                                 anchor_x='left',
                                                 anchor_y='center',
                                                 color=(255, 255, 255, 255))
            self.transform = rendering.Transform()

        if "t" not in self.__dict__:
            return  # reset() not called yet

        zoom = 0.1 * SCALE * max(1 - self.t, 0) + ZOOM * SCALE * min(
            self.t, 1)  # Animate zoom first second
        scroll_x = self.car.hull.position[0]
        scroll_y = self.car.hull.position[1]
        angle = -self.car.hull.angle
        vel = self.car.hull.linearVelocity
        if np.linalg.norm(vel) > 0.5:
            # Above this speed the view follows the velocity direction
            # instead of the hull orientation.
            angle = math.atan2(vel[0], vel[1])
        self.transform.set_scale(zoom, zoom)
        self.transform.set_translation(
            WINDOW_W / 2 - (scroll_x * zoom * math.cos(angle) -
                            scroll_y * zoom * math.sin(angle)),
            WINDOW_H / 4 - (scroll_x * zoom * math.sin(angle) +
                            scroll_y * zoom * math.cos(angle)))
        self.transform.set_rotation(angle)
        self.car.draw(self.viewer, mode != "state_pixels")
        win = self.viewer.window
        win.switch_to()
        win.dispatch_events()

        win.clear()
        t = self.transform
        if mode == 'rgb_array':
            VP_W = VIDEO_W
            VP_H = VIDEO_H
        elif mode == 'state_pixels':
            VP_W = STATE_W
            VP_H = STATE_H
        else:
            pixel_scale = 1
            if hasattr(win.context, '_nscontext'):
                pixel_scale = win.context._nscontext.view().backingScaleFactor(
                )  # pylint: disable=protected-access
            VP_W = int(pixel_scale * WINDOW_W)
            VP_H = int(pixel_scale * WINDOW_H)

        gl.glViewport(0, 0, VP_W, VP_H)
        t.enable()
        self.render_road()
        for geom in self.viewer.onetime_geoms:
            geom.render()
        self.viewer.onetime_geoms = []
        t.disable()
        self.render_indicators(WINDOW_W, WINDOW_H)

        if mode == 'human':
            win.flip()
            return self.viewer.isopen

        def decode_rgb(frame):
            # np.frombuffer replaces np.fromstring(..., sep=''), whose binary
            # mode is deprecated in NumPy.
            raw = np.frombuffer(frame.get_data(), dtype=np.uint8)
            # Reverse the row order and keep the first three of the four
            # channels; copy() yields a writable, contiguous array (the
            # buffer view itself is read-only).
            return raw.reshape(VP_H, VP_W, 4)[::-1, :, 0:3].copy()

        image_data = pyglet.image.get_buffer_manager().get_color_buffer(
        ).get_image_data()
        arr = cv2.cvtColor(decode_rgb(image_data), cv2.COLOR_RGB2GRAY)

        if len(previous_road) >= 1:
            # Compare against the oldest stored frame, then slide the history
            # forward by one. The global is rebound to a new list here.
            rgb = decode_rgb(previous_road[0])
            previous_road.append(image_data)
            previous_road = previous_road[1:]
        else:
            # No history yet: store the current frame and compare it with
            # itself.
            previous_road.append(image_data)
            rgb = decode_rgb(previous_road[0])
        arrs = cv2.cvtColor(rgb, cv2.COLOR_RGB2GRAY)

        event_frame = cv2.absdiff(arr, arrs)
        # NOTE(review): this writes a PNG on every non-'human' render, which
        # includes every step(); it looks like a debugging aid. Kept for
        # backward compatibility; confirm whether anything reads the file.
        imageio.imwrite('image_name.png', event_frame)
        return event_frame

    def returnRgb(self):
        """Return the module-level ``rgb`` and ``event_frame`` values as a tuple.

        Raises ``NameError`` if those globals have not been assigned yet.
        """
        latest = (rgb, event_frame)
        return latest

    def close(self):
        """Close the viewer if one exists and clear the reference to it."""
        viewer = self.viewer
        if viewer is None:
            return
        viewer.close()
        self.viewer = None

    def render_road(self):
        """Emit the playfield, its checker pattern and all road polygons.

        Everything is sent as quads inside a single glBegin/glEnd pair: first
        one quad covering the whole playfield, then a grid of lighter squares,
        then every ``(polygon, color)`` entry of ``self.road_poly``.
        """
        gl.glBegin(gl.GL_QUADS)

        # Background quad spanning the full playfield.
        gl.glColor4f(0.4, 0.8, 0.4, 1.0)
        background = (
            (-PLAYFIELD, +PLAYFIELD),
            (+PLAYFIELD, +PLAYFIELD),
            (+PLAYFIELD, -PLAYFIELD),
            (-PLAYFIELD, -PLAYFIELD),
        )
        for corner_x, corner_y in background:
            gl.glVertex3f(corner_x, corner_y, 0)

        # Slightly brighter squares on every second grid cell.
        gl.glColor4f(0.4, 0.9, 0.4, 1.0)
        cell = PLAYFIELD / 20.0
        for col in range(-20, 20, 2):
            for row in range(-20, 20, 2):
                gl.glVertex3f(cell * col + cell, cell * row + 0, 0)
                gl.glVertex3f(cell * col + 0, cell * row + 0, 0)
                gl.glVertex3f(cell * col + 0, cell * row + cell, 0)
                gl.glVertex3f(cell * col + cell, cell * row + cell, 0)

        # Road tiles and border pieces collected during track creation.
        for quad, tint in self.road_poly:
            red, green, blue = tint[0], tint[1], tint[2]
            gl.glColor4f(red, green, blue, 1)
            for point in quad:
                gl.glVertex3f(point[0], point[1], 0)

        gl.glEnd()

    def render_indicators(self, W, H):
        """Draw the bottom status panel and the score label.

        Args:
            W: width used to lay out the panel; one column is ``W / 40``.
            H: height used to lay out the panel; one row is ``H / 40``.

        The panel is a black strip five rows high with vertical bars for the
        car speed and the four wheel ``omega`` values, and horizontal bars
        for the first wheel's joint angle and the hull angular velocity.
        """
        gl.glBegin(gl.GL_QUADS)
        col_w = W / 40.0
        row_h = H / 40.0

        # Black backdrop for the indicator strip.
        gl.glColor4f(0, 0, 0, 1)
        for px, py in ((W, 0), (W, 5 * row_h), (0, 5 * row_h), (0, 0)):
            gl.glVertex3f(px, py, 0)

        def vertical_bar(slot, value, color):
            # Bar rising from row 1 by ``value`` rows, one column wide.
            gl.glColor4f(color[0], color[1], color[2], 1)
            top = row_h + row_h * value
            gl.glVertex3f((slot + 0) * col_w, top, 0)
            gl.glVertex3f((slot + 1) * col_w, top, 0)
            gl.glVertex3f((slot + 1) * col_w, row_h, 0)
            gl.glVertex3f((slot + 0) * col_w, row_h, 0)

        def horizontal_bar(slot, value, color):
            # Bar between rows 2 and 4, extending ``value`` columns from slot.
            gl.glColor4f(color[0], color[1], color[2], 1)
            start = (slot + 0) * col_w
            end = (slot + value) * col_w
            gl.glVertex3f(start, 4 * row_h, 0)
            gl.glVertex3f(end, 4 * row_h, 0)
            gl.glVertex3f(end, 2 * row_h, 0)
            gl.glVertex3f(start, 2 * row_h, 0)

        velocity_x = self.car.hull.linearVelocity[0]
        velocity_y = self.car.hull.linearVelocity[1]
        true_speed = np.sqrt(np.square(velocity_x) + np.square(velocity_y))
        vertical_bar(5, 0.02 * true_speed, (1, 1, 1))

        # ABS sensors: one bar per wheel in slots 7..10.
        wheel_colors = ((0.0, 0, 1), (0.0, 0, 1), (0.2, 0, 1), (0.2, 0, 1))
        for wheel_index, bar_color in enumerate(wheel_colors):
            vertical_bar(7 + wheel_index,
                         0.01 * self.car.wheels[wheel_index].omega, bar_color)

        horizontal_bar(20, -10.0 * self.car.wheels[0].joint.angle, (0, 1, 0))
        horizontal_bar(30, -0.8 * self.car.hull.angularVelocity, (1, 0, 0))
        gl.glEnd()

        self.score_label.text = "%04i" % self.reward
        self.score_label.draw()
Exemplo n.º 12
0
class CarRacing(gym.Env, EzPickle):
    metadata = {
        "render.modes": ["human", "rgb_array", "state_pixels", "track_vertex"],
        "video.frames_per_second": FPS,
    }

    def __init__(self, verbose=1):
        """Create the physics world, bookkeeping fields and gym spaces.

        Args:
            verbose: stored as ``self.verbose``; the value ``1`` enables the
                progress prints emitted during track generation.
        """
        EzPickle.__init__(self)
        self.seed()

        # The listener is stored on the instance as well as handed to the
        # world (the attribute name suggests it is kept alive on purpose).
        listener = FrictionDetector(self)
        self.contactListener_keepref = listener
        self.world = Box2D.b2World((0, 0), contactListener=listener)

        # Rendering handles, created on demand.
        self.viewer = None
        self.invisible_state_window = None
        self.invisible_video_window = None

        # Episode contents.
        self.road = None
        self.grass = []
        self.on_grass_idx = set()
        self.car = None
        self.poly = {'grass': [], 'road': [], 'other': []}

        # Reward bookkeeping and misc settings.
        self.reward = 0.0
        self.prev_reward = 0.0
        self.verbose = verbose
        self.timer = 0

        # Action components: steer, gas, brake.
        action_low = np.array([-1, 0, 0])
        action_high = np.array([+1, +1, +1])
        self.action_space = spaces.Box(action_low,
                                       action_high,
                                       dtype=np.float32)

        self.observation_space = spaces.Box(low=0,
                                            high=255,
                                            shape=(STATE_H, STATE_W, 3),
                                            dtype=np.uint8)

    def seed(self, seed=None):
        """Create ``self.np_random`` from ``seed`` and return the seed used.

        Args:
            seed: value forwarded to ``seeding.np_random``.

        Returns:
            A one-element list holding the seed reported by
            ``seeding.np_random``.
        """
        generator, used_seed = seeding.np_random(seed)
        self.np_random = generator
        return [used_seed]

    def _destroy(self):
        """Remove the current road, grass and car from the physics world.

        Does nothing when ``self.road`` is ``None`` or empty.
        """
        if not self.road:
            return

        # Road bodies first, then grass bodies; each list is reset afterwards.
        for group in ("road", "grass"):
            for body in getattr(self, group):
                self.world.DestroyBody(body)
            setattr(self, group, [])

        self.grass_idx = None

        self.car.destroy()

    def _create_track(self):
        """Generate a random closed track and its physics bodies.

        The procedure places 12 checkpoints around the origin, walks a point
        towards successive checkpoints in steps of ``TRACK_DETAIL_STEP`` to
        trace a path, extracts one closed loop from that path, and then
        creates a grass sensor body and a road sensor body per track point.
        Render polygons are appended to ``self.poly`` and the final point
        list is stored in ``self.track``.

        Returns:
            ``True`` on success. ``False`` when no closed loop could be found
            or when the loop's head and tail are too far apart; both failure
            exits happen before any body is created.
        """
        CHECKPOINTS = 12

        # Create checkpoints
        checkpoints = []
        for c in range(CHECKPOINTS):
            # Evenly spaced angle plus a random offset of up to one sector.
            alpha = 2 * math.pi * c / CHECKPOINTS + self.np_random.uniform(
                0, 2 * math.pi * 1 / CHECKPOINTS)
            rad = self.np_random.uniform(TRACK_RAD / 3, TRACK_RAD)
            # First and last checkpoints are pinned to a fixed angle and radius.
            if c == 0:
                alpha = 0
                rad = 1.5 * TRACK_RAD
            if c == CHECKPOINTS - 1:
                alpha = 2 * math.pi * c / CHECKPOINTS
                self.start_alpha = 2 * math.pi * (-0.5) / CHECKPOINTS
                rad = 1.5 * TRACK_RAD
            checkpoints.append(
                (alpha, rad * math.cos(alpha), rad * math.sin(alpha)))

        self.road = []

        # Go from one checkpoint to another to create track
        x, y, beta = 1.5 * TRACK_RAD, 0, 0
        dest_i = 0
        laps = 0
        track = []
        no_freeze = 2500  # hard cap on the number of path steps
        visited_other_side = False
        while True:
            alpha = math.atan2(y, x)
            # A lap is counted when alpha becomes positive again after having
            # been negative.
            if visited_other_side and alpha > 0:
                laps += 1
                visited_other_side = False
            if alpha < 0:
                visited_other_side = True
                alpha += 2 * math.pi
            while True:  # Find destination from checkpoints
                failed = True
                while True:
                    dest_alpha, dest_x, dest_y = checkpoints[dest_i %
                                                             len(checkpoints)]
                    if alpha <= dest_alpha:
                        failed = False
                        break
                    dest_i += 1
                    if dest_i % len(checkpoints) == 0:
                        break
                if not failed:
                    break
                # Wrapped past the last checkpoint: retry one turn earlier.
                alpha -= 2 * math.pi
                continue
            r1x = math.cos(beta)
            r1y = math.sin(beta)
            p1x = -r1y
            p1y = r1x
            dest_dx = dest_x - x  # vector towards destination
            dest_dy = dest_y - y
            proj = r1x * dest_dx + r1y * dest_dy  # destination vector projected on rad
            # Keep beta within 1.5*pi of alpha.
            while beta - alpha > 1.5 * math.pi:
                beta -= 2 * math.pi
            while beta - alpha < -1.5 * math.pi:
                beta += 2 * math.pi
            prev_beta = beta
            proj *= SCALE
            # Turn by at most TRACK_TURN_RATE per step, in the direction
            # selected by the sign of the projection.
            if proj > 0.3:
                beta -= min(TRACK_TURN_RATE, abs(0.001 * proj))
            if proj < -0.3:
                beta += min(TRACK_TURN_RATE, abs(0.001 * proj))
            x += p1x * TRACK_DETAIL_STEP
            y += p1y * TRACK_DETAIL_STEP
            # Each track point is (alpha, averaged beta, x, y).
            track.append((alpha, prev_beta * 0.5 + beta * 0.5, x, y))
            if laps > 4:
                break
            no_freeze -= 1
            if no_freeze == 0:
                break

        # Find closed loop range i1..i2, first loop should be ignored, second is OK
        i1, i2 = -1, -1
        i = len(track)
        # Scan backwards for the last two crossings of start_alpha.
        while True:
            i -= 1
            if i == 0:
                return False  # Failed
            pass_through_start = (track[i][0] > self.start_alpha
                                  and track[i - 1][0] <= self.start_alpha)
            if pass_through_start and i2 == -1:
                i2 = i
            elif pass_through_start and i1 == -1:
                i1 = i
                break
        if self.verbose == 1:
            print("Track generation: %i..%i -> %i-tiles track" %
                  (i1, i2, i2 - i1))
        assert i1 != -1
        assert i2 != -1

        track = track[i1:i2 - 1]

        first_beta = track[0][1]
        first_perp_x = math.cos(first_beta)
        first_perp_y = math.sin(first_beta)
        # Length of perpendicular jump to put together head and tail
        well_glued_together = np.sqrt(
            np.square(first_perp_x * (track[0][2] - track[-1][2])) +
            np.square(first_perp_y * (track[0][3] - track[-1][3])))
        if well_glued_together > TRACK_DETAIL_STEP:
            return False

        # Red-white border on hard turns
        border = [False] * len(track)
        for i in range(len(track)):
            # A point qualifies when the last BORDER_MIN_COUNT beta changes
            # are all large enough and all have the same sign.
            good = True
            oneside = 0
            for neg in range(BORDER_MIN_COUNT):
                beta1 = track[i - neg - 0][1]
                beta2 = track[i - neg - 1][1]
                good &= abs(beta1 - beta2) > TRACK_TURN_RATE * 0.2
                oneside += np.sign(beta1 - beta2)
            good &= abs(oneside) == BORDER_MIN_COUNT
            border[i] = good
        # Extend each flagged point backwards over the preceding points.
        for i in range(len(track)):
            for neg in range(BORDER_MIN_COUNT):
                border[i - neg] |= border[i]

        # Create tiles
        for i in range(len(track)):
            # Index i - 1 wraps to the last point when i == 0.
            alpha1, beta1, x1, y1 = track[i]
            alpha2, beta2, x2, y2 = track[i - 1]

            # Grass fixtures
            road1_l = (x1 - TRACK_GRASS_WIDTH * math.cos(beta1),
                       y1 - TRACK_GRASS_WIDTH * math.sin(beta1))
            road1_r = (x1 + TRACK_GRASS_WIDTH * math.cos(beta1),
                       y1 + TRACK_GRASS_WIDTH * math.sin(beta1))
            road2_l = (x2 - TRACK_GRASS_WIDTH * math.cos(beta2),
                       y2 - TRACK_GRASS_WIDTH * math.sin(beta2))
            road2_r = (x2 + TRACK_GRASS_WIDTH * math.cos(beta2),
                       y2 + TRACK_GRASS_WIDTH * math.sin(beta2))
            self.poly['grass'].append(([road1_l, road1_r, road2_r,
                                        road2_l], GRASS_COLOR))
            t = self.world.CreateStaticBody(
                fixtures=fixtureDef(shape=polygonShape(
                    vertices=[road1_l, road1_r, road2_r, road2_l]),
                                    isSensor=True))
            t.userData = t
            # The grass body records its own index in self.grass.
            t.userData.grass_idx = len(self.grass)
            self.grass.append(t)

            # Road fixtures
            road1_l = (x1 - TRACK_WIDTH * math.cos(beta1),
                       y1 - TRACK_WIDTH * math.sin(beta1))
            road1_r = (x1 + TRACK_WIDTH * math.cos(beta1),
                       y1 + TRACK_WIDTH * math.sin(beta1))
            road2_l = (x2 - TRACK_WIDTH * math.cos(beta2),
                       y2 - TRACK_WIDTH * math.sin(beta2))
            road2_r = (x2 + TRACK_WIDTH * math.cos(beta2),
                       y2 + TRACK_WIDTH * math.sin(beta2))
            t = self.world.CreateStaticBody(
                fixtures=fixtureDef(shape=polygonShape(
                    vertices=[road1_l, road1_r, road2_r, road2_l]),
                                    isSensor=True))
            t.userData = t
            t.road_visited = False
            t.road_friction = 1.0
            # Vary the colour of the road
            c = 0.01 * (i % 3)
            t.color = [ROAD_COLOR[0] + c, ROAD_COLOR[1] + c, ROAD_COLOR[2] + c]
            self.poly['road'].append(([road1_l, road1_r, road2_r,
                                       road2_l], t.color))
            self.road.append(t)

            # Red/white borders (only occur in the render)
            if border[i]:
                # The border is placed on the side given by the sign of the
                # beta change between the two points.
                side = np.sign(beta2 - beta1)
                b1_l = (x1 + side * TRACK_WIDTH * math.cos(beta1),
                        y1 + side * TRACK_WIDTH * math.sin(beta1))
                b1_r = (x1 + side * (TRACK_WIDTH + BORDER) * math.cos(beta1),
                        y1 + side * (TRACK_WIDTH + BORDER) * math.sin(beta1))
                b2_l = (x2 + side * TRACK_WIDTH * math.cos(beta2),
                        y2 + side * TRACK_WIDTH * math.sin(beta2))
                b2_r = (x2 + side * (TRACK_WIDTH + BORDER) * math.cos(beta2),
                        y2 + side * (TRACK_WIDTH + BORDER) * math.sin(beta2))
                # Alternate white and red pieces by point index parity.
                self.poly['other'].append(([b1_l, b1_r, b2_r,
                                            b2_l], (1, 1, 1) if i % 2 == 0 else
                                           (1, 0, 0)))

        self.track = track
        return True

    def reset(self):
        """Start a new episode and return its first observation.

        Destroys the previous episode's bodies, clears the reward, time,
        polygon and grass-contact bookkeeping, generates a fresh track
        (retrying until generation succeeds), places a new car at the first
        track point and performs one action-less step.
        """
        self._destroy()
        self.reward = self.prev_reward = 0.0
        self.tile_visited_count = 0
        self.t = 0.0
        self.poly = {layer: [] for layer in ('grass', 'road', 'other')}
        self.on_grass_idx.clear()

        # _create_track() reports failure by returning a falsy value; retry.
        while not self._create_track():
            if self.verbose == 1:
                print(
                    "retry to generate track (normal if there are not many instances of this message)"
                )

        # Elements 1..3 of the first track point (beta, x, y) seed the car.
        start_pose = self.track[0][1:4]
        self.car = Car(self.world, *start_pose)

        initial_state = self.step(None)[0]
        return initial_state

    def step(self, action):
        """Advance the simulation by one frame.

        Args:
            action: indexable ``(steer, gas, brake)`` or ``None``. ``None`` is
                used by ``reset()`` for the first, reward-free step.

        Returns:
            ``(state, step_reward, done, info)`` where ``state`` is the result
            of ``render("state_pixels")`` and ``info`` is an empty dict.
        """
        frame_dt = 1.0 / FPS

        if action is not None:
            self.car.steer(-action[0])
            self.car.gas(action[1])
            self.car.brake(action[2])

        self.car.step(frame_dt)
        self.world.Step(frame_dt, 6 * 30, 2 * 30)
        self.t += frame_dt

        self.state = self.render("state_pixels")

        if action is None:
            # First step without action, called from reset(): no bookkeeping.
            return self.state, 0, False, {}

        self.reward -= 0.1
        # Fuel is deliberately not charged against the reward (the goal is a
        # faster car); the counter is only reset.
        self.car.fuel_spent = 0.0
        step_reward = self.reward - self.prev_reward
        self.prev_reward = self.reward

        done = self.tile_visited_count == len(self.track)

        car_x, car_y = self.car.hull.position
        outside_playfield = abs(car_x) > PLAYFIELD or abs(car_y) > PLAYFIELD
        if outside_playfield or len(self.on_grass_idx) == 0:
            # Leaving the playfield, or touching no grass tile at all, ends
            # the episode with a fixed penalty.
            done = True
            step_reward = -100

        return self.state, step_reward, done, {}

    def render(self, mode='human'):
        """Render the environment, or return a vector observation.

        Args:
            mode: one of ``"human"``, ``"state_pixels"``, ``"rgb_array"`` or
                ``"track_vertex"``.

        Returns:
            * ``"track_vertex"``: ``None`` when there is no car yet, otherwise
              a 1-D array of nine wheel/vehicle values followed by the x
              coordinates and then the y coordinates of the visible road
              vertices expressed in the car hull's local frame. When no
              vertices are visible only the nine leading values are returned.
            * ``"human"``: ``self.viewer.isopen`` after flipping the window.
            * ``"rgb_array"`` / ``"state_pixels"``: an ``(H, W, 3)`` uint8
              array read back from the colour buffer.
            * ``None`` if ``reset()`` has not been called yet.
        """
        assert mode in ["human", "state_pixels", "rgb_array", "track_vertex"]

        # Note that the vertices are arranged in order of how close each tile is.
        # But there is no guarantee in the order of which side (left or right) is returned.
        # And the first tile returned may include both right and then both left vertices of the quad.
        visible_road_vertices = self.getRoadVertices()

        if mode == "track_vertex":

            if self.car is None or self.car.hull is None:
                return None

            # reshape(-1, 2) keeps the array two-dimensional even when the
            # vertex list is empty, so the column slicing below cannot fail.
            local_v = np.array([
                self.car.hull.GetLocalPoint(v) for v in visible_road_vertices
            ]).reshape(-1, 2)

            # Return two vectors of each side of the track relative to the frame of the car
            # Also return the following wheel parameters: steer, gas, brake, speed and vehicle speed (forward and sideways)
            arr = np.array([
                self.car.wheels[0].steer, self.car.wheels[2].gas,
                self.car.wheels[0].brake, self.car.wheels[0].vr,
                self.car.wheels[1].vr, self.car.wheels[2].vr,
                self.car.wheels[3].vr, self.car.wheels[2].vf,
                self.car.wheels[2].vs
            ])
            arr = np.r_[arr, local_v[:, 0], local_v[:, 1]]
            return arr

        if self.viewer is None:
            # Imported lazily so the rendering backend is only loaded when a
            # viewer is actually needed.
            from gym.envs.classic_control import rendering
            self.viewer = rendering.Viewer(WINDOW_W, WINDOW_H)
            self.score_label = pyglet.text.Label('0000',
                                                 font_size=36,
                                                 x=20,
                                                 y=WINDOW_H * 2.5 / 40.00,
                                                 anchor_x='left',
                                                 anchor_y='center',
                                                 color=(255, 255, 255, 255))
            self.transform = rendering.Transform()
            if not ZOOM_FOLLOW:
                # Fixed camera: the whole playfield fits the window height.
                zoom = WINDOW_H / (2 * PLAYFIELD)
                self.transform.set_scale(zoom, zoom)
                self.transform.set_translation(WINDOW_W / 2, WINDOW_H / 2)
                self.transform.set_rotation(0)

        if "t" not in self.__dict__:
            return  # reset() not called yet

        if ZOOM_FOLLOW:
            # Zoom starts at 0.1*SCALE and ends at ZOOM*SCALE
            zoom = 0.1 * SCALE * max(1 - self.t, 0) + ZOOM * SCALE * min(
                self.t, 1)  # Animate zoom first second
            scroll_x = self.car.hull.position[0]
            scroll_y = self.car.hull.position[1]
            angle = -self.car.hull.angle
            vel = self.car.hull.linearVelocity
            if np.linalg.norm(vel) > 0.5:
                # Above this speed the view follows the velocity direction
                # instead of the hull orientation.
                angle = math.atan2(vel[0], vel[1])
            self.transform.set_scale(zoom, zoom)
            self.transform.set_translation(
                WINDOW_W / 2 - (scroll_x * zoom * math.cos(angle) -
                                scroll_y * zoom * math.sin(angle)),
                WINDOW_H / 4 - (scroll_x * zoom * math.sin(angle) +
                                scroll_y * zoom * math.cos(angle)))
            self.transform.set_rotation(angle)

        self.car.draw(self.viewer, mode != "state_pixels")

        win = self.viewer.window
        win.switch_to()
        win.dispatch_events()

        # scale the viewport to output at different sizes for ML
        win.clear()
        t = self.transform
        if mode == 'rgb_array':
            VP_W = VIDEO_W
            VP_H = VIDEO_H
        elif mode == 'state_pixels':
            VP_W = STATE_W
            VP_H = STATE_H
        else:
            pixel_scale = 1
            # an NSOpenGLContext seems to be something in Mac
            # The following line is trying to scale based on Mac's scaling settings
            if hasattr(win.context, '_nscontext'):
                pixel_scale = win.context._nscontext.view().backingScaleFactor(
                )  # pylint: disable=protected-access
            VP_W = int(pixel_scale * WINDOW_W)
            VP_H = int(pixel_scale * WINDOW_H)

        gl.glViewport(0, 0, VP_W, VP_H)
        t.enable()
        self.render_road(visible_road_vertices)
        for geom in self.viewer.onetime_geoms:
            geom.render()
        self.viewer.onetime_geoms = []
        t.disable()
        self.render_indicators(WINDOW_W, WINDOW_H)

        if mode == 'human':
            win.flip()
            return self.viewer.isopen

        image_data = pyglet.image.get_buffer_manager().get_color_buffer(
        ).get_image_data()
        # np.frombuffer replaces np.fromstring(..., sep=''), whose binary mode
        # is deprecated in NumPy.
        arr = np.frombuffer(image_data.get_data(), dtype=np.uint8)
        arr = arr.reshape(VP_H, VP_W, 4)
        # Reverse the row order and keep the first three of the four
        # channels; copy() yields a writable, contiguous array (the buffer
        # view itself is read-only).
        arr = arr[::-1, :, 0:3].copy()

        return arr

    def getRoadVertices(self):
        """Return the de-duplicated vertices of the road tiles ahead of the car.

        The reference tile is the largest index in ``self.on_grass_idx``.
        Vertices of the next ``LOOK_AHEAD`` road tiles starting there come
        first; if that window runs past the end of the road, the vertices of
        the tiles wrapped around from the start are appended after them.
        Returns an empty list when there is no road or no grass contact.
        """
        road = self.road
        if road is None or len(self.on_grass_idx) == 0:
            return []

        first = max(self.on_grass_idx)
        window_end = first + LOOK_AHEAD
        # Clamp both bounds so the slices select exactly the indices i with
        # first <= i < window_end, and i < window_end - len(road).
        ahead = road[first:max(window_end, first)]
        wrapped = road[:max(window_end - len(road), 0)]

        collected = []
        for tile in ahead + wrapped:
            collected.extend(tile.fixtures[0].shape.vertices)

        # Remove duplicates while keeping first-seen order
        # (dict keys are unique and preserve insertion order).
        return list(dict.fromkeys(collected))

    def close(self):
        """Shut down the viewer, if any, and drop the reference to it."""
        if self.viewer is None:
            return
        active_viewer = self.viewer
        active_viewer.close()
        self.viewer = None

    def render_road(self, visible_vertices):
        """Draw the background, grass, road and border quads plus vertex markers.

        Args:
            visible_vertices: sequence of ``(x, y)`` points drawn as size-5
                points after the quads; the red component fades from 1
                towards 0 along the sequence.

        Grass polygons inside the look-ahead window that starts at the
        largest index in ``self.on_grass_idx`` are recoloured with
        ``VISIBLE_ROAD_COLOR``.
        """
        # Background quad covering the playfield.
        quad_colors = [0.1, 0.1, 0.1, 1.0] * 4
        quad_coords = [
            +PLAYFIELD, +PLAYFIELD, 0,
            +PLAYFIELD, -PLAYFIELD, 0,
            -PLAYFIELD, -PLAYFIELD, 0,
            -PLAYFIELD, +PLAYFIELD, 0,
        ]

        # Grass. With no grass contact (this only occurs just before a reset
        # when you drive off the track) nothing is highlighted.
        grass_polys = self.poly['grass']
        newest = None if len(self.on_grass_idx) == 0 else max(
            self.on_grass_idx)
        for index, (corners, tint) in enumerate(grass_polys):
            if newest is not None:
                window_end = newest + LOOK_AHEAD
                in_window = newest <= index < window_end
                wrapped_around = index < window_end - len(grass_polys)
                if in_window or wrapped_around:
                    tint = VISIBLE_ROAD_COLOR
            quad_colors.extend([*tint, 1] * len(corners))
            for corner in corners:
                quad_coords.extend([corner[0], corner[1], 0])

        # Road and other items (like the red/white borders).
        for layer in ('road', 'other'):
            for corners, tint in self.poly[layer]:
                quad_colors.extend([*tint, 1] * len(corners))
                for corner in corners:
                    quad_coords.extend([corner[0], corner[1], 0])

        quads = pyglet.graphics.vertex_list(
            len(quad_coords) // 3, ("v3f", quad_coords), ("c4f", quad_colors))
        quads.draw(gl.GL_QUADS)

        # Markers for the visible vertices.
        marker_colors = []
        marker_coords = []
        for rank, vertex in enumerate(visible_vertices):
            marker_colors.extend([1 - rank / len(visible_vertices), 0, 0, 1.0])
            marker_coords.extend([vertex[0], vertex[1], 0])
        markers = pyglet.graphics.vertex_list(
            len(marker_coords) // 3, ("v3f", marker_coords),
            ("c4f", marker_colors))
        pyglet.gl.glPointSize(5)
        markers.draw(gl.GL_POINTS)
        pyglet.gl.glPointSize(1)

    def render_indicators(self, W, H):
        """Draw the bottom status panel and the score label.

        Args:
            W: width used to lay out the panel; one column is ``W / 40``.
            H: height used to lay out the panel; one row is ``H / 40``.

        All quads are collected into one vertex list and drawn in a single
        call: a black strip five rows high, vertical bars for the car speed
        and the four wheel ``omega`` values, and horizontal bars for the
        first wheel's joint angle and the hull angular velocity.
        """
        col_w = W / 40.0
        row_h = H / 40.0

        # Black backdrop for the indicator strip.
        quad_colors = [0, 0, 0, 1] * 4
        quad_coords = [W, 0, 0, W, 5 * row_h, 0, 0, 5 * row_h, 0, 0, 0, 0]

        def add_quad(color, corners):
            # One RGBA entry per corner, followed by the corner positions.
            quad_colors.extend([color[0], color[1], color[2], 1] * 4)
            for corner_x, corner_y in corners:
                quad_coords.extend([corner_x, corner_y, 0])

        def vertical_bar(slot, value, color):
            # Bar rising from row 1 by ``value`` rows, one column wide.
            add_quad(color, (
                (slot * col_w, row_h + row_h * value),
                ((slot + 1) * col_w, row_h + row_h * value),
                ((slot + 1) * col_w, row_h),
                ((slot + 0) * col_w, row_h),
            ))

        def horizontal_bar(slot, value, color):
            # Bar between rows 2 and 4, extending ``value`` columns from slot.
            add_quad(color, (
                ((slot + 0) * col_w, 4 * row_h),
                ((slot + value) * col_w, 4 * row_h),
                ((slot + value) * col_w, 2 * row_h),
                ((slot + 0) * col_w, 2 * row_h),
            ))

        velocity_x = self.car.hull.linearVelocity[0]
        velocity_y = self.car.hull.linearVelocity[1]
        true_speed = np.sqrt(np.square(velocity_x) + np.square(velocity_y))

        vertical_bar(5, 0.02 * true_speed, (1, 1, 1))

        # ABS sensors: one bar per wheel in slots 7..10.
        wheel_colors = ((0.0, 0, 1), (0.0, 0, 1), (0.2, 0, 1), (0.2, 0, 1))
        for wheel_index, bar_color in enumerate(wheel_colors):
            vertical_bar(7 + wheel_index,
                         0.01 * self.car.wheels[wheel_index].omega, bar_color)

        horizontal_bar(20, -10.0 * self.car.wheels[0].joint.angle, (0, 1, 0))
        horizontal_bar(30, -0.8 * self.car.hull.angularVelocity, (1, 0, 0))

        panel = pyglet.graphics.vertex_list(
            len(quad_coords) // 3,
            ("v3f", quad_coords),
            ("c4f", quad_colors),
        )
        panel.draw(gl.GL_QUADS)

        self.score_label.text = "%04i" % self.reward
        self.score_label.draw()
Exemplo n.º 13
0
class CarRacingFix:
    assert gym.__version__ <= '0.17.1'

    def __init__(self, verbose=1):
        """Set up the physics world and the bookkeeping used by the wrapper.

        Args:
            verbose: stored on ``self.verbose``.
        """
        # Physics: the contact listener is also stored on the instance.
        listener = FrictionDetector(self)
        self.contactListener_keep_ref = listener
        self.world = Box2D.b2World((0, 0), contactListener=listener)

        # Rendering handles, created lazily by render().
        self.viewer = None
        self.invisible_state_window = None
        self.invisible_video_window = None
        self.transform = None

        # Track / car; populated by reset().
        self.road = None
        self.car = None
        self.road_poly = []
        self.tile_visited_count = 0

        # Reward accounting.
        self.reward = 0.0
        self.prev_reward = 0.0
        self.verbose = verbose

        # Template fixture whose vertices are overwritten for every road tile.
        unit_square = [(0, 0), (1, 0), (1, -1), (0, -1)]
        self.fd_tile = Box2D.b2FixtureDef(
            shape=Box2D.b2PolygonShape(vertices=unit_square))

        # Episode state.
        self.state_temp = None  # previous frame, stacked with the next one
        self.t = None
        self.num_step = 0

        # Environment description attributes.
        # NOTE(review): presumably read by the training framework - confirm.
        self.env_name = 'CarRacingFix'
        self.state_dim = (STATE_W, STATE_H, 3 * 2)
        self.action_dim = 6
        self.if_discrete = False
        self.max_step = 512
        self.target_return = 950
        self.action_max = 1

    def reset(self):
        """Start a new episode and return the first stacked observation.

        Destroys the previous track and car, clears the reward and tile
        counters, regenerates a track (retrying until ``_create_track``
        succeeds), spawns the car at the first track point and performs one
        zero-action simulation step.

        Returns:
            The stacked observation produced by ``old_step((0, 0, 0))``.
        """
        self.num_step = 1

        self._destroy()
        self.reward = 0.0
        self.prev_reward = 0.0
        self.tile_visited_count = 0
        self.t = 0.0
        self.road_poly = []

        # _create_track() returns False for unusable tracks; keep trying.
        while not self._create_track():
            pass
        self.car = Car(self.world, *self.track[0][1:4])

        # render("state_pixels") yields (STATE_H, STATE_W, 3) frames, so the
        # "previous frame" buffer must use the same axis order or the axis-2
        # concatenate in old_step() fails whenever STATE_W != STATE_H.
        self.state_temp = np.zeros((STATE_H, STATE_W, 3), dtype=np.uint8)
        return self.old_step((0, 0, 0))[0]

    def step(self, action):
        """Advance the simulation by two physics steps with one 6-value action.

        ``action[:3]`` drives the first sub-step (not rendered) and
        ``action[3:]`` the second (rendered); their rewards are summed.

        Args:
            action: indexable with six entries, two consecutive
                (steer, gas, brake) triples.

        Returns:
            tuple: ``(state, reward, done, info_dict)``.  If a sub-step raises,
            the error is printed and the episode is ended with reward 0 and a
            state built from the last rendered frame.
        """
        try:
            reward0 = self.old_step(action[:3], if_draw=False)[1]
            state, reward1, done, info_dict = self.old_step(action[3:],
                                                            if_draw=True)
            reward = reward0 + reward1
        except Exception as error:
            # Deliberate best-effort: a simulator/render failure ends the
            # episode instead of crashing the training loop.
            print(f"| CarRacingFix Error: {error}")
            # Same layout as old_step(): two frames joined on the channel
            # axis.  (np.stack would add a leading axis and change the rank.)
            state = np.concatenate((self.state_temp, self.state_temp), axis=2)
            reward = 0
            done = True
            info_dict = dict()

        self.num_step += 1
        # ">=" so the cap still triggers if the counter ever jumps past it.
        if self.num_step >= self.max_step:
            done = True
        return state, reward, done, info_dict

    def old_step(self, action, if_draw=True):
        """Run one physics step and optionally render a stacked observation.

        Args:
            action: indexable; entries 0, 1, 2 are passed to the car's
                ``steer``, ``gas`` and ``brake`` respectively.
            if_draw: when true, render a frame, apply the frame-based
                termination checks and build the stacked state.

        Returns:
            tuple: ``(stack_state, step_reward, done, {})`` where
            ``stack_state`` is ``None`` if ``if_draw`` is false, otherwise the
            previous and current frames concatenated on axis 2.
        """
        dt = 1.0 / FPS
        car = self.car

        car.steer(action[0])
        car.gas(action[1])
        car.brake(action[2])

        car.step(dt)
        self.world.Step(dt, 6 * 30, 2 * 30)
        self.t += dt

        # Constant per-step cost; fuel usage is intentionally not charged.
        self.reward -= 0.1

        finished = False
        stacked = None
        if if_draw:
            frame = self.render("state_pixels")

            # Mean of channel 1 over the [16:96, 16:96] window; values outside
            # the open interval (32, 211) are treated as "off the road".
            window_mean = frame[16:96, 16:96, 1].mean()
            if not (32 < window_mean < 211):
                self.reward -= 2.0
                finished = True

            if self.tile_visited_count == len(self.track):
                finished = True

            stacked = np.concatenate((self.state_temp, frame), axis=2)
            self.state_temp = frame

        delta = self.reward - self.prev_reward
        self.prev_reward = self.reward

        return stacked, delta, finished, {}

    def close(self):
        """Close the viewer, if one was created, and forget it."""
        viewer = self.viewer
        if viewer is None:
            return
        viewer.close()
        self.viewer = None

    def render(self, mode='human'):
        """Render the scene to the window or to a pixel array.

        Args:
            mode: ``'human'`` flips the window buffer; ``'state_pixels'``
                reads the colour buffer back at STATE_W x STATE_H.

        Returns:
            ``None`` if ``self.t`` has not been set yet (reset() not called),
            ``self.viewer.isopen`` in ``'human'`` mode, otherwise a writable
            ``(vp_h, vp_w, 3)`` uint8 array.
        """
        assert mode in ['human', 'state_pixels']
        if self.viewer is None:
            # Imported lazily so that merely constructing the environment
            # does not import the rendering module.
            from gym.envs.classic_control import rendering
            self.viewer = rendering.Viewer(WINDOW_W, WINDOW_H)
            self.transform = rendering.Transform()
        if self.t is None:
            return None

        zoom = 0.1 * SCALE * max(1 - self.t, 0) + ZOOM * SCALE * min(
            self.t, 1)  # Animate zoom first second
        scroll_x = self.car.hull.position[0]
        scroll_y = self.car.hull.position[1]
        angle = -self.car.hull.angle
        vel = self.car.hull.linearVelocity
        # Once the car is moving, orient the camera along the velocity
        # instead of along the hull.
        if np.linalg.norm(vel) > 0.5:
            angle = math.atan2(vel[0], vel[1])
        self.transform.set_scale(zoom, zoom)
        self.transform.set_translation(
            WINDOW_W / 2 - (scroll_x * zoom * math.cos(angle) -
                            scroll_y * zoom * math.sin(angle)),
            WINDOW_H / 4 - (scroll_x * zoom * math.sin(angle) +
                            scroll_y * zoom * math.cos(angle)))
        self.transform.set_rotation(angle)

        self.car.draw(self.viewer, mode != "state_pixels")

        win = self.viewer.window
        win.switch_to()
        win.dispatch_events()

        win.clear()
        t = self.transform
        if mode == 'state_pixels':
            vp_w = STATE_W
            vp_h = STATE_H
        else:
            # When the GL context exposes an `_nscontext`, scale the viewport
            # by its backing scale factor.
            context_nscontext = getattr(win.context, '_nscontext', None)
            pixel_scale = 1 if context_nscontext is None else context_nscontext.view(
            ).backingScaleFactor()
            # pylint: disable=protected-access
            vp_w = int(pixel_scale * WINDOW_W)
            vp_h = int(pixel_scale * WINDOW_H)

        gl.glViewport(0, 0, vp_w, vp_h)
        t.enable()
        self.render_road()
        for geom in self.viewer.onetime_geoms:
            geom.render()
        self.viewer.onetime_geoms = []
        t.disable()
        # Indicators are intentionally not drawn in this variant.

        if mode == 'human':
            win.flip()
            return self.viewer.isopen

        image_data = pyglet.image.get_buffer_manager().get_color_buffer(
        ).get_image_data()
        # np.fromstring(..., sep='') is the deprecated binary mode; frombuffer
        # is the documented replacement.  frombuffer gives a read-only view,
        # so copy after dropping the 4th channel to keep the result writable.
        arr = np.frombuffer(image_data.get_data(), dtype=np.uint8)
        arr = arr.reshape((vp_h, vp_w, 4))[:, :, :3].copy()
        return arr

    def render_road(self):
        """Emit the playfield, its checker pattern and all road polygons.

        Everything is drawn inside a single ``GL_QUADS`` begin/end pair.
        """
        vertex = gl.glVertex3f
        gl.glBegin(gl.GL_QUADS)

        # Base quad covering the whole playfield.
        gl.glColor4f(0.4, 0.8, 0.4, 1.0)
        field_corners = (
            (-PLAYFIELD, +PLAYFIELD),
            (+PLAYFIELD, +PLAYFIELD),
            (+PLAYFIELD, -PLAYFIELD),
            (-PLAYFIELD, -PLAYFIELD),
        )
        for corner_x, corner_y in field_corners:
            vertex(corner_x, corner_y, 0)

        # Slightly different green squares on every second grid cell.
        gl.glColor4f(0.4, 0.9, 0.4, 1.0)
        cell = PLAYFIELD / 20.0
        for col in range(-20, 20, 2):
            left = cell * col + 0
            right = cell * col + cell
            for row in range(-20, 20, 2):
                bottom = cell * row + 0
                top = cell * row + cell
                vertex(right, bottom, 0)
                vertex(left, bottom, 0)
                vertex(left, top, 0)
                vertex(right, top, 0)

        # Road tiles and border stripes collected by _create_track().
        for corners, rgb in self.road_poly:
            gl.glColor4f(rgb[0], rgb[1], rgb[2], 1)
            for point in corners:
                vertex(point[0], point[1], 0)

        gl.glEnd()

    def render_indicators(self, w, h):
        """Draw the instrument strip for a ``w`` x ``h`` window.

        Emits, in one ``GL_QUADS`` batch: a black backdrop, a vertical bar for
        hull speed, one vertical bar per wheel angular speed, and horizontal
        bars for the first wheel's joint angle and the hull's angular
        velocity.  The score label is not drawn in this variant.
        """
        col_w = w / 40.0
        row_h = h / 40.0

        def quad(color, corners):
            # One colour (alpha 1) followed by four vertices at z = 0.
            gl.glColor4f(color[0], color[1], color[2], 1)
            for px, py in corners:
                gl.glVertex3f(px, py, 0)

        def vertical_ind(place, val, color):
            quad(color, (
                ((place + 0) * col_w, row_h + row_h * val),
                ((place + 1) * col_w, row_h + row_h * val),
                ((place + 1) * col_w, row_h),
                ((place + 0) * col_w, row_h),
            ))

        def horiz_ind(place, val, color):
            quad(color, (
                ((place + 0) * col_w, 4 * row_h),
                ((place + val) * col_w, 4 * row_h),
                ((place + val) * col_w, 2 * row_h),
                ((place + 0) * col_w, 2 * row_h),
            ))

        gl.glBegin(gl.GL_QUADS)

        # Backdrop across the full width, five rows tall.
        quad((0, 0, 0), ((w, 0), (w, 5 * row_h), (0, 5 * row_h), (0, 0)))

        velocity_x = self.car.hull.linearVelocity[0]
        velocity_y = self.car.hull.linearVelocity[1]
        true_speed = np.sqrt(np.square(velocity_x) + np.square(velocity_y))
        vertical_ind(5, 0.02 * true_speed, (1, 1, 1))

        # ABS sensors: one bar per wheel.
        wheel_bars = (
            (7, (0.0, 0, 1)),
            (8, (0.0, 0, 1)),
            (9, (0.2, 0, 1)),
            (10, (0.2, 0, 1)),
        )
        for wheel_idx, (slot, tint) in enumerate(wheel_bars):
            vertical_ind(slot, 0.01 * self.car.wheels[wheel_idx].omega, tint)

        horiz_ind(20, -10.0 * self.car.wheels[0].joint.angle, (0, 1, 0))
        horiz_ind(30, -0.8 * self.car.hull.angularVelocity, (1, 0, 0))
        gl.glEnd()

    def _destroy(self):
        if not self.road:
            return
        for t in self.road:
            self.world.DestroyBody(t)
        self.road = []
        self.car.destroy()

    def _create_track(self):
        """Generate a random closed track and create its physics tiles.

        Steps: pick 12 random checkpoints around the origin, trace a path
        that steers towards successive checkpoints, cut out one closed lap,
        mark hard turns for a red/white border, then create one static sensor
        body per track segment.

        Side effects on success: ``self.road`` holds the tile bodies,
        ``self.road_poly`` gains one (vertices, color) entry per tile and per
        border stripe, and ``self.track`` holds the lap as
        ``(alpha, beta, x, y)`` tuples.  ``self.start_alpha`` is always set.

        Returns:
            True if a usable track was built; False if no closed lap was found
            or the lap's two ends do not meet closely enough.  reset() calls
            this in a loop until it returns True.
        """
        check_point = 12

        # Create checkpoints
        checkpoints = []
        for c in range(check_point):
            # Random angle within this checkpoint's sector, random radius.
            alpha = 2 * math.pi * c / check_point + rd.uniform(
                0, 2 * math.pi * 1 / check_point)
            rad = rd.uniform(TRACK_RAD / 3, TRACK_RAD)
            # First and last checkpoints are pinned to fixed angle and radius.
            if c == 0:
                alpha = 0
                rad = 1.5 * TRACK_RAD
            if c == check_point - 1:
                alpha = 2 * math.pi * c / check_point
                self.start_alpha = 2 * math.pi * (-0.5) / check_point
                rad = 1.5 * TRACK_RAD
            checkpoints.append(
                (alpha, rad * math.cos(alpha), rad * math.sin(alpha)))

        # print "\n".join(str(h) for h in checkpoints)
        # self.road_poly = [ (    # uncomment this to see checkpoints
        #    [ (tx,ty) for a,tx,ty in checkpoints ],
        #    (0.7,0.7,0.9) ) ]
        self.road = []

        # Go from one checkpoint to another to create track
        x, y, beta = 1.5 * TRACK_RAD, 0, 0
        dest_i = 0
        laps = 0
        track = []
        no_freeze = 2500  # hard cap on the number of traced points
        visited_other_side = False
        while True:
            alpha = math.atan2(y, x)
            # A lap is counted when alpha becomes positive again after having
            # been negative.
            if visited_other_side and alpha > 0:
                laps += 1
                visited_other_side = False
            if alpha < 0:
                visited_other_side = True
                alpha += 2 * math.pi
            while True:  # Find destination from checkpoints
                failed = True
                while True:
                    dest_alpha, dest_x, dest_y = checkpoints[dest_i %
                                                             len(checkpoints)]
                    if alpha <= dest_alpha:
                        failed = False
                        break
                    dest_i += 1
                    if dest_i % len(checkpoints) == 0:
                        break
                if not failed:
                    break
                # Wrapped past the last checkpoint: retry one turn earlier.
                alpha -= 2 * math.pi
                continue
            # (r1x, r1y) is the unit vector at angle beta; (p1x, p1y) is that
            # vector rotated by +90 degrees and is the direction of advance.
            r1x = math.cos(beta)
            r1y = math.sin(beta)
            p1x = -r1y
            p1y = r1x
            dest_dx = dest_x - x  # vector towards destination
            dest_dy = dest_y - y
            proj = r1x * dest_dx + r1y * dest_dy  # destination vector projected on rad
            # Keep beta within 1.5*pi of alpha.
            while beta - alpha > 1.5 * math.pi:
                beta -= 2 * math.pi
            while beta - alpha < -1.5 * math.pi:
                beta += 2 * math.pi
            prev_beta = beta
            proj *= SCALE
            # Turn towards the destination, limited to TRACK_TURN_RATE per step.
            if proj > 0.3:
                beta -= min(TRACK_TURN_RATE, abs(0.001 * proj))
            if proj < -0.3:
                beta += min(TRACK_TURN_RATE, abs(0.001 * proj))
            x += p1x * TRACK_DETAIL_STEP
            y += p1y * TRACK_DETAIL_STEP
            # Stored beta is the average of the angle before and after turning.
            track.append((alpha, prev_beta * 0.5 + beta * 0.5, x, y))
            if laps > 4:
                break
            no_freeze -= 1
            if no_freeze == 0:
                break
        # print "\n".join([str(t) for t in enumerate(track)])

        # Find closed loop range i1..i2, first loop should be ignored, second is OK
        i1, i2 = -1, -1
        i = len(track)
        # Scan backwards for the last two crossings of start_alpha.
        while True:
            i -= 1
            if i == 0:
                return False  # Failed
            pass_through_start = track[i][0] > self.start_alpha >= track[i -
                                                                         1][0]
            if pass_through_start and i2 == -1:
                i2 = i
            elif pass_through_start and i1 == -1:
                i1 = i
                break
        # if self.verbose == 1:  # Yonv1943
        #     print("Track generation: %i..%i -> %i-tiles track" % (i1, i2, i2 - i1))
        assert i1 != -1
        assert i2 != -1

        track = track[i1:i2 - 1]

        first_beta = track[0][1]
        first_perp_x = math.cos(first_beta)
        first_perp_y = math.sin(first_beta)
        # Length of perpendicular jump to put together head and tail
        well_glued_together = np.sqrt(
            np.square(first_perp_x * (track[0][2] - track[-1][2])) +
            np.square(first_perp_y * (track[0][3] - track[-1][3])))
        if well_glued_together > TRACK_DETAIL_STEP:
            return False

        # Red-white border on hard turns
        border = [False] * len(track)
        for i in range(len(track)):
            # Tile i qualifies when the BORDER_MIN_COUNT most recent beta
            # changes are all large enough and all have the same sign.
            # Negative indices wrap to the end of the (closed) track.
            good = True
            oneside = 0
            for neg in range(BORDER_MIN_COUNT):
                beta1 = track[i - neg - 0][1]
                beta2 = track[i - neg - 1][1]
                good &= abs(beta1 - beta2) > TRACK_TURN_RATE * 0.2
                oneside += np.sign(beta1 - beta2)
            good &= abs(oneside) == BORDER_MIN_COUNT
            border[i] = good
        # Extend every border mark back over the preceding tiles.
        for i in range(len(track)):
            for neg in range(BORDER_MIN_COUNT):
                border[i - neg] |= border[i]

        # Create tiles
        for i in range(len(track)):
            # Each tile is the quad between point i and point i - 1
            # (i == 0 pairs with the last point via index -1).
            alpha1, beta1, x1, y1 = track[i]
            alpha2, beta2, x2, y2 = track[i - 1]
            road1_l = (x1 - TRACK_WIDTH * math.cos(beta1),
                       y1 - TRACK_WIDTH * math.sin(beta1))
            road1_r = (x1 + TRACK_WIDTH * math.cos(beta1),
                       y1 + TRACK_WIDTH * math.sin(beta1))
            road2_l = (x2 - TRACK_WIDTH * math.cos(beta2),
                       y2 - TRACK_WIDTH * math.sin(beta2))
            road2_r = (x2 + TRACK_WIDTH * math.cos(beta2),
                       y2 + TRACK_WIDTH * math.sin(beta2))
            vertices = [road1_l, road1_r, road2_r, road2_l]
            # The shared fixture definition is re-pointed at this tile's quad.
            self.fd_tile.shape.vertices = vertices
            t = self.world.CreateStaticBody(fixtures=self.fd_tile)
            t.userData = t
            # Small brightness variation cycling every three tiles.
            c = 0.01 * (i % 3)
            t.color = [ROAD_COLOR[0] + c, ROAD_COLOR[1] + c, ROAD_COLOR[2] + c]
            t.road_visited = False
            t.road_friction = 1.0
            t.fixtures[0].sensor = True
            self.road_poly.append(([road1_l, road1_r, road2_r,
                                    road2_l], t.color))
            self.road.append(t)
            if border[i]:
                # Border stripe on the side given by the sign of the turn,
                # alternating white and red by tile index.
                side = np.sign(beta2 - beta1)
                b1_l = (x1 + side * TRACK_WIDTH * math.cos(beta1),
                        y1 + side * TRACK_WIDTH * math.sin(beta1))
                b1_r = (x1 + side * (TRACK_WIDTH + BORDER) * math.cos(beta1),
                        y1 + side * (TRACK_WIDTH + BORDER) * math.sin(beta1))
                b2_l = (x2 + side * TRACK_WIDTH * math.cos(beta2),
                        y2 + side * TRACK_WIDTH * math.sin(beta2))
                b2_r = (x2 + side * (TRACK_WIDTH + BORDER) * math.cos(beta2),
                        y2 + side * (TRACK_WIDTH + BORDER) * math.sin(beta2))
                self.road_poly.append(([b1_l, b1_r, b2_r,
                                        b2_l], (1, 1, 1) if i % 2 == 0 else
                                       (1, 0, 0)))
        self.track = track
        return True
Exemplo n.º 14
0
class CarRacing(gym.Env, EzPickle):
    metadata = {
        'render.modes': ['human', 'rgb_array', 'state_pixels'],
        'video.frames_per_second': FPS
    }

    def __init__(self, verbose=1):
        """Build the physics world and the discrete-action, stacked-frame spaces.

        Args:
            verbose: when equal to 1, track generation and early-termination
                messages are printed by other methods.
        """
        EzPickle.__init__(self)
        self.seed()

        # Physics: the contact listener is also stored on the instance.
        listener = FrictionDetector(self)
        self.contactListener_keepref = listener
        self.world = Box2D.b2World((0, 0), contactListener=listener)

        # Rendering handles, created lazily by render().
        self.viewer = None
        self.invisible_state_window = None
        self.invisible_video_window = None

        # Track / car; populated by reset().
        self.road = None
        self.car = None

        self.reward = 0.0
        self.prev_reward = 0.0
        self.verbose = verbose

        self.possible_actions = ("NOTHING", "LEFT", "RIGHT", "ACCELERATE",
                                 "BREAK")

        # Template fixture whose vertices are overwritten for every road tile.
        unit_square = [(0, 0), (1, 0), (1, -1), (0, -1)]
        self.fd_tile = fixtureDef(shape=polygonShape(vertices=unit_square))

        # One discrete action per entry of possible_actions.
        self.action_space = spaces.Discrete(len(self.possible_actions))

        # An observation is a stack of this many frames.
        frames_per_state = 4

        # Channel permutation used by _update_state(): the last channel moves
        # to the front and the others shift back by one.
        self._update_index = [frames_per_state - 1,
                              *range(frames_per_state - 1)]

        self.observation_space = spaces.Box(
            low=0,
            high=255,
            shape=(STATE_H, STATE_W, frames_per_state),
            dtype=np.uint8)
        self.state = np.zeros(self.observation_space.shape)

        # Length of the reward window used by step() for the early abort.
        self._last_rewards_size = 2 * FPS

    def seed(self, seed=None):
        """Re-create ``self.np_random`` from ``seed``.

        Returns:
            A one-element list holding the seed reported by
            ``seeding.np_random``.
        """
        rng, used_seed = seeding.np_random(seed)
        self.np_random = rng
        return [used_seed]

    def _destroy(self):
        if not self.road:
            return
        for t in self.road:
            self.world.DestroyBody(t)
        self.road = []
        self.car.destroy()

    def _create_track(self):
        """Generate a random closed track and create its physics tiles.

        Steps: pick 12 random checkpoints around the origin using
        ``self.np_random``, trace a path that steers towards successive
        checkpoints, cut out one closed lap, mark hard turns for a red/white
        border, then create one static sensor body per track segment.

        Side effects on success: ``self.road`` holds the tile bodies,
        ``self.road_poly`` gains one (vertices, color) entry per tile and per
        border stripe, and ``self.track`` holds the lap as
        ``(alpha, beta, x, y)`` tuples.  ``self.start_alpha`` is always set.

        Returns:
            True if a usable track was built; False if no closed lap was found
            or the lap's two ends do not meet closely enough.  reset() calls
            this in a loop until it returns True.
        """
        CHECKPOINTS = 12

        # Create checkpoints
        checkpoints = []
        for c in range(CHECKPOINTS):
            # Random angle within this checkpoint's sector, random radius.
            alpha = 2 * math.pi * c / CHECKPOINTS + self.np_random.uniform(
                0, 2 * math.pi * 1 / CHECKPOINTS)
            rad = self.np_random.uniform(TRACK_RAD / 3, TRACK_RAD)
            # First and last checkpoints are pinned to fixed angle and radius.
            if c == 0:
                alpha = 0
                rad = 1.5 * TRACK_RAD
            if c == CHECKPOINTS - 1:
                alpha = 2 * math.pi * c / CHECKPOINTS
                self.start_alpha = 2 * math.pi * (-0.5) / CHECKPOINTS
                rad = 1.5 * TRACK_RAD
            checkpoints.append(
                (alpha, rad * math.cos(alpha), rad * math.sin(alpha)))
        self.road = []

        # Go from one checkpoint to another to create track
        x, y, beta = 1.5 * TRACK_RAD, 0, 0
        dest_i = 0
        laps = 0
        track = []
        no_freeze = 2500  # hard cap on the number of traced points
        visited_other_side = False
        while True:
            alpha = math.atan2(y, x)
            # A lap is counted when alpha becomes positive again after having
            # been negative.
            if visited_other_side and alpha > 0:
                laps += 1
                visited_other_side = False
            if alpha < 0:
                visited_other_side = True
                alpha += 2 * math.pi
            while True:  # Find destination from checkpoints
                failed = True
                while True:
                    dest_alpha, dest_x, dest_y = checkpoints[dest_i %
                                                             len(checkpoints)]
                    if alpha <= dest_alpha:
                        failed = False
                        break
                    dest_i += 1
                    if dest_i % len(checkpoints) == 0:
                        break
                if not failed:
                    break
                # Wrapped past the last checkpoint: retry one turn earlier.
                alpha -= 2 * math.pi
                continue
            # (r1x, r1y) is the unit vector at angle beta; (p1x, p1y) is that
            # vector rotated by +90 degrees and is the direction of advance.
            r1x = math.cos(beta)
            r1y = math.sin(beta)
            p1x = -r1y
            p1y = r1x
            dest_dx = dest_x - x  # vector towards destination
            dest_dy = dest_y - y
            proj = r1x * dest_dx + r1y * dest_dy  # destination vector projected on rad
            # Keep beta within 1.5*pi of alpha.
            while beta - alpha > 1.5 * math.pi:
                beta -= 2 * math.pi
            while beta - alpha < -1.5 * math.pi:
                beta += 2 * math.pi
            prev_beta = beta
            proj *= SCALE
            # Turn towards the destination, limited to TRACK_TURN_RATE per step.
            if proj > 0.3:
                beta -= min(TRACK_TURN_RATE, abs(0.001 * proj))
            if proj < -0.3:
                beta += min(TRACK_TURN_RATE, abs(0.001 * proj))
            x += p1x * TRACK_DETAIL_STEP
            y += p1y * TRACK_DETAIL_STEP
            # Stored beta is the average of the angle before and after turning.
            track.append((alpha, prev_beta * 0.5 + beta * 0.5, x, y))
            if laps > 4:
                break
            no_freeze -= 1
            if no_freeze == 0:
                break

        # Find closed loop range i1..i2, first loop should be ignored, second is OK
        i1, i2 = -1, -1
        i = len(track)
        # Scan backwards for the last two crossings of start_alpha.
        while True:
            i -= 1
            if i == 0:
                return False  # Failed
            pass_through_start = track[i][0] > self.start_alpha and track[
                i - 1][0] <= self.start_alpha
            if pass_through_start and i2 == -1:
                i2 = i
            elif pass_through_start and i1 == -1:
                i1 = i
                break
        if self.verbose == 1:
            print("Track generation: %i..%i -> %i-tiles track" %
                  (i1, i2, i2 - i1))
        assert i1 != -1
        assert i2 != -1

        track = track[i1:i2 - 1]

        first_beta = track[0][1]
        first_perp_x = math.cos(first_beta)
        first_perp_y = math.sin(first_beta)
        # Length of perpendicular jump to put together head and tail
        well_glued_together = np.sqrt(
            np.square(first_perp_x * (track[0][2] - track[-1][2])) +
            np.square(first_perp_y * (track[0][3] - track[-1][3])))
        if well_glued_together > TRACK_DETAIL_STEP:
            return False

        # Red-white border on hard turns
        border = [False] * len(track)
        for i in range(len(track)):
            # Tile i qualifies when the BORDER_MIN_COUNT most recent beta
            # changes are all large enough and all have the same sign.
            # Negative indices wrap to the end of the (closed) track.
            good = True
            oneside = 0
            for neg in range(BORDER_MIN_COUNT):
                beta1 = track[i - neg - 0][1]
                beta2 = track[i - neg - 1][1]
                good &= abs(beta1 - beta2) > TRACK_TURN_RATE * 0.2
                oneside += np.sign(beta1 - beta2)
            good &= abs(oneside) == BORDER_MIN_COUNT
            border[i] = good
        # Extend every border mark back over the preceding tiles.
        for i in range(len(track)):
            for neg in range(BORDER_MIN_COUNT):
                border[i - neg] |= border[i]

        # Create tiles
        for i in range(len(track)):
            # Each tile is the quad between point i and point i - 1
            # (i == 0 pairs with the last point via index -1).
            alpha1, beta1, x1, y1 = track[i]
            alpha2, beta2, x2, y2 = track[i - 1]
            road1_l = (x1 - TRACK_WIDTH * math.cos(beta1),
                       y1 - TRACK_WIDTH * math.sin(beta1))
            road1_r = (x1 + TRACK_WIDTH * math.cos(beta1),
                       y1 + TRACK_WIDTH * math.sin(beta1))
            road2_l = (x2 - TRACK_WIDTH * math.cos(beta2),
                       y2 - TRACK_WIDTH * math.sin(beta2))
            road2_r = (x2 + TRACK_WIDTH * math.cos(beta2),
                       y2 + TRACK_WIDTH * math.sin(beta2))
            vertices = [road1_l, road1_r, road2_r, road2_l]
            # The shared fixture definition is re-pointed at this tile's quad.
            self.fd_tile.shape.vertices = vertices
            t = self.world.CreateStaticBody(fixtures=self.fd_tile)
            t.userData = t
            # Small brightness variation cycling every three tiles.
            c = 0.01 * (i % 3)
            t.color = [ROAD_COLOR[0] + c, ROAD_COLOR[1] + c, ROAD_COLOR[2] + c]
            t.road_visited = False
            t.road_friction = 1.0
            t.fixtures[0].sensor = True
            self.road_poly.append(([road1_l, road1_r, road2_r,
                                    road2_l], t.color))
            self.road.append(t)
            if border[i]:
                # Border stripe on the side given by the sign of the turn,
                # alternating white and red by tile index.
                side = np.sign(beta2 - beta1)
                b1_l = (x1 + side * TRACK_WIDTH * math.cos(beta1),
                        y1 + side * TRACK_WIDTH * math.sin(beta1))
                b1_r = (x1 + side * (TRACK_WIDTH + BORDER) * math.cos(beta1),
                        y1 + side * (TRACK_WIDTH + BORDER) * math.sin(beta1))
                b2_l = (x2 + side * TRACK_WIDTH * math.cos(beta2),
                        y2 + side * TRACK_WIDTH * math.sin(beta2))
                b2_r = (x2 + side * (TRACK_WIDTH + BORDER) * math.cos(beta2),
                        y2 + side * (TRACK_WIDTH + BORDER) * math.sin(beta2))
                self.road_poly.append(([b1_l, b1_r, b2_r,
                                        b2_l], (1, 1, 1) if i % 2 == 0 else
                                       (1, 0, 0)))
        self.track = track
        return True

    def reset(self):
        """Start a new episode and return the first stacked observation.

        Destroys the previous track and car, clears the counters and the frame
        stack, regenerates a track (retrying until ``_create_track``
        succeeds), spawns the car and runs 24 action-less steps.

        Returns:
            The state returned by the last of those warm-up steps.
        """
        self._destroy()

        self.reward = 0.0
        self.prev_reward = 0.0
        self.tile_visited_count = 0
        self.t = 0.0
        self.road_poly = []
        self.state = np.zeros(self.observation_space.shape)
        self._last_rewards = []

        # _create_track() returns False for unusable tracks; keep trying.
        while not self._create_track():
            if self.verbose == 1:
                print(
                    "retry to generate track (normal if there are not many instances of this message)"
                )

        self.car = Car(self.world, *self.track[0][1:4])

        # 20 frames of start-up noise plus 4 frames to fill the stack.
        warmup_steps = 24
        for _ in range(warmup_steps):
            obs = self.step(None)[0]

        return obs

    def _update_state(self, new_frame):
        self.state[:, :, -1] = new_frame
        self.state = self.state[:, :, self._update_index]

    def _transform_action(self, action):
        if action == 0: action = [0, 0, 0.0]  # Nothing
        if action == 1: action = [-1, 0, 0.0]  # Left
        if action == 2: action = [+1, 0, 0.0]  # Right
        if action == 3: action = [0, +1, 0.0]  # Accelerate
        if action == 4: action = [0, 0, 0.8]  # break

        return action

    def step(self, action):
        """Advance the simulation by one frame.

        Args:
            action: a discrete action code (see ``_transform_action``), an
                indexable ``[steer, gas, brake]`` triple, or ``None`` for a
                warm-up step that applies no controls and computes no reward.

        Returns:
            tuple: ``(self.state, step_reward, done, {})``.
        """
        controls = self._transform_action(action)

        if controls is not None:
            # Steering is applied with inverted sign.
            self.car.steer(-controls[0])
            self.car.gas(controls[1])
            self.car.brake(controls[2])

        dt = 1.0 / FPS
        self.car.step(dt)
        self.world.Step(dt, 6 * 30, 2 * 30)
        self.t += dt

        self._update_state(self.render("state_pixels"))

        # Warm-up step (called from reset()): no reward, never terminal.
        if controls is None:
            return self.state, 0, False, {}

        # Constant per-step cost; fuel usage is reset but not charged.
        self.reward -= 0.1
        self.car.fuel_spent = 0.0
        step_reward = self.reward - self.prev_reward
        self.prev_reward = self.reward

        x, y = self.car.hull.position

        done = False
        failure = None
        if self.tile_visited_count == len(self.track):
            # Every tile visited: the lap is complete.
            done = True
        elif abs(x) > PLAYFIELD or abs(y) > PLAYFIELD:
            failure = "Killed because out of playing field"
        elif self.reward > 1000 or self.reward < -200:
            failure = "Killed because of too low or too high reward"
        elif len(self._last_rewards) == self._last_rewards_size and max(
                self._last_rewards) <= 0:
            # The reward window is full and holds no positive step reward.
            failure = "Killed because of no recent progress"

        if failure is not None:
            if self.verbose == 1:
                print(failure)
            done = True
            step_reward = -100

        # Sliding window of the most recent step rewards.
        self._last_rewards.append(step_reward)
        if len(self._last_rewards) > self._last_rewards_size:
            self._last_rewards.pop(0)

        return self.state, step_reward, done, {}

    def render(self, mode='human'):
        """Render the scene to the window or to a pixel array.

        Args:
            mode: ``'human'`` flips the window buffer; ``'rgb_array'`` reads
                back a VIDEO_W x VIDEO_H colour frame; ``'state_pixels'``
                reads back a STATE_W x STATE_H frame converted to grayscale.

        Returns:
            ``None`` if reset() has not been called yet,
            ``self.viewer.isopen`` in ``'human'`` mode, a writable
            ``(VP_H, VP_W, 3)`` uint8 array for ``'rgb_array'``, or a
            ``(VP_H, VP_W)`` float array for ``'state_pixels'``.
        """
        assert mode in ['human', 'state_pixels', 'rgb_array']
        if self.viewer is None:
            # Imported lazily so that merely constructing the environment
            # does not import the rendering module.
            from gym.envs.classic_control import rendering
            self.viewer = rendering.Viewer(WINDOW_W, WINDOW_H)
            self.score_label = pyglet.text.Label('0000',
                                                 font_size=36,
                                                 x=20,
                                                 y=WINDOW_H * 2.5 / 40.00,
                                                 anchor_x='left',
                                                 anchor_y='center',
                                                 color=(255, 255, 255, 255))
            self.transform = rendering.Transform()

        if "t" not in self.__dict__: return  # reset() not called yet

        zoom = 0.1 * SCALE * max(1 - self.t, 0) + ZOOM * SCALE * min(
            self.t, 1)  # Animate zoom first second
        scroll_x = self.car.hull.position[0]
        scroll_y = self.car.hull.position[1]
        angle = -self.car.hull.angle
        vel = self.car.hull.linearVelocity
        # Once the car is moving, orient the camera along the velocity
        # instead of along the hull.
        if np.linalg.norm(vel) > 0.5:
            angle = math.atan2(vel[0], vel[1])
        self.transform.set_scale(zoom, zoom)
        self.transform.set_translation(
            WINDOW_W / 2 - (scroll_x * zoom * math.cos(angle) -
                            scroll_y * zoom * math.sin(angle)),
            WINDOW_H / 4 - (scroll_x * zoom * math.sin(angle) +
                            scroll_y * zoom * math.cos(angle)))
        self.transform.set_rotation(angle)

        self.car.draw(self.viewer, mode != "state_pixels")

        win = self.viewer.window
        win.switch_to()
        win.dispatch_events()

        win.clear()
        t = self.transform
        if mode == 'rgb_array':
            VP_W = VIDEO_W
            VP_H = VIDEO_H
        elif mode == 'state_pixels':
            VP_W = STATE_W
            VP_H = STATE_H
        else:
            # When the GL context exposes an `_nscontext`, scale the viewport
            # by its backing scale factor.
            pixel_scale = 1
            if hasattr(win.context, '_nscontext'):
                pixel_scale = win.context._nscontext.view().backingScaleFactor(
                )  # pylint: disable=protected-access
            VP_W = int(pixel_scale * WINDOW_W)
            VP_H = int(pixel_scale * WINDOW_H)

        gl.glViewport(0, 0, VP_W, VP_H)
        t.enable()
        self.render_road()
        for geom in self.viewer.onetime_geoms:
            geom.render()
        self.viewer.onetime_geoms = []
        t.disable()

        # Indicators are intentionally not drawn in this variant.

        if mode == 'human':
            win.flip()
            return self.viewer.isopen

        image_data = pyglet.image.get_buffer_manager().get_color_buffer(
        ).get_image_data()
        # np.fromstring(..., sep='') is the deprecated binary mode; frombuffer
        # is the documented replacement (it returns a read-only view).
        arr = np.frombuffer(image_data.get_data(), dtype=np.uint8)
        arr = arr.reshape(VP_H, VP_W, 4)
        # Reverse the row order and drop the 4th channel.
        arr = arr[::-1, :, 0:3]

        # Convert to grayscale
        if mode == 'state_pixels':
            return np.dot(arr[..., :3], [0.299, 0.587, 0.114])

        # Copy so the caller gets a writable array, as before.
        return arr.copy()

    def close(self):
        """Close the viewer, if one was created, and forget it."""
        viewer = self.viewer
        if viewer is None:
            return
        viewer.close()
        self.viewer = None

    def render_road(self):
        """Emit the playfield, its checker pattern and all road polygons.

        Everything is drawn inside a single ``GL_QUADS`` begin/end pair.
        """
        vertex = gl.glVertex3f
        gl.glBegin(gl.GL_QUADS)

        # Base quad covering the whole playfield.
        gl.glColor4f(0.4, 0.8, 0.4, 1.0)
        field_corners = (
            (-PLAYFIELD, +PLAYFIELD),
            (+PLAYFIELD, +PLAYFIELD),
            (+PLAYFIELD, -PLAYFIELD),
            (-PLAYFIELD, -PLAYFIELD),
        )
        for corner_x, corner_y in field_corners:
            vertex(corner_x, corner_y, 0)

        # Slightly different green squares on every second grid cell.
        gl.glColor4f(0.4, 0.9, 0.4, 1.0)
        cell = PLAYFIELD / 20.0
        for col in range(-20, 20, 2):
            left = cell * col + 0
            right = cell * col + cell
            for row in range(-20, 20, 2):
                bottom = cell * row + 0
                top = cell * row + cell
                vertex(right, bottom, 0)
                vertex(left, bottom, 0)
                vertex(left, top, 0)
                vertex(right, top, 0)

        # Road tiles and border stripes collected by _create_track().
        for corners, rgb in self.road_poly:
            gl.glColor4f(rgb[0], rgb[1], rgb[2], 1)
            for point in corners:
                vertex(point[0], point[1], 0)

        gl.glEnd()

    def render_indicators(self, W, H):
        """Draw the instrument strip for a ``W`` x ``H`` window.

        Emits, in one ``GL_QUADS`` batch: a black backdrop, a vertical bar for
        hull speed, one vertical bar per wheel angular speed, and horizontal
        bars for the first wheel's joint angle and the hull's angular
        velocity.  The score label is drawn afterwards.
        """
        col_w = W / 40.0
        row_h = H / 40.0

        def quad(color, corners):
            # One colour (alpha 1) followed by four vertices at z = 0.
            gl.glColor4f(color[0], color[1], color[2], 1)
            for px, py in corners:
                gl.glVertex3f(px, py, 0)

        def vertical_ind(place, val, color):
            quad(color, (
                ((place + 0) * col_w, row_h + row_h * val),
                ((place + 1) * col_w, row_h + row_h * val),
                ((place + 1) * col_w, row_h),
                ((place + 0) * col_w, row_h),
            ))

        def horiz_ind(place, val, color):
            quad(color, (
                ((place + 0) * col_w, 4 * row_h),
                ((place + val) * col_w, 4 * row_h),
                ((place + val) * col_w, 2 * row_h),
                ((place + 0) * col_w, 2 * row_h),
            ))

        gl.glBegin(gl.GL_QUADS)

        # Backdrop across the full width, five rows tall.
        quad((0, 0, 0), ((W, 0), (W, 5 * row_h), (0, 5 * row_h), (0, 0)))

        velocity_x = self.car.hull.linearVelocity[0]
        velocity_y = self.car.hull.linearVelocity[1]
        true_speed = np.sqrt(np.square(velocity_x) + np.square(velocity_y))
        vertical_ind(5, 0.02 * true_speed, (1, 1, 1))

        # ABS sensors: one bar per wheel.
        wheel_bars = (
            (7, (0.0, 0, 1)),
            (8, (0.0, 0, 1)),
            (9, (0.2, 0, 1)),
            (10, (0.2, 0, 1)),
        )
        for wheel_idx, (slot, tint) in enumerate(wheel_bars):
            vertical_ind(slot, 0.01 * self.car.wheels[wheel_idx].omega, tint)

        horiz_ind(20, -10.0 * self.car.wheels[0].joint.angle, (0, 1, 0))
        horiz_ind(30, -0.8 * self.car.hull.angularVelocity, (1, 0, 0))
        gl.glEnd()

        self.score_label.text = "%04i" % self.reward
        self.score_label.draw()
Exemplo n.º 15
0
class CarRacingPoSBase(gym.Env, EzPickle):
    """Base class for a car-racing environment with pluggable spaces.

    The class provides random track generation, rendering and ray-cast
    distance sensing against the track tiles.  Subclasses must implement
    ``set_action_space``, ``set_observation_space`` and ``step``; the base
    versions raise ``NotImplementedError``.
    """
    # Render modes accepted by `render` and the frame rate reported for
    # video recording.
    metadata = {
        'render.modes': ['human', 'rgb_array', 'state_pixels'],
        'video.frames_per_second': FPS
    }

    def __init__(self, verbose=0):
        """Create the physics world and let the subclass define its spaces.

        ``verbose`` set to 1 enables the track-generation progress messages.

        Layout described by the original author (the concrete spaces are
        built by ``set_action_space`` / ``set_observation_space``):

        Action Space:
        1) Steer: Discrete 3  - NOOP[0], Left[1], Right[2] - params: min: 0, max: 2
        2) Gas: Discrete 2 - NOOP[0], Go[1] - params: min: 0, max: 1
        3) Brake: Discrete 2  - NOOP[0], Brake[1] - params: min: 0, max: 1

        Observation Space:
        1) Speed: SPEED_INTERVALS + 1 discrete speeds
        2) Sensors: RAY_CAST_INTERVALS * NUM_SENSORS
        3) Wheel off or not ( for each wheel): 2
        4) Steering: STEER_INTERVALS
        """
        EzPickle.__init__(self)
        self.seed()

        # The listener is stored on the instance as well as handed to the
        # world (the attribute name suggests this keeps it referenced).
        listener = FrictionDetector(self)
        self.contactListener_keepref = listener
        self.world = Box2D.b2World((0, 0), contactListener=listener)

        # Rendering handles; the viewer is created on the first render().
        self.viewer = None
        self.invisible_state_window = None
        self.invisible_video_window = None

        # Track bodies and car are created by reset().
        self.road = None
        self.car = None

        self.reward = 0.0
        self.prev_reward = 0.0
        self.verbose = verbose

        # Reusable fixture definition; its vertices are overwritten for every
        # tile created in _create_track().
        placeholder_shape = polygonShape(
            vertices=[(0, 0), (1, 0), (1, -1), (0, -1)])
        self.fd_tile = fixtureDef(shape=placeholder_shape)
        self.slowness = 0

        self.set_action_space()
        self.set_observation_space()

    # Override with subclass
    def set_action_space(self):
        """Define the action space; the base class provides no default.

        Called from ``__init__``.  Raises ``NotImplementedError`` unless a
        subclass overrides it.
        """
        raise NotImplementedError()

    # Override with subclass
    def set_observation_space(self):
        """Define the observation space; the base class provides no default.

        Called from ``__init__``.  Raises ``NotImplementedError`` unless a
        subclass overrides it.
        """
        raise NotImplementedError()

    def seed(self, seed=None):
        """Install a new random generator and return the seed in a list.

        The generator returned by ``seeding.np_random`` is stored as
        ``self.np_random``; the seed it reports is returned as ``[seed]``.
        """
        generator, used_seed = seeding.np_random(seed)
        self.np_random = generator
        return [used_seed]

    def _destroy(self):
        """Remove the road bodies and the car from the physics world.

        Does nothing when no road exists (``self.road`` is ``None`` or empty).
        """
        if self.road:
            for tile_body in self.road:
                self.world.DestroyBody(tile_body)
            self.road = []
            self.car.destroy()

    def _create_track(self):
        """Generate a random closed track and create its tile bodies.

        Random checkpoints are placed around the origin, a path is walked
        from checkpoint to checkpoint in steps of ``TRACK_DETAIL_STEP``, one
        closed lap is cut out of that path, and a static sensor body is
        created for every step of the lap.

        Reads ``self.track_direction`` and appends to ``self.road_poly``;
        both are set up by ``reset`` before this is called.  On success
        ``self.road`` holds the tile bodies and ``self.track`` the list of
        ``(alpha, beta, x, y)`` points.

        Returns ``True`` on success and ``False`` when no closed lap could be
        extracted or the ends of the lap do not meet closely enough; the
        caller is expected to retry.
        """
        CHECKPOINTS = 12

        # direction = -1 for right turns, 1 for left turns
        direction = self.track_direction

        # Create checkpoints
        # Each checkpoint is (angle, x, y): a random angle inside its own
        # 1/CHECKPOINTS slice of the circle and a random radius between
        # TRACK_RAD / 3 and TRACK_RAD.  The first and last checkpoints are
        # pinned to a fixed angle at radius 1.5 * TRACK_RAD.
        checkpoints = []
        for c in range(CHECKPOINTS):
            alpha = 2 * math.pi * c / CHECKPOINTS + self.np_random.uniform(
                0, 2 * math.pi * 1 / CHECKPOINTS)
            rad = self.np_random.uniform(TRACK_RAD / 3, TRACK_RAD)
            if c == 0:
                alpha = 0
                rad = 1.5 * TRACK_RAD
            if c == CHECKPOINTS - 1:
                alpha = 2 * math.pi * c / CHECKPOINTS
                # Angle used further down to detect where a lap starts.
                self.start_alpha = 2 * math.pi * (-0.5) / CHECKPOINTS
                rad = 1.5 * TRACK_RAD
            checkpoints.append(
                (alpha, rad * math.cos(alpha), rad * math.sin(alpha)))

        # print("\n".join(str(h) for h in checkpoints))
        # self.road_poly = [ (    # uncomment this to see checkpoints
        #    [ (tx,ty) for a,tx,ty in checkpoints ],
        #    (0.7,0.7,0.9) ) ]
        self.road = []

        # Go from one checkpoint to another to create track
        # (x, y) is the current position, beta the current heading angle.
        x, y, beta = 1.5 * TRACK_RAD, 0, 0
        dest_i = 0
        laps = 0
        track = []
        # Hard cap on the number of steps so the walk always terminates.
        no_freeze = 2500
        visited_other_side = False
        while True:
            alpha = math.atan2(y, x)
            # A lap is counted each time the polar angle becomes positive
            # again after having been negative.
            if visited_other_side and alpha > 0:
                laps += 1
                visited_other_side = False
            if alpha < 0:
                visited_other_side = True
                alpha += 2 * math.pi
            while True:  # Find destination from checkpoints
                failed = True
                while True:
                    dest_alpha, dest_x, dest_y = checkpoints[dest_i %
                                                             len(checkpoints)]
                    if alpha <= dest_alpha:
                        failed = False
                        break
                    dest_i += 1
                    if dest_i % len(checkpoints) == 0:
                        break
                if not failed:
                    break
                # No checkpoint lies ahead in this revolution: shift alpha
                # back by a full turn and search again.
                alpha -= 2 * math.pi
                continue
            # (r1x, r1y) is the unit vector at angle beta, (p1x, p1y) the
            # same vector rotated by 90 degrees; the walk advances along p1.
            r1x = math.cos(beta)
            r1y = math.sin(beta)
            p1x = -r1y
            p1y = r1x
            dest_dx = dest_x - x  # vector towards destination
            dest_dy = dest_y - y
            proj = r1x * dest_dx + r1y * dest_dy  # destination vector projected on rad
            # Keep beta within 1.5 * pi of alpha.
            while beta - alpha > 1.5 * math.pi:
                beta -= 2 * math.pi
            while beta - alpha < -1.5 * math.pi:
                beta += 2 * math.pi
            prev_beta = beta
            proj *= SCALE
            # Turn towards the destination, by at most TRACK_TURN_RATE per
            # step; projections within +/-0.3 leave the heading unchanged.
            if proj > 0.3:
                beta -= min(TRACK_TURN_RATE, abs(0.001 * proj))
            if proj < -0.3:
                beta += min(TRACK_TURN_RATE, abs(0.001 * proj))
            x += p1x * TRACK_DETAIL_STEP
            y += p1y * TRACK_DETAIL_STEP
            # The walk itself always uses the unscaled x, y; only the stored
            # point is multiplied by `direction`.  The stored beta is the
            # mean of the heading before and after this step.
            track.append((alpha, prev_beta * 0.5 + beta * 0.5, direction * x,
                          direction * y))
            if laps > 4:
                break
            no_freeze -= 1
            if no_freeze == 0:
                break
        # print "\n".join([str(t) for t in enumerate(track)])

        # Find closed loop range i1..i2, first loop should be ignored, second is OK
        # The path is scanned backwards; i2 is the last crossing of
        # start_alpha and i1 the crossing before it.
        i1, i2 = -1, -1
        i = len(track)
        while True:
            i -= 1
            if i == 0:
                return False  # Failed
            pass_through_start = track[i][0] > self.start_alpha and track[
                i - 1][0] <= self.start_alpha
            if pass_through_start and i2 == -1:
                i2 = i
            elif pass_through_start and i1 == -1:
                i1 = i
                break
        if self.verbose == 1:
            print("Track generation: %i..%i -> %i-tiles track" %
                  (i1, i2, i2 - i1))
        assert i1 != -1
        assert i2 != -1

        track = track[i1:i2 - 1]

        first_beta = track[0][1]
        first_perp_x = math.cos(first_beta)
        first_perp_y = math.sin(first_beta)
        # Length of perpendicular jump to put together head and tail
        well_glued_together = np.sqrt(
            np.square(first_perp_x * (track[0][2] - track[-1][2])) +
            np.square(first_perp_y * (track[0][3] - track[-1][3])))
        # Reject the lap when its two ends are further apart than one step.
        if well_glued_together > TRACK_DETAIL_STEP:
            return False

        # Red-white border on hard turns
        # A point qualifies when each of the last BORDER_MIN_COUNT heading
        # changes exceeds 0.2 * TRACK_TURN_RATE and all have the same sign.
        # Negative indices wrap around, so the check spans the lap's seam.
        border = [False] * len(track)
        for i in range(len(track)):
            good = True
            oneside = 0
            for neg in range(BORDER_MIN_COUNT):
                beta1 = track[i - neg - 0][1]
                beta2 = track[i - neg - 1][1]
                good &= abs(beta1 - beta2) > TRACK_TURN_RATE * 0.2
                oneside += np.sign(beta1 - beta2)
            good &= abs(oneside) == BORDER_MIN_COUNT
            border[i] = good
        # Extend every flagged point backwards over the points that
        # contributed to its check.
        for i in range(len(track)):
            for neg in range(BORDER_MIN_COUNT):
                border[i - neg] |= border[i]

        # Create tiles
        # Every tile is the quad between point i and its predecessor
        # (i - 1 wraps to the last point for i == 0, closing the loop).
        for i in range(len(track)):
            alpha1, beta1, x1, y1 = track[i]
            alpha2, beta2, x2, y2 = track[i - 1]
            road1_l = (x1 - TRACK_WIDTH * math.cos(beta1),
                       y1 - TRACK_WIDTH * math.sin(beta1))
            road1_r = (x1 + TRACK_WIDTH * math.cos(beta1),
                       y1 + TRACK_WIDTH * math.sin(beta1))
            road2_l = (x2 - TRACK_WIDTH * math.cos(beta2),
                       y2 - TRACK_WIDTH * math.sin(beta2))
            road2_r = (x2 + TRACK_WIDTH * math.cos(beta2),
                       y2 + TRACK_WIDTH * math.sin(beta2))
            vertices = [road1_l, road1_r, road2_r, road2_l]
            # The shared fixture definition is re-pointed at this tile's
            # vertices before each body is created.
            self.fd_tile.shape.vertices = vertices
            t = self.world.CreateStaticBody(fixtures=self.fd_tile)
            t.userData = t
            # Small colour offset cycling over three consecutive tiles.
            c = 0.01 * (i % 3)
            t.color = [ROAD_COLOR[0] + c, ROAD_COLOR[1] + c, ROAD_COLOR[2] + c]
            t.road_visited = False
            t.road_friction = 1.0
            t.fixtures[0].sensor = True
            self.road_poly.append(([road1_l, road1_r, road2_r,
                                    road2_l], t.color))
            self.road.append(t)
            if border[i]:
                # The border strip is drawn on the side given by the sign of
                # the heading change; colours alternate white / red.
                side = np.sign(beta2 - beta1)
                b1_l = (x1 + side * TRACK_WIDTH * math.cos(beta1),
                        y1 + side * TRACK_WIDTH * math.sin(beta1))
                b1_r = (x1 + side * (TRACK_WIDTH + BORDER) * math.cos(beta1),
                        y1 + side * (TRACK_WIDTH + BORDER) * math.sin(beta1))
                b2_l = (x2 + side * TRACK_WIDTH * math.cos(beta2),
                        y2 + side * TRACK_WIDTH * math.sin(beta2))
                b2_r = (x2 + side * (TRACK_WIDTH + BORDER) * math.cos(beta2),
                        y2 + side * (TRACK_WIDTH + BORDER) * math.sin(beta2))
                self.road_poly.append(([b1_l, b1_r, b2_r,
                                        b2_l], (1, 1, 1) if i % 2 == 0 else
                                       (1, 0, 0)))
        self.track = track
        return True

    def reset(self):
        """Start a new episode on a freshly generated track.

        Destroys the previous road and car, clears the reward and timing
        state, picks a track direction, regenerates the track until
        ``_create_track`` succeeds and places a new car on the first track
        point.

        The track direction is drawn from ``self.np_random`` so that it is
        governed by ``seed()`` like the rest of the track generation, instead
        of by the process-wide ``random`` module.

        Returns the first element of ``self.step(None)``.
        """
        self._destroy()
        self.reward = 0.0
        self.prev_reward = 0.0
        self.tile_visited_count = 0
        self.t = 0.0
        self.road_poly = []
        # int() keeps the direction a plain Python int rather than a NumPy
        # scalar, as it was with random.choice.
        self.track_direction = int(self.np_random.choice([-1, 1]))
        if self.viewer:
            self.viewer.geoms = []

        # Track generation can fail; keep trying until it succeeds.
        while not self._create_track():
            if self.verbose == 1:
                print(
                    "retry to generate track (normal if there are not many of this messages)"
                )
        # track[0][1:4] is (beta, x, y) of the first track point.
        self.car = Car(self.world, *self.track[0][1:4], draw_car=True)

        return self.step(None)[0]

    def step(self, action):
        """Advance the simulation by one step; the base class has no default.

        ``reset`` calls ``self.step(None)`` and uses element 0 of the result.
        Raises ``NotImplementedError`` unless a subclass overrides it.
        """
        raise NotImplementedError()

    def render(self, mode='human'):
        """Render the current frame.

        ``mode`` must be ``'human'``, ``'state_pixels'`` or ``'rgb_array'``.
        The viewer, score label and transform are created lazily on the
        first call.

        Returns ``None`` when ``reset`` has not been called yet,
        ``self.viewer.isopen`` in ``'human'`` mode, and otherwise a writable
        ``(VP_H, VP_W, 3)`` ``uint8`` array read back from the colour
        buffer (rows flipped vertically, fourth channel dropped).
        """
        assert mode in ['human', 'state_pixels', 'rgb_array']
        if self.viewer is None:
            from gym.envs.classic_control import rendering
            self.viewer = rendering.Viewer(WINDOW_W, WINDOW_H)
            self.score_label = pyglet.text.Label('0000',
                                                 font_size=36,
                                                 x=20,
                                                 y=WINDOW_H * 2.5 / 40.00,
                                                 anchor_x='left',
                                                 anchor_y='center',
                                                 color=(255, 255, 255, 255))
            self.transform = rendering.Transform()

        if "t" not in self.__dict__:
            return  # reset() not called yet

        zoom = 0.3 * SCALE * max(1 - self.t, 0) + ZOOM * SCALE * min(
            self.t, 1)  # Animate zoom first second
        scroll_x = self.car.hull.position[0]
        scroll_y = self.car.hull.position[1]
        # The view follows the hull's orientation, or the direction of
        # travel once the car moves faster than 0.5.
        angle = -self.car.hull.angle
        vel = self.car.hull.linearVelocity
        if np.linalg.norm(vel) > 0.5:
            angle = math.atan2(vel[0], vel[1])
        self.transform.set_scale(zoom, zoom)
        self.transform.set_translation(
            WINDOW_W / 2 - (scroll_x * zoom * math.cos(angle) -
                            scroll_y * zoom * math.sin(angle)),
            WINDOW_H / 2 - (scroll_x * zoom * math.sin(angle) +
                            scroll_y * zoom * math.cos(angle)))
        self.transform.set_rotation(angle)
        self.car.draw(self.viewer, mode != "state_pixels")

        win = self.viewer.window
        win.switch_to()
        win.dispatch_events()

        win.clear()
        t = self.transform
        if mode == 'rgb_array':
            VP_W = VIDEO_W
            VP_H = VIDEO_H
        elif mode == 'state_pixels':
            VP_W = STATE_W
            VP_H = STATE_H
        else:
            pixel_scale = 1
            if hasattr(win.context, '_nscontext'):
                pixel_scale = win.context._nscontext.view().backingScaleFactor(
                )  # pylint: disable=protected-access
            VP_W = int(pixel_scale * WINDOW_W)
            VP_H = int(pixel_scale * WINDOW_H)

        gl.glViewport(0, 0, VP_W, VP_H)
        # World-space drawing happens with the camera transform enabled.
        t.enable()
        self.render_road()
        self.render_raycasts()
        self.render_wall_segments()
        self.render_intersections()
        for geom in self.viewer.onetime_geoms:
            geom.render()
        for geom in self.viewer.geoms:
            geom.render()
        self.viewer.onetime_geoms = []
        t.disable()
        # The indicator strip is drawn in window coordinates.
        self.render_indicators(WINDOW_W, WINDOW_H)

        if mode == 'human':
            win.flip()
            return self.viewer.isopen

        image_data = pyglet.image.get_buffer_manager().get_color_buffer(
        ).get_image_data()
        # np.frombuffer replaces the deprecated binary mode of
        # np.fromstring.  It yields a read-only view of the pixel bytes, so
        # the final copy() hands the caller an independent, writable array
        # as before.
        pixels = np.frombuffer(image_data.data, dtype=np.uint8)
        pixels = pixels.reshape(VP_H, VP_W, 4)
        return pixels[::-1, :, 0:3].copy()

    def close(self):
        """Close the viewer, if one was created, and forget it."""
        if self.viewer is None:
            return
        self.viewer.close()
        self.viewer = None

    def render_road(self):
        """Draw the playfield background, its grid and every road polygon.

        Everything is emitted as quads inside one ``glBegin``/``glEnd``
        pair: a square of half-size ``PLAYFIELD``, a lighter patch on every
        second cell of a 40 x 40 grid, then each entry of ``self.road_poly``
        in its own colour.
        """
        gl.glBegin(gl.GL_QUADS)

        # Background square.
        gl.glColor4f(0.4, 0.8, 0.4, 1.0)
        background = ((-PLAYFIELD, +PLAYFIELD), (+PLAYFIELD, +PLAYFIELD),
                      (+PLAYFIELD, -PLAYFIELD), (-PLAYFIELD, -PLAYFIELD))
        for corner_x, corner_y in background:
            gl.glVertex3f(corner_x, corner_y, 0)

        # Lighter grid patches, each one cell of size k.
        gl.glColor4f(0.4, 0.9, 0.4, 1.0)
        k = PLAYFIELD / 20.0
        for cell_x in range(-20, 20, 2):
            for cell_y in range(-20, 20, 2):
                left, right = k * cell_x + 0, k * cell_x + k
                bottom, top = k * cell_y + 0, k * cell_y + k
                gl.glVertex3f(right, bottom, 0)
                gl.glVertex3f(left, bottom, 0)
                gl.glVertex3f(left, top, 0)
                gl.glVertex3f(right, top, 0)

        # Road tiles and border strips.
        for outline, rgb in self.road_poly:
            gl.glColor4f(rgb[0], rgb[1], rgb[2], 1)
            for vertex in outline:
                gl.glVertex3f(vertex[0], vertex[1], 0)

        gl.glEnd()

    def render_indicators(self, W, H):
        """Draw the instrument strip at the bottom of the view and the score.

        ``W`` and ``H`` are the width and height of the drawing area; all
        positions are expressed on a 40 x 40 grid derived from them.  The
        strip shows one bar for the hull speed, one per wheel for its angular
        speed, one for the joint angle of wheel 0 and one for the hull's
        angular velocity.  The score label text is set from ``self.reward``.
        """
        gl.glBegin(gl.GL_QUADS)
        col = W / 40.0
        row = H / 40.0

        def emit_quad(rgb, corners):
            # One colour call followed by the corners in the given order.
            gl.glColor4f(rgb[0], rgb[1], rgb[2], 1)
            for corner_x, corner_y in corners:
                gl.glVertex3f(corner_x, corner_y, 0)

        def vertical_ind(place, val, color):
            # Bar one grid column wide, growing upwards from the first row.
            left = (place + 0) * col
            right = (place + 1) * col
            top = row + row * val
            emit_quad(color,
                      ((left, top), (right, top), (right, row), (left, row)))

        def horiz_ind(place, val, color):
            # Bar spanning rows 2..4, growing sideways by `val` columns.
            near = (place + 0) * col
            far = (place + val) * col
            emit_quad(color, ((near, 4 * row), (far, 4 * row),
                              (far, 2 * row), (near, 2 * row)))

        # Black backdrop covering the bottom five grid rows.
        emit_quad((0, 0, 0), ((W, 0), (W, 5 * row), (0, 5 * row), (0, 0)))

        velocity_x = self.car.hull.linearVelocity[0]
        velocity_y = self.car.hull.linearVelocity[1]
        true_speed = np.sqrt(np.square(velocity_x) + np.square(velocity_y))
        vertical_ind(5, 0.02 * true_speed, (1, 1, 1))

        # ABS sensors: one bar per wheel, wheels 0-1 and 2-3 share a colour.
        wheel_colors = ((0.0, 0, 1), (0.0, 0, 1), (0.2, 0, 1), (0.2, 0, 1))
        for wheel_i, rgb in enumerate(wheel_colors):
            vertical_ind(7 + wheel_i, 0.01 * self.car.wheels[wheel_i].omega,
                         rgb)

        horiz_ind(20, -10.0 * self.car.wheels[0].joint.angle, (0, 1, 0))
        horiz_ind(30, -0.8 * self.car.hull.angularVelocity, (1, 0, 0))
        gl.glEnd()

        self.score_label.text = "%04i" % self.reward
        self.score_label.draw()

    def render_raycasts(self):
        """Draw every sensor ray as a red line.

        A ray that has an entry in ``self.intersections`` is drawn only up
        to its intersection point; all other rays are drawn at full length.
        Nothing is drawn when ``self.raycasts`` has not been computed yet.
        """
        if not hasattr(self, "raycasts"):
            return

        # Map ray index -> intersection point, keeping the first entry found
        # for each ray.
        hit_by_ray = {}
        if hasattr(self, "intersections"):
            for hit_point, ray_index in self.intersections:
                hit_by_ray.setdefault(ray_index, hit_point)

        for ray_index, ray in enumerate(self.raycasts):
            origin = (ray[0][0], ray[0][1])
            if ray_index in hit_by_ray:
                tip = hit_by_ray[ray_index]
            else:
                tip = (ray[1][0], ray[1][1])
            self.viewer.draw_line(start=origin,
                                  end=tip,
                                  color=(1, 0.0, 0.0),
                                  linewidth=3)

    def render_wall_segments(self):
        """Draw each stored wall segment as a blue line.

        Nothing is drawn when ``self.wall_segments`` does not exist yet.
        """
        if not hasattr(self, "wall_segments"):
            return
        for segment in self.wall_segments:
            self.viewer.draw_line(start=segment[0],
                                  end=segment[1],
                                  color=(0.0, 0.0, 1),
                                  linewidth=3)

    def render_intersections(self):
        """Draw a green circle at every stored ray/wall intersection point.

        Nothing is drawn when ``self.intersections`` does not exist yet.
        """
        if not hasattr(self, "intersections"):
            return
        for hit_point, _ray_index in self.intersections:
            self.viewer.draw_circle(hit_point, color=(0.0, 1, 0.0), radius=1)

    def get_min_distances(self):
        """Measure how far wheels 0 and 1 are from the road tile centroids.

        Returns a tuple ``(min_left, min_right, close_tiles)``:

        * ``min_left`` / ``min_right`` - smallest distance from wheel 0 /
          wheel 1 to any tile centroid (``9999`` when no tile is closer).
        * ``close_tiles`` - tiles whose centroid lies within
          ``RAY_CAST_DISTANCE`` of either wheel, ordered by their distance
          to wheel 0.
        """
        left_wheel = self.car.wheels[0].position
        right_wheel = self.car.wheels[1].position

        def gap(centroid, wheel):
            # Euclidean distance between a tile centroid and a wheel.
            return math.sqrt(
                abs(centroid.x - wheel.x)**2 + abs(centroid.y - wheel.y)**2)

        nearest_left = 9999
        nearest_right = 9999
        nearby = []
        for tile in self.road:
            centroid = tile.fixtures[0].shape.centroid
            to_left = gap(centroid, left_wheel)
            to_right = gap(centroid, right_wheel)
            nearest_left = min(nearest_left, to_left)
            nearest_right = min(nearest_right, to_right)
            if to_left < RAY_CAST_DISTANCE or to_right < RAY_CAST_DISTANCE:
                nearby.append((to_left, tile))

        # Only the wheel-0 distance is used as the sort key.
        nearby.sort(key=lambda entry: entry[0])
        return nearest_left, nearest_right, [tile for _, tile in nearby]

    def get_raycast_points(self, tiles):
        """Cast the car's distance sensors against the edges of ``tiles``.

        ``NUM_SENSORS`` rays of length ``RAY_CAST_DISTANCE`` start at the
        hull position and are spread evenly over an arc from -45 to +45
        degrees, offset by the hull angle plus a quarter turn.  For every
        tile with at least four vertices, the pair of opposite edges
        starting with the shorter of edge 0-3 and edge 0-1 is used as wall
        segments.

        Side effects: stores ``self.raycasts``, ``self.raycast_angles``,
        ``self.wall_segments`` and ``self.intersections`` (a list of
        ``(point, ray_index)`` pairs) for the render helpers.

        Returns a list with one distance per ray, in ray order: the distance
        from the hull to the closest wall hit, or ``RAY_CAST_DISTANCE - 1``
        when the ray hits no wall.
        """
        hull = self.car.hull
        origin = (hull.position.x, hull.position.y)

        start_angle = -math.pi / 4
        end_angle = math.pi / 4
        rotation = math.pi / 2  # Correction factor
        # linspace always yields exactly NUM_SENSORS angles including both
        # ends.  arange with a floating-point step may produce one element
        # too many and divides by zero when NUM_SENSORS is 1.
        angles = [
            offset + hull.angle + rotation
            for offset in np.linspace(start_angle, end_angle, NUM_SENSORS)
        ]

        # Each ray is a segment from the hull position to its far end.
        raycasts = [(origin, (math.cos(a) * RAY_CAST_DISTANCE + origin[0],
                              math.sin(a) * RAY_CAST_DISTANCE + origin[1]))
                    for a in angles]

        self.raycasts = raycasts
        self.raycast_angles = angles

        def edge_length(p, q):
            return math.sqrt((p[0] - q[0])**2 + (p[1] - q[1])**2)

        # Get wall segments
        wall_segments = []
        for tile in tiles:
            verts = tile.fixtures[0].shape.vertices
            if len(verts) < 4:
                continue
            # Both lengths are measured on a single edge each.  The previous
            # code combined the x-extent of edge 0-1 with the y-extent of
            # edge 2-3, which is not the length of any edge.
            if edge_length(verts[0], verts[3]) < edge_length(
                    verts[0], verts[1]):
                wall_segments.append((verts[0], verts[3]))
                wall_segments.append((verts[1], verts[2]))
            else:
                wall_segments.append((verts[0], verts[1]))
                wall_segments.append((verts[2], verts[3]))

        # Keep plain-tuple copies so the segments can be drawn later.
        self.wall_segments = [[(seg_a[0], seg_a[1]), (seg_b[0], seg_b[1])]
                              for seg_a, seg_b in wall_segments]

        def intersection(seg1, seg2):
            # Based on this formula http://www-cs.ccny.cuny.edu/~wolberg/capstone/intersection/Intersection%20point%20of%20two%20lines.html
            # Returns the intersection point of the two segments, or None
            # when they are parallel or do not touch.
            x1, y1 = seg1[0]
            x2, y2 = seg1[1]
            x3, y3 = seg2[0]
            x4, y4 = seg2[1]

            denom = (x2 - x1) * (y4 - y3) - (x4 - x3) * (y2 - y1)
            # With the default tolerances this is true only for an exact
            # zero, which is enough to guard the divisions below.
            if math.isclose(denom, 0):
                return None

            # t and u are the positions of the crossing along seg1 and seg2.
            t = ((x4 - x3) * (y1 - y3) - (y4 - y3) * (x1 - x3)) / denom
            u = ((x2 - x1) * (y1 - y3) - (x1 - x3) * (y2 - y1)) / denom

            if 0.0 <= t <= 1.0 and 0.0 <= u <= 1.0:
                return (x1 + t * (x2 - x1), y1 + t * (y2 - y1))
            return None

        # Loop through the rays, keep the intersection with the closest wall
        int_dist = []
        self.intersections = []
        for raycast_i, raycast in enumerate(raycasts):
            hits = []
            for wall in wall_segments:
                int_point = intersection(raycast, wall)
                if int_point is None:
                    continue
                dist = math.sqrt((origin[0] - int_point[0])**2 +
                                 (origin[1] - int_point[1])**2)
                hits.append((dist, [int_point[0], int_point[1]]))
            if hits:
                dist, point = min(hits)
                self.intersections.append((point, raycast_i))
                int_dist.append(dist)
            else:
                int_dist.append(RAY_CAST_DISTANCE - 1)  # Max range

        return int_dist
Exemplo n.º 16
0
class Environment(gym.Env, EzPickle):
    def __init__(self, verbose=False):
        """Create an empty physics world and declare the gym spaces.

        The track, cones, ground and car are only built by ``reset``; until
        then ``self.time`` stays at -1.0, which ``render`` treats as "not
        ready yet".
        """
        EzPickle.__init__(self)

        # --- General / utilities ---
        self.verbose = verbose
        self.np_random = None
        self.seed()

        # --- Box2D state ---
        self.time = -1.0  # -1.0 marks a model that has not been reset yet
        self.car = None
        self.contact_listener = ContactListener(self)
        self.world = b2World((0, 0), contactListener=self.contact_listener)
        self.ground = None
        self.track_tiles_coordinates = None  # Kept for StateTransformer
        self.track_tiles = []
        self.cones = []
        self.tile_visited_count = 0

        # --- pyglet state, filled in lazily by render() ---
        self.viewer = None
        self.invisible_state_window = None
        self.invisible_video_window = None
        self.score_label = None
        self.transform = None

        # --- RL state ---
        self.state = None
        self.done = False
        # Actions are (steer, gas, brake); steer ranges over -1..+1 for
        # left/right, gas and brake over 0..+1.
        self.action_space = spaces.Box(low=np.array([-1, 0, 0]),
                                       high=np.array([+1, +1, +1]),
                                       dtype=np.float32)
        self.observation_space = spaces.Box(low=0,
                                            high=255,
                                            shape=(STATE_H, STATE_W, 3),
                                            dtype=np.uint8)
        self.reward = 0.0
        self.prev_reward = 0.0

    def step(self, action):
        """Advance the simulation by ``1 / FPS`` seconds.

        ``action`` is ``(steer, gas, brake)`` or ``None`` to let the car
        coast.  Every step costs 0.1 reward; leaving the playfield ends the
        episode with an additional -100 on the step reward.  The episode
        also ends once every track tile has been visited.

        Returns ``(state, step_reward, done, info)`` with an empty ``info``.
        """
        dt = 1.0 / FPS

        # Remember the reward as it was before this step changes it.
        self.prev_reward = self.reward

        if action is not None:
            self.car.steer(-action[0])
            self.car.gas(action[1])
            self.car.brake(action[2])
        self.car.step(dt)
        self.world.Step(dt, 6 * 30, 2 * 30)
        self.time += dt

        # The car is treated as having infinite fuel, so consumption is
        # discarded instead of being charged to the reward.
        # self.reward -=  10 * self.car.fuel_spent / ENGINE_POWER
        self.car.fuel_spent = 0.0

        # Time penalty; the step reward is the change in accumulated reward.
        self.reward -= 0.1
        step_reward = self.reward - self.prev_reward

        if self.tile_visited_count == len(self.track_tiles):
            self.done = True

        # Leaving the playfield is penalised and terminates the episode.
        pos_x, pos_y = self.car.hull.position
        if abs(pos_x) > PLAYFIELD or abs(pos_y) > PLAYFIELD:
            self.done = True
            step_reward -= 100

        self.state = StateTransformer.transform(self)
        return self.state, step_reward, self.done, {}

    def reset(self):
        """Rebuild the world for a new episode and return the first state.

        Destroys the previous bodies, clears the episode bookkeeping, then
        creates the ground, the track tiles, one cone per tile-fixture
        vertex and the car (placed on the first tile with angle 0).

        Returns the first element of ``self.step(None)``.
        """
        self._destroy()

        # Episode bookkeeping.
        self.time = -1.0
        self.tile_visited_count = 0
        self.state = None
        self.done = False
        self.reward = 0.0
        self.prev_reward = 0.0

        self.ground = Ground(self.world, PLAYFIELD, PLAYFIELD)

        # Each tile is built from a coordinate and its predecessor; index -1
        # pairs the first coordinate with the last one.
        coords = TrackCoordinatesBuilder.load_track(self)
        self.track_tiles_coordinates = coords
        self.track_tiles = [
            TrackTile(self.world, coords[idx], coords[idx - 1])
            for idx, _ in enumerate(coords)
        ]

        # Collect every vertex of every tile's first fixture, then put a
        # cone on each of them.
        cone_spots = [
            vertex for tile in self.track_tiles
            for vertex in tile.b2Data.fixtures[0].shape.vertices
        ]
        self.cones = [
            Cone(world=self.world, position=(spot[0], spot[1]))
            for spot in cone_spots
        ]

        start_x, start_y = self.track_tiles[0].position
        self.car = Car(self.world,
                       init_angle=0,
                       init_x=start_x,
                       init_y=start_y)

        return self.step(None)[0]

    def render(self, mode='human'):
        """Render the current frame.

        ``mode`` must be ``'human'``, ``'state_pixels'`` or ``'rgb_array'``.
        The viewer, score label and transform are created lazily on the
        first call.

        Returns ``None`` while ``self.time`` is still -1.0 (``reset`` not
        called yet), ``self.viewer.isopen`` in ``'human'`` mode, and
        otherwise a writable ``(VP_H, VP_W, 3)`` ``uint8`` array read back
        from the colour buffer (rows flipped vertically, fourth channel
        dropped).
        """
        assert mode in ['human', 'state_pixels', 'rgb_array']

        # Instantiate viewer
        if self.viewer is None:
            self.viewer = rendering.Viewer(WINDOW_W, WINDOW_H)
            self.score_label = pyglet.text.Label('0000',
                                                 font_size=36,
                                                 x=20,
                                                 y=WINDOW_H * 2.5 / 40.00,
                                                 anchor_x='left',
                                                 anchor_y='center',
                                                 color=(255, 255, 255, 255))
            self.transform = rendering.Transform()

        # reset() not called yet, so no need to render
        if self.time == -1.0:
            return

        self.car.draw(self.viewer, mode != "state_pixels")
        self.transform = follower_view_transform(self.car, self.time)

        # Setup window
        window = self.viewer.window
        window.switch_to()
        window.dispatch_events()
        window.clear()
        VP_W, VP_H = get_viewport_size(mode, window)

        # Start drawing
        gl.glViewport(0, 0, VP_W, VP_H)
        # Transform view to follow the car and render the contents of the world
        self.transform.enable()
        self.render_world()
        # Render onetime geometries, then empty the list
        for geom in self.viewer.onetime_geoms:
            geom.render()
        self.viewer.onetime_geoms = []
        # The indicators are not part of the world, so they are drawn with
        # the transform disabled
        self.transform.disable()
        render_indicators(WINDOW_W,
                          WINDOW_H,
                          car=self.car,
                          reward=self.reward,
                          score_label=self.score_label)

        if mode == 'human':
            window.flip()
            return self.viewer.isopen

        image_data = pyglet.image.get_buffer_manager().get_color_buffer(
        ).get_image_data()
        # np.frombuffer replaces the deprecated binary mode of
        # np.fromstring.  It yields a read-only view of the pixel bytes, so
        # the final copy() hands the caller an independent, writable array
        # as before.
        pixels = np.frombuffer(image_data.get_data(), dtype=np.uint8)
        pixels = pixels.reshape(VP_H, VP_W, 4)
        return pixels[::-1, :, 0:3].copy()

    def close(self):
        """Close the viewer, if one was created, and forget it."""
        if self.viewer is None:
            return
        self.viewer.close()
        self.viewer = None

    def seed(self, seed=None):
        """Install a new random generator and return the seed in a list.

        The generator returned by ``seeding.np_random`` is stored as
        ``self.np_random``; the seed it reports is returned as ``[seed]``.
        """
        generator, used_seed = seeding.np_random(seed)
        self.np_random = generator
        return [used_seed]

    def _destroy(self):
        """Remove the ground, the track tiles and the car from the world.

        Does nothing when there are no track tiles.
        """
        # NOTE(review): cone bodies are not destroyed here although reset()
        # creates new cones every episode - confirm whether Cone cleans up
        # after itself or whether the old bodies stay in the world.
        if self.track_tiles:
            self.world.DestroyBody(self.ground.b2Data)
            for tile in self.track_tiles:
                self.world.DestroyBody(tile.b2Data)
            self.track_tiles = []
            self.car.destroy()

    def render_world(self):
        """Draw the ground, then all track tiles, then all cones.

        Everything is rendered inside a single ``GL_QUADS`` begin/end pair.
        """
        gl.glBegin(gl.GL_QUADS)

        self.ground.render()

        # Tiles first, cones afterwards.
        for group in (self.track_tiles, self.cones):
            for drawable in group:
                drawable.render()

        gl.glEnd()