Пример #1
0
    def reset(self):

        # Destroy all objects
        self._destroy()

        # TODO: decide on the reward
        self.reward = 0.0
        self.prev_reward = 0.0
        self.tot_reward = [0.0 for _ in range(NUM_VEHICLES)]
        
        # Transition for zoom
        self.t = make_n_times(NUM_VEHICLES)

        # Rendering values
        self.road_poly = []
        self.human_render = False

        while True:
            success = self._create_track()
            if success: break
            print("retry to generate track (normal if there are not many of this messages)")
        
        # randomly init each car
        if not self.init_pos:
            rect_poly_indices = [i for i in range(len(self.directions)) if self.directions[i] in "lrtb"]
            random_choices = np.random.choice(rect_poly_indices, NUM_VEHICLES, replace=False)
            for car_idx, rid in enumerate(random_choices):
                rect_poly = np.array(self.road_poly[rid][0])
                direction = self.directions[rid]
                x = np.mean(rect_poly[:, 0])
                y = np.mean(rect_poly[:, 1])
                if direction == "r":
                    angle = -90
                elif direction == "t":
                    angle = 0
                elif direction == "l":
                    angle = 90
                else:
                    angle = 180
                self.cars[car_idx] = Car(self.world, angle*math.pi/180.0, x, y)
        else:
            for car_idx, init_p in enumerate(self.init_pos):
                i, j, angle = init_p
                i -= 0.5
                j += 0.5
                x, y = i*EDGE_WIDTH, j*EDGE_WIDTH
                x += self.off_params[0]
                y += self.off_params[1]
                self.cars[car_idx] = Car(self.world, angle*math.pi/180.0, x, y)

        # return states after init
        return self.step([None for i in range(NUM_VEHICLES)])[0]
Пример #2
0
    def reset(self):
        self._destroy()
        self.reward = 0.0
        self.t = 0.0
        self.road_poly = []
        self.state = State()

        while True:
            success = self._create_track()
            if success: break
            print(
                "retry to generate track (normal if there are not many of this messages)"
            )
        self.car = Car(self.world, *self.track[0][1:4])

        return self.step(None)[0]
Пример #3
0
    def _reset(self):
        self._destroy()
        self.reward = 0.0
        self.prev_reward = 0.0
        self.tile_visited_count = 0
        self.t = 0.0
        self.road_poly = []
        self.human_render = False

        while True:
            success = self._create_track()
            if success: break
            print("retry to generate track (normal if there are not many of this messages)")
        self.car = Car(self.world, *self.track[0][1:4])

        return self._step(None)[0]
def init_planner(prior_map):
    # load in vehicle params
    with open(CAR_PARAMS_FILE, 'r') as f:
        car_params = yaml.load(f)
    # define search params
    eps = 2.0
    dist_cost = 1
    time_cost = 1
    roughness_cost = 1
    cost_weights = (dist_cost, time_cost, roughness_cost)

    # Define action space
    dt = 0.1
    T = 1.0
    # velocities
    # TODO: Let handle negative velocities, but enforce basic dynamic windowing
    max_speed = car_params["max_speed"]
    num_speed = 3
    velocities = np.linspace(start=0, stop=max_speed, num=num_speed) / dt
    dv = velocities[1] - velocities[0]
    # steer angles
    max_abs_steer = car_params["max_steer"]
    num_steer = 3
    steer_angles = np.linspace(-max_abs_steer, max_abs_steer, num=num_steer)

    # create simple car model for planning
    mid_to_wheel_length = car_params["car_length"] / 2.0
    car = Car(L=mid_to_wheel_length,
              max_v=car_params["max_speed"],
              max_steer=car_params["max_steer"],
              wheel_radius=car_params["wheel_radius"])

    # define heading space
    start, stop, step = 0, 315, 45
    num_thetas = int((stop - start) / step) + 1
    thetas = np.linspace(start=0, stop=315, num=num_thetas)
    thetas = thetas / RAD_TO_DEG  # convert to radians
    dtheta = step / RAD_TO_DEG

    # collective variables for discretizing C-sapce
    dy, dx = 1.0, 1.0
    miny, minx = 0, 0
    dstate = np.array([dx, dy, dtheta, dv, dt])
    min_state = np.array([minx, miny, min(thetas), min(velocities), 0])

    # get planners
    planner = create_planner(cost_weights=cost_weights,
                             thetas=thetas,
                             steer_angles=steer_angles,
                             velocities=velocities,
                             car=car,
                             min_state=min_state,
                             dstate=dstate,
                             prior_map=prior_map,
                             eps=eps,
                             T=T)

    # store planner in file to be used for planning
    with open(PLANNER_FILE, "w") as f:
        pickle.dump(planner, f)
    def reset(self, track=None):
        self._destroy()
        self.reward = 0.0
        self.prev_reward = 0.0
        self.tile_visited_count = 0
        self.t = 0.0
        self.road_poly = []
        self.track_pack = None
        if track is None:
            while True:
                success = self._create_track([], [])
                if success:
                    break
        else:
            self._create_track(track['noises'], track['rads'])
        self.car = Car(self.world, *self.track[0][1:4])

        return self.step(None)[0]
Пример #6
0
    def reset(self):
        self._destroy()
        self.reward = 0.0
        self.prev_reward = 0.0
        self.tile_visited_count = 0
        self.t = 0.0
        self.road_poly = []

        while True:
            success = self._create_track()
            if success:
                break
            if self.verbose == 1:
                print("retry to generate track (normal if there are not many"
                      "instances of this message)")
        self.car = Car(self.world,
                       *self.track[0][1:4],
                       sensors_activated=self.sensors_activated)

        return self.step([0, 0.1, 0])[0]
Пример #7
0
    def _reset(self):
        self._destroy()
        self.reward = 0.0
        self.prev_reward = 0.0
        self.tile_visited_count = 0
        self.t = 0.0
        self.road_poly = []
        self.human_render = False

        while True:
            success = self._create_track()
            if success: break
            print("retry to generate track (normal if there are not many of this messages)")
        self.car = Car(self.world, *self.track[0][1:4])

        return self._step(None)[0]
Пример #8
0
class CarRacing(gym.Env):
    metadata = {
        'render.modes': ['human', 'rgb_array', 'state_pixels'],
        'video.frames_per_second' : FPS
    }

    def __init__(self):
        self._seed()
        self.world = Box2D.b2World((0,0), contactListener=FrictionDetector(self))
        self.viewer = None
        self.invisible_state_window = None
        self.invisible_video_window = None
        self.road = None
        self.car = None
        self.reward = 0.0
        self.prev_reward = 0.0

        self.action_space = spaces.Box( np.array([-1,0,0]), np.array([+1,+1,+1]))  # steer, gas, brake
        self.observation_space = spaces.Box(low=0, high=255, shape=(STATE_H, STATE_W, 3))

    def _seed(self, seed=None):
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    def _destroy(self):
        if not self.road: return
        for t in self.road:
            self.world.DestroyBody(t)
        self.road = []
        self.car.destroy()

    def _create_track(self):
        CHECKPOINTS = 12

        # Create checkpoints
        checkpoints = []
        for c in range(CHECKPOINTS):
            alpha = 2*math.pi*c/CHECKPOINTS + self.np_random.uniform(0, 2*math.pi*1/CHECKPOINTS)
            rad = self.np_random.uniform(TRACK_RAD/3, TRACK_RAD)
            if c==0:
                alpha = 0
                rad = 1.5*TRACK_RAD
            if c==CHECKPOINTS-1:
                alpha = 2*math.pi*c/CHECKPOINTS
                self.start_alpha = 2*math.pi*(-0.5)/CHECKPOINTS
                rad = 1.5*TRACK_RAD
            checkpoints.append( (alpha, rad*math.cos(alpha), rad*math.sin(alpha)) )

        #print "\n".join(str(h) for h in checkpoints)
        #self.road_poly = [ (    # uncomment this to see checkpoints
        #    [ (tx,ty) for a,tx,ty in checkpoints ],
        #    (0.7,0.7,0.9) ) ]
        self.road = []

        # Go from one checkpoint to another to create track
        x, y, beta = 1.5*TRACK_RAD, 0, 0
        dest_i = 0
        laps = 0
        track = []
        no_freeze = 2500
        visited_other_side = False
        while 1:
            alpha = math.atan2(y, x)
            if visited_other_side and alpha > 0:
                laps += 1
                visited_other_side = False
            if alpha < 0:
                visited_other_side = True
                alpha += 2*math.pi
            while True: # Find destination from checkpoints
                failed = True
                while True:
                    dest_alpha, dest_x, dest_y = checkpoints[dest_i % len(checkpoints)]
                    if alpha <= dest_alpha:
                        failed = False
                        break
                    dest_i += 1
                    if dest_i % len(checkpoints) == 0: break
                if not failed: break
                alpha -= 2*math.pi
                continue
            r1x = math.cos(beta)
            r1y = math.sin(beta)
            p1x = -r1y
            p1y = r1x
            dest_dx = dest_x - x  # vector towards destination
            dest_dy = dest_y - y
            proj = r1x*dest_dx + r1y*dest_dy  # destination vector projected on rad
            while beta - alpha >  1.5*math.pi: beta -= 2*math.pi
            while beta - alpha < -1.5*math.pi: beta += 2*math.pi
            prev_beta = beta
            proj *= SCALE
            if proj >  0.3: beta -= min(TRACK_TURN_RATE, abs(0.001*proj))
            if proj < -0.3: beta += min(TRACK_TURN_RATE, abs(0.001*proj))
            x += p1x*TRACK_DETAIL_STEP
            y += p1y*TRACK_DETAIL_STEP
            track.append( (alpha,prev_beta*0.5 + beta*0.5,x,y) )
            if laps > 4: break
            no_freeze -= 1
            if no_freeze==0: break
        #print "\n".join([str(t) for t in enumerate(track)])

        # Find closed loop range i1..i2, first loop should be ignored, second is OK
        i1, i2 = -1, -1
        i = len(track)
        while True:
            i -= 1
            if i==0: return False  # Failed
            pass_through_start = track[i][0] > self.start_alpha and track[i-1][0] <= self.start_alpha
            if pass_through_start and i2==-1:
                i2 = i
            elif pass_through_start and i1==-1:
                i1 = i
                break
        print("Track generation: %i..%i -> %i-tiles track" % (i1, i2, i2-i1))
        assert i1!=-1
        assert i2!=-1

        track = track[i1:i2-1]

        first_beta = track[0][1]
        first_perp_x = math.cos(first_beta)
        first_perp_y = math.sin(first_beta)
        # Length of perpendicular jump to put together head and tail
        well_glued_together = np.sqrt(
            np.square( first_perp_x*(track[0][2] - track[-1][2]) ) +
            np.square( first_perp_y*(track[0][3] - track[-1][3]) ))
        if well_glued_together > TRACK_DETAIL_STEP:
            return False

        # Red-white border on hard turns
        border = [False]*len(track)
        for i in range(len(track)):
            good = True
            oneside = 0
            for neg in range(BORDER_MIN_COUNT):
                beta1 = track[i-neg-0][1]
                beta2 = track[i-neg-1][1]
                good &= abs(beta1 - beta2) > TRACK_TURN_RATE*0.2
                oneside += np.sign(beta1 - beta2)
            good &= abs(oneside) == BORDER_MIN_COUNT
            border[i] = good
        for i in range(len(track)):
            for neg in range(BORDER_MIN_COUNT):
                border[i-neg] |= border[i]

        # Create tiles
        for i in range(len(track)):
            alpha1, beta1, x1, y1 = track[i]
            alpha2, beta2, x2, y2 = track[i-1]
            road1_l = (x1 - TRACK_WIDTH*math.cos(beta1), y1 - TRACK_WIDTH*math.sin(beta1))
            road1_r = (x1 + TRACK_WIDTH*math.cos(beta1), y1 + TRACK_WIDTH*math.sin(beta1))
            road2_l = (x2 - TRACK_WIDTH*math.cos(beta2), y2 - TRACK_WIDTH*math.sin(beta2))
            road2_r = (x2 + TRACK_WIDTH*math.cos(beta2), y2 + TRACK_WIDTH*math.sin(beta2))
            t = self.world.CreateStaticBody( fixtures = fixtureDef(
                shape=polygonShape(vertices=[road1_l, road1_r, road2_r, road2_l])
                ))
            t.userData = t
            c = 0.01*(i%3)
            t.color = [ROAD_COLOR[0] + c, ROAD_COLOR[1] + c, ROAD_COLOR[2] + c]
            t.road_visited = False
            t.road_friction = 1.0
            t.fixtures[0].sensor = True
            self.road_poly.append(( [road1_l, road1_r, road2_r, road2_l], t.color ))
            self.road.append(t)
            if border[i]:
                side = np.sign(beta2 - beta1)
                b1_l = (x1 + side* TRACK_WIDTH        *math.cos(beta1), y1 + side* TRACK_WIDTH        *math.sin(beta1))
                b1_r = (x1 + side*(TRACK_WIDTH+BORDER)*math.cos(beta1), y1 + side*(TRACK_WIDTH+BORDER)*math.sin(beta1))
                b2_l = (x2 + side* TRACK_WIDTH        *math.cos(beta2), y2 + side* TRACK_WIDTH        *math.sin(beta2))
                b2_r = (x2 + side*(TRACK_WIDTH+BORDER)*math.cos(beta2), y2 + side*(TRACK_WIDTH+BORDER)*math.sin(beta2))
                self.road_poly.append(( [b1_l, b1_r, b2_r, b2_l], (1,1,1) if i%2==0 else (1,0,0) ))
        self.track = track
        return True

    def _reset(self):
        self._destroy()
        self.reward = 0.0
        self.prev_reward = 0.0
        self.tile_visited_count = 0
        self.t = 0.0
        self.road_poly = []
        self.human_render = False

        while True:
            success = self._create_track()
            if success: break
            print("retry to generate track (normal if there are not many of this messages)")
        self.car = Car(self.world, *self.track[0][1:4])

        return self._step(None)[0]

    def _step(self, action):
        if action is not None:
            self.car.steer(-action[0])
            self.car.gas(action[1])
            self.car.brake(action[2])

        self.car.step(1.0/FPS)
        self.world.Step(1.0/FPS, 6*30, 2*30)
        self.t += 1.0/FPS

        self.state = self._render("state_pixels")

        step_reward = 0
        done = False
        if action is not None: # First step without action, called from reset()
            self.reward -= 0.1
            # We actually don't want to count fuel spent, we want car to be faster.
            #self.reward -=  10 * self.car.fuel_spent / ENGINE_POWER
            self.car.fuel_spent = 0.0
            step_reward = self.reward - self.prev_reward
            self.prev_reward = self.reward
            if self.tile_visited_count==len(self.track):
                done = True
            x, y = self.car.hull.position
            if abs(x) > PLAYFIELD or abs(y) > PLAYFIELD:
                done = True
                step_reward = -100

        return self.state, step_reward, done, {}

    def _render(self, mode='human', close=False):
        if close:
            if self.viewer is not None:
                self.viewer.close()
                self.viewer = None
            return

        if self.viewer is None:
            self.viewer = rendering.Viewer(WINDOW_W, WINDOW_H)
            self.score_label = pyglet.text.Label('0000', font_size=36,
                x=20, y=WINDOW_H*2.5/40.00, anchor_x='left', anchor_y='center',
                color=(255,255,255,255))
            self.transform = rendering.Transform()

        if "t" not in self.__dict__: return  # reset() not called yet

        zoom = 0.1*SCALE*max(1-self.t, 0) + ZOOM*SCALE*min(self.t, 1)   # Animate zoom first second
        zoom_state  = ZOOM*SCALE*STATE_W/WINDOW_W
        zoom_video  = ZOOM*SCALE*VIDEO_W/WINDOW_W
        scroll_x = self.car.hull.position[0]
        scroll_y = self.car.hull.position[1]
        angle = -self.car.hull.angle
        vel = self.car.hull.linearVelocity
        if np.linalg.norm(vel) > 0.5:
            angle = math.atan2(vel[0], vel[1])
        self.transform.set_scale(zoom, zoom)
        self.transform.set_translation(
            WINDOW_W/2 - (scroll_x*zoom*math.cos(angle) - scroll_y*zoom*math.sin(angle)),
            WINDOW_H/4 - (scroll_x*zoom*math.sin(angle) + scroll_y*zoom*math.cos(angle)) )
        self.transform.set_rotation(angle)

        self.car.draw(self.viewer, mode!="state_pixels")

        arr = None
        win = self.viewer.window
        if mode != 'state_pixels':
            win.switch_to()
            win.dispatch_events()
        if mode=="rgb_array" or mode=="state_pixels":
            win.clear()
            t = self.transform
            if mode=='rgb_array':
                VP_W = VIDEO_W
                VP_H = VIDEO_H
            else:
                VP_W = STATE_W
                VP_H = STATE_H
            glViewport(0, 0, VP_W, VP_H)
            t.enable()
            self._render_road()
            for geom in self.viewer.onetime_geoms:
                geom.render()
            t.disable()
            self._render_indicators(WINDOW_W, WINDOW_H)  # TODO: find why 2x needed, wtf
            image_data = pyglet.image.get_buffer_manager().get_color_buffer().get_image_data()
            arr = np.fromstring(image_data.data, dtype=np.uint8, sep='')
            arr = arr.reshape(VP_H, VP_W, 4)
            arr = arr[::-1, :, 0:3]

        if mode=="rgb_array" and not self.human_render: # agent can call or not call env.render() itself when recording video.
            win.flip()

        if mode=='human':
            self.human_render = True
            win.clear()
            t = self.transform
            glViewport(0, 0, WINDOW_W, WINDOW_H)
            t.enable()
            self._render_road()
            for geom in self.viewer.onetime_geoms:
                geom.render()
            t.disable()
            self._render_indicators(WINDOW_W, WINDOW_H)
            win.flip()

        self.viewer.onetime_geoms = []
        return arr

    def _render_road(self):
        glBegin(GL_QUADS)
        glColor4f(0.4, 0.8, 0.4, 1.0)
        glVertex3f(-PLAYFIELD, +PLAYFIELD, 0)
        glVertex3f(+PLAYFIELD, +PLAYFIELD, 0)
        glVertex3f(+PLAYFIELD, -PLAYFIELD, 0)
        glVertex3f(-PLAYFIELD, -PLAYFIELD, 0)
        glColor4f(0.4, 0.9, 0.4, 1.0)
        k = PLAYFIELD/20.0
        for x in range(-20, 20, 2):
            for y in range(-20, 20, 2):
                glVertex3f(k*x + k, k*y + 0, 0)
                glVertex3f(k*x + 0, k*y + 0, 0)
                glVertex3f(k*x + 0, k*y + k, 0)
                glVertex3f(k*x + k, k*y + k, 0)
        for poly, color in self.road_poly:
            glColor4f(color[0], color[1], color[2], 1)
            for p in poly:
                glVertex3f(p[0], p[1], 0)
        glEnd()

    def _render_indicators(self, W, H):
        glBegin(GL_QUADS)
        s = W/40.0
        h = H/40.0
        glColor4f(0,0,0,1)
        glVertex3f(W, 0, 0)
        glVertex3f(W, 5*h, 0)
        glVertex3f(0, 5*h, 0)
        glVertex3f(0, 0, 0)
        def vertical_ind(place, val, color):
            glColor4f(color[0], color[1], color[2], 1)
            glVertex3f((place+0)*s, h + h*val, 0)
            glVertex3f((place+1)*s, h + h*val, 0)
            glVertex3f((place+1)*s, h, 0)
            glVertex3f((place+0)*s, h, 0)
        def horiz_ind(place, val, color):
            glColor4f(color[0], color[1], color[2], 1)
            glVertex3f((place+0)*s, 4*h , 0)
            glVertex3f((place+val)*s, 4*h, 0)
            glVertex3f((place+val)*s, 2*h, 0)
            glVertex3f((place+0)*s, 2*h, 0)
        true_speed = np.sqrt(np.square(self.car.hull.linearVelocity[0]) + np.square(self.car.hull.linearVelocity[1]))
        vertical_ind(5, 0.02*true_speed, (1,1,1))
        vertical_ind(7, 0.01*self.car.wheels[0].omega, (0.0,0,1)) # ABS sensors
        vertical_ind(8, 0.01*self.car.wheels[1].omega, (0.0,0,1))
        vertical_ind(9, 0.01*self.car.wheels[2].omega, (0.2,0,1))
        vertical_ind(10,0.01*self.car.wheels[3].omega, (0.2,0,1))
        horiz_ind(20, -10.0*self.car.wheels[0].joint.angle, (0,1,0))
        horiz_ind(30, -0.8*self.car.hull.angularVelocity, (1,0,0))
        glEnd()
        self.score_label.text = "%04i" % self.reward
        self.score_label.draw()
Пример #9
0
class CarRacing(gym.Env, EzPickle):

    metadata = {
        'render.modes': ['human', 'rgb_array', 'state_pixels'],
        'video.frames_per_second': FPS
    }

    def __init__(self, obstacle_prob, verbose=1, sensors_activated=True):
        EzPickle.__init__(self)
        self.obstacles_positions = [
        ]  # sera rempli de 4-tuples contenant la position de chaque mur
        self.seed()
        self.contactListener_keepref = FrictionDetector(self)
        self.world = Box2D.b2World(
            (0, 0), contactListener=self.contactListener_keepref)
        self.viewer = None
        self.invisible_state_window = None
        self.invisible_video_window = None
        self.road = None
        self.car = None
        self.reward = 0.0
        self.prev_reward = 0.0
        self.times_succeeded = 0
        self.verbose = verbose
        self.sensors_activated = sensors_activated
        self.fd_tile = fixtureDef(shape=polygonShape(
            vertices=[(0, 0), (1, 0), (1, -1), (0, -1)]))

        self.action_space = spaces.Box(np.array([-1, 0, 0]),
                                       np.array([+1, +1, +1]),
                                       dtype=np.float32)  # steer, gas, brake

        self.observation_space = spaces.Box(low=0,
                                            high=255,
                                            shape=(STATE_H, STATE_W, 3),
                                            dtype=np.uint8)
        self.random_obs = 0
        global OBSTACLE_PROB
        OBSTACLE_PROB = obstacle_prob

    def seed(self, seed=None):
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    def _destroy(self):
        if not self.road:
            return
        for t in self.road:
            self.world.DestroyBody(t)
        self.road = []
        self.car.destroy()

    def _create_track(self):

        CHECKPOINTS = 12  # 12 = nombre de virages (11) + le départ (1)

        # Create checkpoints
        checkpoints = []
        self.obstacles_positions = []
        for c in range(CHECKPOINTS):
            noise = self.np_random.uniform(0, 2 * 3.14159 * 1 / CHECKPOINTS)
            alpha = 2 * 3.14159 * c / CHECKPOINTS + noise
            rad = self.np_random.uniform(TRACK_RAD / 3, TRACK_RAD)

            if c == 0:
                alpha = 0
                rad = 1.5 * TRACK_RAD
            if c == CHECKPOINTS - 1:
                alpha = 2 * 3.14159 * c / CHECKPOINTS
                self.start_alpha = 2 * 3.14159 * (-0.5) / CHECKPOINTS
                rad = 1.5 * TRACK_RAD

            checkpoints.append(
                (alpha, rad * math.cos(alpha), rad * math.sin(alpha)))
        self.road = []

        # Go from one checkpoint to another to create track
        x, y, beta = 1.5 * TRACK_RAD, 0, 0
        dest_i = 0
        laps = 0
        track = []
        no_freeze = 2500
        visited_other_side = False
        while True:
            alpha = math.atan2(y, x)
            if visited_other_side and alpha > 0:
                laps += 1
                visited_other_side = False
            if alpha < 0:
                visited_other_side = True
                alpha += 2 * 3.14159

            while True:  # Find destination from checkpoints
                failed = True

                while True:
                    dest_alpha, dest_x, dest_y = checkpoints[dest_i %
                                                             len(checkpoints)]
                    if alpha <= dest_alpha:
                        failed = False
                        break
                    dest_i += 1
                    if dest_i % len(checkpoints) == 0:
                        break

                if not failed:
                    break

                alpha -= 2 * 3.14159
                continue

            r1x = math.cos(beta)
            r1y = math.sin(beta)
            p1x = -r1y
            p1y = r1x
            dest_dx = dest_x - x  # vector towards destination
            dest_dy = dest_y - y
            # destination vector projected on rad:
            proj = r1x * dest_dx + r1y * dest_dy
            while beta - alpha > 1.5 * 3.14159:
                beta -= 2 * 3.14159
            while beta - alpha < -1.5 * 3.14159:
                beta += 2 * 3.14159
            prev_beta = beta
            proj *= SCALE
            if proj > 0.3:
                beta -= min(TRACK_TURN_RATE, abs(0.001 * proj))
            if proj < -0.3:
                beta += min(TRACK_TURN_RATE, abs(0.001 * proj))
            x += p1x * TRACK_DETAIL_STEP
            y += p1y * TRACK_DETAIL_STEP
            track.append((alpha, prev_beta * 0.5 + beta * 0.5, x, y))
            if laps > 4:
                break
            no_freeze -= 1
            if no_freeze == 0:
                break

        # Find closed loop range i1..i2, first loop should be ignored, second is OK
        i1, i2 = -1, -1
        i = len(track)
        while True:
            i -= 1
            if i == 0:
                return False  # Failed
            pass_through_start = track[i][0] > self.start_alpha and track[
                i - 1][0] <= self.start_alpha
            if pass_through_start and i2 == -1:
                i2 = i
            elif pass_through_start and i1 == -1:
                i1 = i
                break
        if self.verbose == 1:
            print("Track generation: %i..%i -> %i-tiles track" %
                  (i1, i2, i2 - i1))
        assert i1 != -1
        assert i2 != -1

        track = track[i1:i2 - 1]

        first_beta = track[0][1]
        first_perp_x = math.cos(first_beta)
        first_perp_y = math.sin(first_beta)
        # Length of perpendicular jump to put together head and tail
        well_glued_together = np.sqrt(
            np.square(first_perp_x * (track[0][2] - track[-1][2])) +
            np.square(first_perp_y * (track[0][3] - track[-1][3])))
        if well_glued_together > TRACK_DETAIL_STEP:
            return False

        # Red-white border on hard turns
        border = [False] * len(track)
        for i in range(len(track)):
            good = True
            oneside = 0
            for neg in range(BORDER_MIN_COUNT):
                beta1 = track[i - neg - 0][1]
                beta2 = track[i - neg - 1][1]
                good &= abs(beta1 - beta2) > TRACK_TURN_RATE * 0.2
                oneside += np.sign(beta1 - beta2)
            good &= abs(oneside) == BORDER_MIN_COUNT
            border[i] = good
        for i in range(len(track)):
            for neg in range(BORDER_MIN_COUNT):
                border[i - neg] |= border[i]

        # Create tiles and obstacles
        last_obstacle = 15  # pour que le début de course se passe sans obstacle
        for i in range(len(track)):

            alpha1, beta1, x1, y1 = track[i]
            alpha2, beta2, x2, y2 = track[i - 1]
            road1_l = (x1 - TRACK_WIDTH * math.cos(beta1),
                       y1 - TRACK_WIDTH * math.sin(beta1))
            road1_r = (x1 + TRACK_WIDTH * math.cos(beta1),
                       y1 + TRACK_WIDTH * math.sin(beta1))
            road2_l = (x2 - TRACK_WIDTH * math.cos(beta2),
                       y2 - TRACK_WIDTH * math.sin(beta2))
            road2_r = (x2 + TRACK_WIDTH * math.cos(beta2),
                       y2 + TRACK_WIDTH * math.sin(beta2))
            vertices = [road1_l, road1_r, road2_r, road2_l]

            self.fd_tile.shape.vertices = vertices
            t = self.world.CreateStaticBody(fixtures=self.fd_tile)
            t.userData = t
            c = 0.01 * (i % 3)
            t.color = [0, 0.128, 0.624, 0.019]
            t.road_visited = False
            t.road_friction = 1.0
            t.fixtures[0].sensor = True
            self.road_poly.append(([road1_l, road1_r, road2_r,
                                    road2_l], t.color))
            self.road.append(t)
            if border[i]:
                side = np.sign(beta2 - beta1)
                b1_l = (x1 + side * TRACK_WIDTH * math.cos(beta1),
                        y1 + side * TRACK_WIDTH * math.sin(beta1))
                b1_r = (x1 + side * (TRACK_WIDTH + BORDER) * math.cos(beta1),
                        y1 + side * (TRACK_WIDTH + BORDER) * math.sin(beta1))
                b2_l = (x2 + side * TRACK_WIDTH * math.cos(beta2),
                        y2 + side * TRACK_WIDTH * math.sin(beta2))
                b2_r = (x2 + side * (TRACK_WIDTH + BORDER) * math.cos(beta2),
                        y2 + side * (TRACK_WIDTH + BORDER) * math.sin(beta2))
                self.road_poly.append(([b1_l, b1_r, b2_r,
                                        b2_l], (1, 1, 1) if i % 2 == 0 else
                                       (1, 0, 0)))

            self.random_obs += 1
            self.seed(self.random_obs)
            if (self.np_random.uniform(0, 1) < OBSTACLE_PROB) and (
                    last_obstacle <=
                    0):  # i > 15 pour que la course soit toujours faisable
                last_obstacle = 8

                deriv_left = self.np_random.uniform(TRACK_WIDTH)
                deriv_right = TRACK_WIDTH - deriv_left

                obs1_l = (x1 - (TRACK_WIDTH - deriv_left) * math.cos(beta1),
                          y1 - (TRACK_WIDTH - deriv_left) * math.sin(beta1))
                obs1_r = (x1 + (TRACK_WIDTH - deriv_right) * math.cos(beta1),
                          y1 + (TRACK_WIDTH - deriv_right) * math.sin(beta1))
                obs2_l = (x2 - (TRACK_WIDTH - deriv_left) * math.cos(beta2),
                          y2 - (TRACK_WIDTH - deriv_left) * math.sin(beta2))
                obs2_r = (x2 + (TRACK_WIDTH - deriv_right) * math.cos(beta2),
                          y2 + (TRACK_WIDTH - deriv_right) * math.sin(beta2))

                self.obstacles_positions.append(
                    (obs1_l, obs1_r, obs2_r, obs2_l))
                vertices = [obs1_l, obs1_r, obs2_r, obs2_l]
                obstacle = fixtureDef(shape=polygonShape(vertices=vertices))

                obstacle.userData = obstacle
                obstacle.color = [0.1568, 0.598, 0.2862, 0]

                self.road_poly.append(([obs1_l, obs1_r, obs2_r,
                                        obs2_l], obstacle.color))
            last_obstacle -= 1

        self.track = track
        return True

    def reset(self):
        self._destroy()
        self.reward = 0.0
        self.prev_reward = 0.0
        self.tile_visited_count = 0
        self.t = 0.0
        self.road_poly = []

        while True:
            success = self._create_track()
            if success:
                break
            if self.verbose == 1:
                print("retry to generate track (normal if there are not many"
                      "instances of this message)")
        self.car = Car(self.world,
                       *self.track[0][1:4],
                       sensors_activated=self.sensors_activated)

        return self.step([0, 0.1, 0])[0]

    def step(self, action):

        if action is not None:
            self.car.steer(-action[0])
            self.car.gas(action[1])
            self.car.brake(action[2])
        self.car.step(1.0 / FPS)
        self.world.Step(1.0 / FPS, 6 * 30, 2 * 30)
        self.t += 1.0 / FPS

        step_reward = 0
        done = False
        INF = 10000

        state = np.full(SENSOR_NB, INF, dtype=float)

        wall = [False] * len(state)
        if action is not None:  # First step without action, called from reset()

            self.car.fuel_spent = 0.0
            step_reward = self.reward - self.prev_reward
            self.prev_reward = self.reward

            if self.tile_visited_count == len(self.track):
                done = True
                self.times_succeeded += 1

            x, y = self.car.hull.position

            # Vérification des collisions avec les obstacles:
            for i in range(len(self.obstacles_positions)):
                obs1_l, obs1_r, obs2_r, obs2_l = self.obstacles_positions[i]
                if self.isInsideObstacle((x, y), obs1_l, obs1_r, obs2_l,
                                         obs2_r):
                    done = True

            contact = False
            for w in self.car.wheels:
                tiles = w.contacts
                if (tiles.__len__() > 0):
                    LOCATION = "TILE"
                elif (tiles.__len__() == 0
                      ):  # vraie détection de sortie de route
                    LOCATION = "GRASS"
                    done = True
                    #step_reward -= 400
                    contact = True

            # SENSORS
            for i in range(len(
                    self.car.sensors)):  #check if sensors collide with grass
                tiles = self.car.sensors[i].contacts
                sensor_x = self.car.sensors[i].position.x
                sensor_y = self.car.sensors[i].position.y
                point1 = np.array([sensor_x, sensor_y])
                point2 = np.array([x, y])

                self.car.sensors[i].color = (0, 1, 0)
                if not wall[i % SENSOR_NB]:
                    state[i % SENSOR_NB] = INF

                if (tiles.__len__() == 0):
                    # Sensor de sortie de circuit
                    self.car.sensors[i].color = (0, 0, 0)

                    if not wall[i % SENSOR_NB]:
                        state[i % SENSOR_NB] = np.linalg.norm(point1 - point2)
                        wall[i % SENSOR_NB] = True
                else:

                    # Sensor d'obstacle
                    in_obstacle = False

                    for j in range(len(self.obstacles_positions)):
                        obs1_l, obs1_r, obs2_r, obs2_l = self.obstacles_positions[
                            j]
                        if self.isInsideObstacle((sensor_x, sensor_y), obs1_l,
                                                 obs1_r, obs2_l, obs2_r):
                            in_obstacle = True
                    if in_obstacle:

                        self.car.sensors[i].color = (0, 0, 0)
                        if not wall[i % SENSOR_NB]:
                            state[i % SENSOR_NB] = np.linalg.norm(point1 -
                                                                  point2)
                            wall[i % SENSOR_NB] = True

        true_speed = np.sqrt(
            np.square(self.car.hull.linearVelocity[0]) +
            np.square(self.car.hull.linearVelocity[1]))
        return np.append(state, true_speed), step_reward, done

    def isInsideObstacle(self, ref_pos, pos1, pos2, pos3, pos4):
        """
        Vérifie si le point ref_pos se trouve à l'intérieur du quadrilatère composé
        à partir de pos1, pos2, pos3 et pos4
        """
        x, y = ref_pos
        x1, y1 = pos1
        x2, y2 = pos2
        x3, y3 = pos3
        x4, y4 = pos4

        return ((((x2 - x1) * (y - y1)) - ((x - x1) * (y2 - y1)) <= 0)
                and ((((x1 - x3) * (y - y3)) - ((x - x3) * (y1 - y3))) <= 0)
                and ((((x3 - x4) * (y - y4)) - ((x - x4) * (y3 - y4))) <= 0)
                and ((((x4 - x2) * (y - y2)) - ((x - x2) * (y4 - y2))) <= 0))

    def render(self, mode='human'):
        assert mode in ['human', 'state_pixels', 'rgb_array']
        if self.viewer is None:
            from gym.envs.classic_control import rendering
            self.viewer = rendering.Viewer(WINDOW_W, WINDOW_H)
            self.score_label = pyglet.text.Label('0000',
                                                 font_size=36,
                                                 x=20,
                                                 y=WINDOW_H * 2.5 / 40.00,
                                                 anchor_x='left',
                                                 anchor_y='center',
                                                 color=(255, 255, 255, 255))
            self.tile_label = pyglet.text.Label('000',
                                                font_size=36,
                                                x=1,
                                                y=WINDOW_H * 2 / 2.1,
                                                anchor_x='left',
                                                anchor_y='center',
                                                color=(255, 255, 255, 255))
            self.transform = rendering.Transform()

        if "t" not in self.__dict__:
            return  # reset() not called yet

        if ZOOM_FOLLOW:
            # Animate zoom first second:
            zoom = 0.1 * SCALE * max(1 - self.t, 0) + ZOOM * SCALE * min(
                self.t, 1)
        else:
            zoom = ZOOM
        scroll_x = self.car.hull.position[0]
        scroll_y = self.car.hull.position[1]
        angle = -self.car.hull.angle
        vel = self.car.hull.linearVelocity
        if np.linalg.norm(vel) > 0.5:
            angle = math.atan2(vel[0], vel[1])
        self.transform.set_scale(zoom, zoom)
        self.transform.set_translation(
            WINDOW_W / 2 - (scroll_x * zoom * math.cos(angle) -
                            scroll_y * zoom * math.sin(angle)),
            WINDOW_H / 4 - (scroll_x * zoom * math.sin(angle) +
                            scroll_y * zoom * math.cos(angle)))
        self.transform.set_rotation(angle)

        self.car.draw(self.viewer, mode != "state_pixels")

        arr = None
        win = self.viewer.window
        win.switch_to()
        win.dispatch_events()

        win.clear()
        t = self.transform
        if mode == 'rgb_array':
            VP_W = VIDEO_W
            VP_H = VIDEO_H
        elif mode == 'state_pixels':
            VP_W = STATE_W
            VP_H = STATE_H
        else:
            pixel_scale = 1
            if hasattr(win.context, '_nscontext'):
                pixel_scale = win.context._nscontext.view().backingScaleFactor(
                )  # pylint: disable=protected-access
            VP_W = int(pixel_scale * WINDOW_W)
            VP_H = int(pixel_scale * WINDOW_H)

        gl.glViewport(0, 0, VP_W, VP_H)
        t.enable()
        self.render_road()
        for geom in self.viewer.onetime_geoms:
            geom.render()
        self.viewer.onetime_geoms = []
        t.disable()
        self.render_indicators(WINDOW_W, WINDOW_H)

        if mode == 'human':
            win.flip()
            return self.viewer.isopen
        image_data = (pyglet.image.get_buffer_manager().get_color_buffer().
                      get_image_data())
        arr = np.fromstring(image_data.get_data(), dtype=np.uint8, sep="")
        arr = arr.reshape(VP_H, VP_W, 4)
        arr = arr[::-1, :, 0:3]

        return arr

    def close(self):
        if self.viewer is not None:
            self.viewer.close()
            self.viewer = None

    def render_road(self):
        gl.glBegin(gl.GL_QUADS)
        gl.glColor4f(0, 0, 0, 0)
        k = PLAYFIELD / 20.0
        for x in range(-20, 20, 2):
            for y in range(-20, 20, 2):
                gl.glVertex3f(k * x + k, k * y + 0, 0)
                gl.glVertex3f(k * x + 0, k * y + 0, 0)
                gl.glVertex3f(k * x + 0, k * y + k, 0)
                gl.glVertex3f(k * x + k, k * y + k, 0)
        for poly, color in self.road_poly:
            gl.glColor4f(color[0], color[1], color[2], 1)
            for p in poly:
                gl.glVertex3f(p[0], p[1], 0)
        gl.glEnd()

    def render_indicators(self, W, H):
        gl.glBegin(gl.GL_QUADS)
        s = W / 40.0
        h = H / 40.0
        gl.glColor4f(0, 0, 0, 1)
        gl.glVertex3f(W, 0, 0)
        gl.glVertex3f(W, 5 * h, 0)
        gl.glVertex3f(0, 5 * h, 0)
        gl.glVertex3f(0, 0, 0)

        def vertical_ind(place, val, color):
            gl.glColor4f(color[0], color[1], color[2], 1)
            gl.glVertex3f((place + 0) * s, h + h * val, 0)
            gl.glVertex3f((place + 1) * s, h + h * val, 0)
            gl.glVertex3f((place + 1) * s, h, 0)
            gl.glVertex3f((place + 0) * s, h, 0)

        def horiz_ind(place, val, color):
            gl.glColor4f(color[0], color[1], color[2], 1)
            gl.glVertex3f((place + 0) * s, 4 * h, 0)
            gl.glVertex3f((place + val) * s, 4 * h, 0)
            gl.glVertex3f((place + val) * s, 2 * h, 0)
            gl.glVertex3f((place + 0) * s, 2 * h, 0)

        true_speed = np.sqrt(
            np.square(self.car.hull.linearVelocity[0]) +
            np.square(self.car.hull.linearVelocity[1]))
        vertical_ind(5, 0.02 * true_speed, (1, 1, 1))
        vertical_ind(7, 0.01 * self.car.wheels[0].omega,
                     (0.0, 0, 1))  # ABS sensors
        vertical_ind(8, 0.01 * self.car.wheels[1].omega, (0.0, 0, 1))
        vertical_ind(9, 0.01 * self.car.wheels[2].omega, (0.2, 0, 1))
        vertical_ind(10, 0.01 * self.car.wheels[3].omega, (0.2, 0, 1))
        horiz_ind(20, -10.0 * self.car.wheels[0].joint.angle, (0, 1, 0))
        horiz_ind(30, -0.8 * self.car.hull.angularVelocity, (1, 0, 0))
        gl.glEnd()
        self.score_label.text = "%04i" % self.reward
        self.score_label.draw()

        global Amount_Left

        self.tile_label.text = "%4i" % Amount_Left
        self.tile_label.draw()

    def addSensorBorder(self, x1, y1, x2, y2):
        """
        Fonction qui ajoute les bords de la route comme segments de droite "Segment2D"
        afin d'avoir des points de repère pour nos sensors.
        """
        pt1 = Point2D(x1, y1, evaluate=False)
        pt2 = Point2D(x2, y2, evaluate=False)

        self.sensorBorder.append(Segment2D(pt1, pt2, evaluate=False))

    def setAngleZero(self):
        self.car.hull.angle = 0
Пример #10
0
class CarRacing(gym.Env):
    metadata = {
        'render.modes': ['human', 'rgb_array', 'state_pixels'],
        'video.frames_per_second' : FPS
    }

    def __init__(self):
        self.action_space = spaces.Box( np.array([-1,0,0]), np.array([+1,+1,+1]) )  # steer, gas, brake
        self.observation_space = spaces.Box(low=0, high=255, shape=(STATE_H, STATE_W, 3))
        self.world = Box2D.b2World((0,0), contactListener=FrictionDetector(self))
        self.viewer = None
        self.invisible_state_window = None
        self.invisible_video_window = None
        self.road = None
        self.car = None
        self.reward = 0.0
        self.prev_reward = 0.0

    def _destroy(self):
        if not self.road: return
        for t in self.road:
            self.world.DestroyBody(t)
        self.road = []
        self.car.destroy()

    def _create_track(self):
        CHECKPOINTS = 12

        # Create checkpoints
        checkpoints = []
        for c in range(CHECKPOINTS):
            alpha = 2*math.pi*c/CHECKPOINTS + np.random.uniform(0, 2*math.pi*1/CHECKPOINTS)
            rad = np.random.uniform(TRACK_RAD/3, TRACK_RAD)
            if c==0:
                alpha = 0
                rad = 1.5*TRACK_RAD
            if c==CHECKPOINTS-1:
                alpha = 2*math.pi*c/CHECKPOINTS
                self.start_alpha = 2*math.pi*(-0.5)/CHECKPOINTS
                rad = 1.5*TRACK_RAD
            checkpoints.append( (alpha, rad*math.cos(alpha), rad*math.sin(alpha)) )

        #print "\n".join(str(h) for h in checkpoints)
        #self.road_poly = [ (    # uncomment this to see checkpoints
        #    [ (tx,ty) for a,tx,ty in checkpoints ],
        #    (0.7,0.7,0.9) ) ]
        self.road = []

        # Go from one checkpoint to another to create track
        x, y, beta = 1.5*TRACK_RAD, 0, 0
        dest_i = 0
        laps = 0
        track = []
        no_freeze = 2500
        visited_other_side = False
        while 1:
            alpha = math.atan2(y, x)
            if visited_other_side and alpha > 0:
                laps += 1
                visited_other_side = False
            if alpha < 0:
                visited_other_side = True
                alpha += 2*math.pi
            while True: # Find destination from checkpoints
                failed = True
                while True:
                    dest_alpha, dest_x, dest_y = checkpoints[dest_i % len(checkpoints)]
                    if alpha <= dest_alpha:
                        failed = False
                        break
                    dest_i += 1
                    if dest_i % len(checkpoints) == 0: break
                if not failed: break
                alpha -= 2*math.pi
                continue
            r1x = math.cos(beta)
            r1y = math.sin(beta)
            p1x = -r1y
            p1y = r1x
            dest_dx = dest_x - x  # vector towards destination
            dest_dy = dest_y - y
            proj = r1x*dest_dx + r1y*dest_dy  # destination vector projected on rad
            while beta - alpha >  1.5*math.pi: beta -= 2*math.pi
            while beta - alpha < -1.5*math.pi: beta += 2*math.pi
            prev_beta = beta
            proj *= SCALE
            if proj >  0.3: beta -= min(TRACK_TURN_RATE, abs(0.001*proj))
            if proj < -0.3: beta += min(TRACK_TURN_RATE, abs(0.001*proj))
            x += p1x*TRACK_DETAIL_STEP
            y += p1y*TRACK_DETAIL_STEP
            track.append( (alpha,prev_beta*0.5 + beta*0.5,x,y) )
            if laps > 4: break
            no_freeze -= 1
            if no_freeze==0: break
        #print "\n".join([str(t) for t in enumerate(track)])

        # Find closed loop range i1..i2, first loop should be ignored, second is OK
        i1, i2 = -1, -1
        i = len(track)
        while True:
            i -= 1
            if i==0: return False  # Failed
            pass_through_start = track[i][0] > self.start_alpha and track[i-1][0] <= self.start_alpha
            if pass_through_start and i2==-1:
                i2 = i
            elif pass_through_start and i1==-1:
                i1 = i
                break
        print("Track generation: %i..%i -> %i-tiles track" % (i1, i2, i2-i1))
        assert i1!=-1
        assert i2!=-1

        track = track[i1:i2-1]

        first_beta = track[0][1]
        first_perp_x = math.cos(first_beta)
        first_perp_y = math.sin(first_beta)
        # Length of perpendicular jump to put together head and tail
        well_glued_together = np.sqrt(
            np.square( first_perp_x*(track[0][2] - track[-1][2]) ) +
            np.square( first_perp_y*(track[0][3] - track[-1][3]) ))
        if well_glued_together > TRACK_DETAIL_STEP:
            return False

        # Red-white border on hard turns
        border = [False]*len(track)
        for i in range(len(track)):
            good = True
            oneside = 0
            for neg in range(BORDER_MIN_COUNT):
                beta1 = track[i-neg-0][1]
                beta2 = track[i-neg-1][1]
                good &= abs(beta1 - beta2) > TRACK_TURN_RATE*0.2
                oneside += np.sign(beta1 - beta2)
            good &= abs(oneside) == BORDER_MIN_COUNT
            border[i] = good
        for i in range(len(track)):
            for neg in range(BORDER_MIN_COUNT):
                border[i-neg] |= border[i]

        # Create tiles
        for i in range(len(track)):
            alpha1, beta1, x1, y1 = track[i]
            alpha2, beta2, x2, y2 = track[i-1]
            road1_l = (x1 - TRACK_WIDTH*math.cos(beta1), y1 - TRACK_WIDTH*math.sin(beta1))
            road1_r = (x1 + TRACK_WIDTH*math.cos(beta1), y1 + TRACK_WIDTH*math.sin(beta1))
            road2_l = (x2 - TRACK_WIDTH*math.cos(beta2), y2 - TRACK_WIDTH*math.sin(beta2))
            road2_r = (x2 + TRACK_WIDTH*math.cos(beta2), y2 + TRACK_WIDTH*math.sin(beta2))
            t = self.world.CreateStaticBody( fixtures = fixtureDef(
                shape=polygonShape(vertices=[road1_l, road1_r, road2_r, road2_l])
                ))
            t.userData = t
            c = 0.01*(i%3)
            t.color = [ROAD_COLOR[0] + c, ROAD_COLOR[1] + c, ROAD_COLOR[2] + c]
            t.road_visited = False
            t.road_friction = 1.0
            t.fixtures[0].sensor = True
            self.road_poly.append(( [road1_l, road1_r, road2_r, road2_l], t.color ))
            self.road.append(t)
            if border[i]:
                side = np.sign(beta2 - beta1)
                b1_l = (x1 + side* TRACK_WIDTH        *math.cos(beta1), y1 + side* TRACK_WIDTH        *math.sin(beta1))
                b1_r = (x1 + side*(TRACK_WIDTH+BORDER)*math.cos(beta1), y1 + side*(TRACK_WIDTH+BORDER)*math.sin(beta1))
                b2_l = (x2 + side* TRACK_WIDTH        *math.cos(beta2), y2 + side* TRACK_WIDTH        *math.sin(beta2))
                b2_r = (x2 + side*(TRACK_WIDTH+BORDER)*math.cos(beta2), y2 + side*(TRACK_WIDTH+BORDER)*math.sin(beta2))
                self.road_poly.append(( [b1_l, b1_r, b2_r, b2_l], (1,1,1) if i%2==0 else (1,0,0) ))
        self.track = track
        return True

    def _reset(self):
        self._destroy()
        self.reward = 0.0
        self.prev_reward = 0.0
        self.tile_visited_count = 0
        self.t = 0.0
        self.road_poly = []
        self.human_render = False

        while True:
            success = self._create_track()
            if success: break
            print("retry to generate track (normal if there are not many of this messages)")
        self.car = Car(self.world, *self.track[0][1:4])

        return self._step(None)[0]

    def _step(self, action):
        if action is not None:
            self.car.steer(-action[0])
            self.car.gas(action[1])
            self.car.brake(action[2])

        self.car.step(1.0/FPS)
        self.world.Step(1.0/FPS, 6*30, 2*30)
        self.t += 1.0/FPS

        self.state = self._render("state_pixels")

        step_reward = 0
        done = False
        if action is not None: # First step without action, called from reset()
            self.reward -= 0.1
            # We actually don't want to count fuel spent, we want car to be faster.
            #self.reward -=  10 * self.car.fuel_spent / ENGINE_POWER
            self.car.fuel_spent = 0.0
            step_reward = self.reward - self.prev_reward
            self.prev_reward = self.reward
            if self.tile_visited_count==len(self.track):
                done = True
            x, y = self.car.hull.position
            if abs(x) > PLAYFIELD or abs(y) > PLAYFIELD:
                done = True
                step_reward = -100

        return self.state, step_reward, done, {}

    def _render(self, mode='human', close=False):
        if close:
            if self.viewer is not None:
                self.viewer.close()
                self.viewer = None
            return

        if self.viewer is None:
            self.viewer = rendering.Viewer(WINDOW_W, WINDOW_H)
            self.score_label = pyglet.text.Label('0000', font_size=36,
                x=20, y=WINDOW_H*2.5/40.00, anchor_x='left', anchor_y='center',
                color=(255,255,255,255))
            self.transform = rendering.Transform()

        if "t" not in self.__dict__: return  # reset() not called yet

        zoom = 0.1*SCALE*max(1-self.t, 0) + ZOOM*SCALE*min(self.t, 1)   # Animate zoom first second
        zoom_state  = ZOOM*SCALE*STATE_W/WINDOW_W
        zoom_video  = ZOOM*SCALE*VIDEO_W/WINDOW_W
        scroll_x = self.car.hull.position[0]
        scroll_y = self.car.hull.position[1]
        angle = -self.car.hull.angle
        vel = self.car.hull.linearVelocity
        if np.linalg.norm(vel) > 0.5:
            angle = math.atan2(vel[0], vel[1])
        self.transform.set_scale(zoom, zoom)
        self.transform.set_translation(
            WINDOW_W/2 - (scroll_x*zoom*math.cos(angle) - scroll_y*zoom*math.sin(angle)),
            WINDOW_H/4 - (scroll_x*zoom*math.sin(angle) + scroll_y*zoom*math.cos(angle)) )
        self.transform.set_rotation(angle)

        self.car.draw(self.viewer, mode!="state_pixels")

        arr = None
        win = self.viewer.window
        win.switch_to()
        win.dispatch_events()
        if mode=="rgb_array" or mode=="state_pixels":
            win.clear()
            t = self.transform
            if mode=='rgb_array':
                VP_W = VIDEO_W
                VP_H = VIDEO_H
            else:
                VP_W = STATE_W
                VP_H = STATE_H
            glViewport(0, 0, VP_W, VP_H)
            t.enable()
            self._render_road()
            for geom in self.viewer.onetime_geoms:
                geom.render()
            t.disable()
            self._render_indicators(WINDOW_W, WINDOW_H)  # TODO: find why 2x needed, wtf
            image_data = pyglet.image.get_buffer_manager().get_color_buffer().get_image_data()
            arr = np.fromstring(image_data.data, dtype=np.uint8, sep='')
            arr = arr.reshape(VP_H, VP_W, 4)
            arr = arr[::-1, :, 0:3]

        if mode=="rgb_array" and not self.human_render: # agent can call or not call env.render() itself when recording video.
            win.flip()

        if mode=='human':
            self.human_render = True
            win.clear()
            t = self.transform
            glViewport(0, 0, WINDOW_W, WINDOW_H)
            t.enable()
            self._render_road()
            for geom in self.viewer.onetime_geoms:
                geom.render()
            t.disable()
            self._render_indicators(WINDOW_W, WINDOW_H)
            win.flip()

        self.viewer.onetime_geoms = []
        return arr

    def _render_road(self):
        glBegin(GL_QUADS)
        glColor4f(0.4, 0.8, 0.4, 1.0)
        glVertex3f(-PLAYFIELD, +PLAYFIELD, 0)
        glVertex3f(+PLAYFIELD, +PLAYFIELD, 0)
        glVertex3f(+PLAYFIELD, -PLAYFIELD, 0)
        glVertex3f(-PLAYFIELD, -PLAYFIELD, 0)
        glColor4f(0.4, 0.9, 0.4, 1.0)
        k = PLAYFIELD/20.0
        for x in range(-20, 20, 2):
            for y in range(-20, 20, 2):
                glVertex3f(k*x + k, k*y + 0, 0)
                glVertex3f(k*x + 0, k*y + 0, 0)
                glVertex3f(k*x + 0, k*y + k, 0)
                glVertex3f(k*x + k, k*y + k, 0)
        for poly, color in self.road_poly:
            glColor4f(color[0], color[1], color[2], 1)
            for p in poly:
                glVertex3f(p[0], p[1], 0)
        glEnd()

    def _render_indicators(self, W, H):
        glBegin(GL_QUADS)
        s = W/40.0
        h = H/40.0
        glColor4f(0,0,0,1)
        glVertex3f(W, 0, 0)
        glVertex3f(W, 5*h, 0)
        glVertex3f(0, 5*h, 0)
        glVertex3f(0, 0, 0)
        def vertical_ind(place, val, color):
            glColor4f(color[0], color[1], color[2], 1)
            glVertex3f((place+0)*s, h + h*val, 0)
            glVertex3f((place+1)*s, h + h*val, 0)
            glVertex3f((place+1)*s, h, 0)
            glVertex3f((place+0)*s, h, 0)
        def horiz_ind(place, val, color):
            glColor4f(color[0], color[1], color[2], 1)
            glVertex3f((place+0)*s, 4*h , 0)
            glVertex3f((place+val)*s, 4*h, 0)
            glVertex3f((place+val)*s, 2*h, 0)
            glVertex3f((place+0)*s, 2*h, 0)
        true_speed = np.sqrt(np.square(self.car.hull.linearVelocity[0]) + np.square(self.car.hull.linearVelocity[1]))
        vertical_ind(5, 0.02*true_speed, (1,1,1))
        vertical_ind(7, 0.01*self.car.wheels[0].omega, (0.0,0,1)) # ABS sensors
        vertical_ind(8, 0.01*self.car.wheels[1].omega, (0.0,0,1))
        vertical_ind(9, 0.01*self.car.wheels[2].omega, (0.2,0,1))
        vertical_ind(10,0.01*self.car.wheels[3].omega, (0.2,0,1))
        horiz_ind(20, -10.0*self.car.wheels[0].joint.angle, (0,1,0))
        horiz_ind(30, -0.8*self.car.hull.angularVelocity, (1,0,0))
        glEnd()
        self.score_label.text = "%04i" % self.reward
        self.score_label.draw()
Пример #11
0
def main():
    # load in map
    map_file = "search_planning_algos/maps/map3.npy"
    map = np.load(map_file)
    Y, X = map.shape
    dy, dx = 1.0, 1.0
    miny, minx = 0, 0

    # define car
    wheel_radius = 0  # anything non-zero is an obstacle

    # define search params
    eps = 4
    dist_cost = 1
    time_cost = 1
    roughness_cost = 1
    cost_weights = (dist_cost, time_cost, roughness_cost)

    # define action space
    dt = 0.1
    T = 1.0
    velocities = np.linspace(start=1, stop=2, num=2) / dt
    dv = velocities[1] - velocities[0]
    steer_angles = np.linspace(-math.pi / 32, math.pi / 32, num=5)

    # define heading space
    start, stop, step = 0, 315, 45
    num_thetas = int((stop - start) / step) + 1
    thetas = np.linspace(start=0, stop=315, num=num_thetas)
    thetas = thetas / RAD_TO_DEG  # convert to radians
    dtheta = step / RAD_TO_DEG

    # collective variables for discretizing C-sapce
    dstate = np.array([dx, dy, dtheta, dv, dt])
    min_state = np.array([minx, miny, min(thetas), min(velocities), 0])

    # create planner and graph
    prior_map = np.zeros_like(map)
    graph = Graph(map=prior_map,
                  min_state=min_state,
                  dstate=dstate,
                  thetas=thetas,
                  wheel_radius=wheel_radius,
                  cost_weights=cost_weights)
    car = Car(max_steer=max(steer_angles), max_v=max(velocities))
    planner = LatticeDstarLite(graph=graph,
                               car=car,
                               min_state=min_state,
                               dstate=dstate,
                               velocities=velocities,
                               steer_angles=steer_angles,
                               thetas=thetas,
                               T=T,
                               eps=eps,
                               viz=True)

    # define start and  goal (x,y) need to be made continuous
    # since I selected those points on image map of discrete space
    start = (np.array([60, 40, 0, velocities[0], 0]) *
             np.array([dx, dy, 1, 1, 1]))
    # looks like goal should face up, but theta is chosen
    # in image-frame as is the y-coordinates, so -90 faces
    # upwards on our screen and +90 faces down... it looks
    goal = (np.array([85, 65, -math.pi / 2, velocities[0], 0]) *
            np.array([dx, dy, 1, 1, 1]))

    # run planner
    simulate_plan_execution(start=start,
                            goal=goal,
                            planner=planner,
                            true_map=map)
class CarRacing(gym.Env, EzPickle):
    metadata = {
        "render.modes": ["human", "rgb_array", "state_pixels"],
        "video.frames_per_second": FPS,
    }

    def __init__(self, verbose=1):
        EzPickle.__init__(self)
        self.seed()
        self.contactListener_keepref = FrictionDetector(self)
        self.world = Box2D.b2World(
            (0, 0), contactListener=self.contactListener_keepref)
        self.viewer = None
        self.invisible_state_window = None
        self.invisible_video_window = None
        self.road = None
        self.car = None
        self.reward = 0.0
        self.prev_reward = 0.0
        self.verbose = verbose
        gaussian = np.random.normal(FRICTION, 0.2, 1000)
        self.friction_values = gaussian[(gaussian > 0.1) & (gaussian < 1.0)]
        self.friction_change = random.randrange(5, 15) / 10.0
        #plt.hist(self.friction_values, 10)
        #plt.show()
        self.fd_tile = fixtureDef(shape=polygonShape(
            vertices=[(0, 0), (1, 0), (1, -1), (0, -1)]))

        self.action_space = spaces.Box(np.array([-1, 0, 0]),
                                       np.array([+1, +1, +1]),
                                       dtype=np.float32)  # steer, gas, brake

        self.observation_space = spaces.Box(low=0,
                                            high=255,
                                            shape=(STATE_H, STATE_W, 3),
                                            dtype=np.uint8)

    def seed(self, seed=None):
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    def _destroy(self):
        if not self.road:
            return
        for t in self.road:
            self.world.DestroyBody(t)
        self.road = []
        self.car.destroy()

    def _create_track(self, noises=[], rads=[]):
        CHECKPOINTS = 12

        # Create checkpoints
        checkpoints = []
        for c in range(CHECKPOINTS):
            if len(noises) <= c:
                noise = self.np_random.uniform(0,
                                               2 * math.pi * 1 / CHECKPOINTS)
                noises.append(noise)
            else:
                noise = noises[c]
            alpha = 2 * math.pi * c / CHECKPOINTS + noise
            if len(rads) <= c:
                rad = self.np_random.uniform(TRACK_RAD / 3, TRACK_RAD)
                rads.append(rad)
            else:
                rad = rads[c]

            if c == 0:
                alpha = 0
                rad = 1.5 * TRACK_RAD
            if c == CHECKPOINTS - 1:
                alpha = 2 * math.pi * c / CHECKPOINTS
                self.start_alpha = 2 * math.pi * (-0.5) / CHECKPOINTS
                rad = 1.5 * TRACK_RAD

            checkpoints.append(
                (alpha, rad * math.cos(alpha), rad * math.sin(alpha)))
        self.road = []

        # Go from one checkpoint to another to create track
        x, y, beta = 1.5 * TRACK_RAD, 0, 0
        dest_i = 0
        laps = 0
        track = []
        no_freeze = 2500
        visited_other_side = False
        while True:
            alpha = math.atan2(y, x)
            if visited_other_side and alpha > 0:
                laps += 1
                visited_other_side = False
            if alpha < 0:
                visited_other_side = True
                alpha += 2 * math.pi

            while True:  # Find destination from checkpoints
                failed = True

                while True:
                    dest_alpha, dest_x, dest_y = checkpoints[dest_i %
                                                             len(checkpoints)]
                    if alpha <= dest_alpha:
                        failed = False
                        break
                    dest_i += 1
                    if dest_i % len(checkpoints) == 0:
                        break

                if not failed:
                    break

                alpha -= 2 * math.pi
                continue

            r1x = math.cos(beta)
            r1y = math.sin(beta)
            p1x = -r1y
            p1y = r1x
            dest_dx = dest_x - x  # vector towards destination
            dest_dy = dest_y - y
            # destination vector projected on rad:
            proj = r1x * dest_dx + r1y * dest_dy
            while beta - alpha > 1.5 * math.pi:
                beta -= 2 * math.pi
            while beta - alpha < -1.5 * math.pi:
                beta += 2 * math.pi
            prev_beta = beta
            proj *= SCALE
            if proj > 0.3:
                beta -= min(TRACK_TURN_RATE, abs(0.001 * proj))
            if proj < -0.3:
                beta += min(TRACK_TURN_RATE, abs(0.001 * proj))
            x += p1x * TRACK_DETAIL_STEP
            y += p1y * TRACK_DETAIL_STEP
            track.append((alpha, prev_beta * 0.5 + beta * 0.5, x, y))
            if laps > 4:
                break
            no_freeze -= 1
            if no_freeze == 0:
                break

        # Find closed loop range i1..i2, first loop should be ignored, second is OK
        i1, i2 = -1, -1
        i = len(track)
        while True:
            i -= 1
            if i == 0:
                return False  # Failed
            pass_through_start = (track[i][0] > self.start_alpha
                                  and track[i - 1][0] <= self.start_alpha)
            if pass_through_start and i2 == -1:
                i2 = i
            elif pass_through_start and i1 == -1:
                i1 = i
                break
        #if self.verbose == 1:
        #print("Track generation: %i..%i -> %i-tiles track" % (i1, i2, i2 - i1))
        assert i1 != -1
        assert i2 != -1

        track = track[i1:i2 - 1]

        first_beta = track[0][1]
        first_perp_x = math.cos(first_beta)
        first_perp_y = math.sin(first_beta)
        # Length of perpendicular jump to put together head and tail
        well_glued_together = np.sqrt(
            np.square(first_perp_x * (track[0][2] - track[-1][2])) +
            np.square(first_perp_y * (track[0][3] - track[-1][3])))
        if well_glued_together > TRACK_DETAIL_STEP:
            return False

        # Red-white border on hard turns
        border = [False] * len(track)
        for i in range(len(track)):
            good = True
            oneside = 0
            for neg in range(BORDER_MIN_COUNT):
                beta1 = track[i - neg - 0][1]
                beta2 = track[i - neg - 1][1]
                good &= abs(beta1 - beta2) > TRACK_TURN_RATE * 0.2
                oneside += np.sign(beta1 - beta2)
            good &= abs(oneside) == BORDER_MIN_COUNT
            border[i] = good
        for i in range(len(track)):
            for neg in range(BORDER_MIN_COUNT):
                border[i - neg] |= border[i]

        # Create tiles
        for i in range(len(track)):
            _, beta1, x1, y1 = track[i]
            _, beta2, x2, y2 = track[i - 1]
            road1_l = (
                x1 - TRACK_WIDTH * math.cos(beta1),
                y1 - TRACK_WIDTH * math.sin(beta1),
            )
            road1_r = (
                x1 + TRACK_WIDTH * math.cos(beta1),
                y1 + TRACK_WIDTH * math.sin(beta1),
            )
            road2_l = (
                x2 - TRACK_WIDTH * math.cos(beta2),
                y2 - TRACK_WIDTH * math.sin(beta2),
            )
            road2_r = (
                x2 + TRACK_WIDTH * math.cos(beta2),
                y2 + TRACK_WIDTH * math.sin(beta2),
            )
            vertices = [road1_l, road1_r, road2_r, road2_l]
            self.fd_tile.shape.vertices = vertices
            t = self.world.CreateStaticBody(fixtures=self.fd_tile)
            t.road_id = i
            t.center_p = (x1, y1)
            t.userData = t
            c = 0.01 * (i % 3)
            t.color = [ROAD_COLOR[0] + c, ROAD_COLOR[1] + c, ROAD_COLOR[2] + c]
            t.road_visited = False
            if i % 10 == 0:
                self.friction_change = random.randrange(5, 15) / 10.0
            t.road_friction = np.random.choice(
                self.friction_values) * self.friction_change
            t.fixtures[0].sensor = True
            self.road_poly.append(([road1_l, road1_r, road2_r,
                                    road2_l], t.color))
            self.road.append(t)
            if border[i]:
                side = np.sign(beta2 - beta1)
                b1_l = (
                    x1 + side * TRACK_WIDTH * math.cos(beta1),
                    y1 + side * TRACK_WIDTH * math.sin(beta1),
                )
                b1_r = (
                    x1 + side * (TRACK_WIDTH + BORDER) * math.cos(beta1),
                    y1 + side * (TRACK_WIDTH + BORDER) * math.sin(beta1),
                )
                b2_l = (
                    x2 + side * TRACK_WIDTH * math.cos(beta2),
                    y2 + side * TRACK_WIDTH * math.sin(beta2),
                )
                b2_r = (
                    x2 + side * (TRACK_WIDTH + BORDER) * math.cos(beta2),
                    y2 + side * (TRACK_WIDTH + BORDER) * math.sin(beta2),
                )
                self.road_poly.append(([b1_l, b1_r, b2_r,
                                        b2_l], (1, 1, 1) if i % 2 == 0 else
                                       (1, 0, 0)))
        self.track = track
        self.track_pack = {'noises': noises, 'rads': rads}
        return True

    def reset(self, track=None):
        self._destroy()
        self.reward = 0.0
        self.prev_reward = 0.0
        self.tile_visited_count = 0
        self.t = 0.0
        self.road_poly = []
        self.track_pack = None
        if track is None:
            while True:
                success = self._create_track([], [])
                if success:
                    break
        else:
            self._create_track(track['noises'], track['rads'])
        self.car = Car(self.world, *self.track[0][1:4])

        return self.step(None)[0]

    def step(self, action):
        if action is not None:
            if self.tile_visited_count <= 4 and action[1] < 0.1:
                action[1] = 0.1
                action[2] = 0
            self.car.steer(-action[0])
            self.car.gas(action[1])
            self.car.brake(action[2])
        self.car.step(1.0 / FPS)
        self.world.Step(1.0 / FPS, 6 * 30, 2 * 30)
        self.t += 1.0 / FPS
        self.calcInputs()
        self.state = self.render("state_pixels")
        step_reward = 0
        done = False
        if action is not None:  # First step without action, called from reset()
            self.reward -= TIMEPENALTY
            # We actually don't want to count fuel spent, we want car to be faster.
            # self.reward -=  10 * self.car.fuel_spent / ENGINE_POWER
            self.car.fuel_spent = 0.0
            step_reward = self.reward - self.prev_reward
            self.prev_reward = self.reward
            if self.tile_visited_count == len(self.track):
                done = True
            x, y = self.car.hull.position
            if abs(x) > PLAYFIELD or abs(y) > PLAYFIELD:
                done = True
                step_reward = -100
            elif abs(self.car.offset) > 1:
                done = True
            elif abs(self.car.angle) > 1:
                done = True
            elif speed < MIN_SPEED:
                if self.tile_visited_count > 4:
                    done = True  #too slow

        return self.state, step_reward, done, {}

    def calcInputs(self):
        #calculate stuff
        p2 = self.road[self.car.curren_tile].center_p
        p1 = self.road[self.car.curren_tile - 2].center_p
        self.car.offset = self.calcOffset(p1, p2,
                                          self.car.hull.position) / TRACK_WIDTH
        p3 = self.car.wheels[2].position
        p4 = self.car.wheels[0].position
        self.car.angle = self.calcAngle(p1, p2, p3, p4) / MAX_ANGLE
        p3 = p2
        if (self.car.curren_tile + FUTURE_SIGHT) < len(self.road):
            p4 = self.road[self.car.curren_tile +
                           int(FUTURE_SIGHT / 2)].center_p
            p5 = self.road[self.car.curren_tile +
                           int(FUTURE_SIGHT / 2)].center_p
            p6 = self.road[self.car.curren_tile + FUTURE_SIGHT].center_p
        else:
            p4 = self.road[-1].center_p
            p5 = p3
            p6 = p4
        self.car.curve1 = self.calcAngle(p1, p2, p3, p4) / MAX_ANGLE
        self.car.curve2 = self.calcAngle(p1, p2, p5, p6) / MAX_ANGLE
        self.car.slip_rate = self.calcSlipRate()
        self.car.yaw_velocity = self.car.hull.angularVelocity / 400.0  #tested
        self.car.speed = np.linalg.norm(
            self.car.hull.linearVelocity) / MAX_SPEED

    def calcSlipRate(self):
        f_speed = (self.car.wheels[0].omega + self.car.wheels[1].omega) / 2
        r_speed = (self.car.wheels[2].omega + self.car.wheels[3].omega) / 2
        if r_speed > f_speed:
            traction = f_speed / r_speed
            slip = 1 - traction
            return slip
        else:
            return 0

    def calcOffset(self, p1, p2, p3):  #normalized with tack width
        p1 = np.array(p1)
        p2 = np.array(p2)
        p3 = np.array(p3)
        side = ((p3[0] - p2[0]) * (p2[1] - p1[1]) - (p2[0] - p1[0]) *
                (p3[1] - p2[1]))
        side /= -abs(side)
        offset = np.linalg.norm(np.cross(
            p2 - p1, p1 - p3)) / np.linalg.norm(p2 - p1) * side
        return offset
        #return ()/(TRACK_WIDTH)
    def calcAngle(self, p1, p2, p3, p4):
        v1 = -np.array(p1) + np.array(p2)
        v2 = -np.array(p3) + np.array(p4)
        signed_angle = math.atan2(v1[0] * v2[1] - v1[1] * v2[0],
                                  v1[0] * v2[0] + v1[1] * v2[1])
        return np.rad2deg(signed_angle)

    def debug_line(self, p1, p2, color=(0.0, 255, 0.0)):
        #debug line
        class Particle:
            pass

        p = Particle()

        p.poly = [(p1[0], p1[1]), (p2[0], p2[1])]
        p.color = color
        self.viewer.draw_polyline(p.poly, color=p.color, linewidth=2)

    def render(self, mode="human"):
        assert mode in ["human", "state_pixels", "rgb_array"]
        if self.viewer is None:
            from gym.envs.classic_control import rendering

            self.viewer = rendering.Viewer(WINDOW_W, WINDOW_H)
            self.score_label = pyglet.text.Label(
                "0000",
                font_size=36,
                x=20,
                y=WINDOW_H * 2.5 / 40.00,
                anchor_x="left",
                anchor_y="center",
                color=(255, 255, 255, 255),
            )
            self.transform = rendering.Transform()

        if "t" not in self.__dict__:
            return  # reset() not called yet

        # Animate zoom first second:
        zoom = 0.1 * SCALE * max(1 - self.t, 0) + ZOOM * SCALE * min(self.t, 1)
        scroll_x = self.car.hull.position[0]
        scroll_y = self.car.hull.position[1]
        angle = -self.car.hull.angle
        vel = self.car.hull.linearVelocity
        if np.linalg.norm(vel) > 0.5:
            angle = math.atan2(vel[0], vel[1])
        self.transform.set_scale(zoom, zoom)
        self.transform.set_translation(
            WINDOW_W / 2 - (scroll_x * zoom * math.cos(angle) -
                            scroll_y * zoom * math.sin(angle)),
            WINDOW_H / 4 - (scroll_x * zoom * math.sin(angle) +
                            scroll_y * zoom * math.cos(angle)),
        )
        self.transform.set_rotation(angle)

        self.car.draw(self.viewer, mode != "state_pixels")

        #middle line
        p2 = self.road[self.car.curren_tile].center_p
        p1 = self.road[self.car.curren_tile - 2].center_p
        self.debug_line(p1, p2)
        #curve forecast
        p3 = p2
        if (self.car.curren_tile + FUTURE_SIGHT) < len(self.road):
            p4 = self.road[self.car.curren_tile +
                           int(FUTURE_SIGHT / 2)].center_p
            p5 = self.road[self.car.curren_tile +
                           int(FUTURE_SIGHT / 2)].center_p
            p6 = self.road[self.car.curren_tile + FUTURE_SIGHT].center_p
        else:
            p4 = self.road[-1].center_p
            p5 = p3
            p6 = p4
        self.debug_line(p3, p4, color=(50, 50, 0))
        self.debug_line(p5, p6, color=(0, 100, 50))
        arr = None
        win = self.viewer.window
        win.switch_to()
        win.dispatch_events()

        win.clear()
        t = self.transform
        if mode == "rgb_array":
            VP_W = VIDEO_W
            VP_H = VIDEO_H
        elif mode == "state_pixels":
            VP_W = STATE_W
            VP_H = STATE_H
        else:
            pixel_scale = 1
            if hasattr(win.context, "_nscontext"):
                pixel_scale = (
                    win.context._nscontext.view().backingScaleFactor())  # pylint: disable=protected-access
            VP_W = int(pixel_scale * WINDOW_W)
            VP_H = int(pixel_scale * WINDOW_H)

        gl.glViewport(0, 0, VP_W, VP_H)
        t.enable()
        self.render_road()
        for geom in self.viewer.onetime_geoms:
            geom.render()
        self.viewer.onetime_geoms = []
        t.disable()
        self.render_indicators(WINDOW_W, WINDOW_H)

        if mode == "human":
            win.flip()
            return self.viewer.isopen

        image_data = (pyglet.image.get_buffer_manager().get_color_buffer().
                      get_image_data())
        arr = np.fromstring(image_data.get_data(), dtype=np.uint8, sep="")
        arr = arr.reshape(VP_H, VP_W, 4)
        arr = arr[::-1, :, 0:3]

        return arr

    def close(self):
        if self.viewer is not None:
            self.viewer.close()
            self.viewer = None

    def render_road(self):
        colors = [0.4, 0.8, 0.4, 1.0] * 4
        polygons_ = [
            +PLAYFIELD,
            +PLAYFIELD,
            0,
            +PLAYFIELD,
            -PLAYFIELD,
            0,
            -PLAYFIELD,
            -PLAYFIELD,
            0,
            -PLAYFIELD,
            +PLAYFIELD,
            0,
        ]

        k = PLAYFIELD / 20.0
        colors.extend([0.4, 0.9, 0.4, 1.0] * 4 * 20 * 20)
        for x in range(-20, 20, 2):
            for y in range(-20, 20, 2):
                polygons_.extend([
                    k * x + k,
                    k * y + 0,
                    0,
                    k * x + 0,
                    k * y + 0,
                    0,
                    k * x + 0,
                    k * y + k,
                    0,
                    k * x + k,
                    k * y + k,
                    0,
                ])

        for poly, color in self.road_poly:
            colors.extend([color[0], color[1], color[2], 1] * len(poly))
            for p in poly:
                polygons_.extend([p[0], p[1], 0])

        vl = pyglet.graphics.vertex_list(
            len(polygons_) // 3,
            ("v3f", polygons_),
            ("c4f", colors)  # gl.GL_QUADS,
        )
        vl.draw(gl.GL_QUADS)
        vl.delete()

    def render_indicators(self, W, H):
        s = W / 40.0
        h = H / 40.0
        colors = [0, 0, 0, 1] * 4
        polygons = [W, 0, 0, W, 5 * h, 0, 0, 5 * h, 0, 0, 0, 0]

        def vertical_ind(place, val, color):
            colors.extend([color[0], color[1], color[2], 1] * 4)
            polygons.extend([
                place * s,
                h + h * val,
                0,
                (place + 1) * s,
                h + h * val,
                0,
                (place + 1) * s,
                h,
                0,
                (place + 0) * s,
                h,
                0,
            ])

        def horiz_ind(place, val, color):
            colors.extend([color[0], color[1], color[2], 1] * 4)
            polygons.extend([
                (place + 0) * s,
                4 * h,
                0,
                (place + val) * s,
                4 * h,
                0,
                (place + val) * s,
                2 * h,
                0,
                (place + 0) * s,
                2 * h,
                0,
            ])

        true_speed = np.sqrt(
            np.square(self.car.hull.linearVelocity[0]) +
            np.square(self.car.hull.linearVelocity[1]))

        vertical_ind(5, 0.02 * true_speed, (1, 1, 1))
        vertical_ind(7, 0.01 * self.car.wheels[0].omega,
                     (0.0, 0, 1))  # ABS sensors
        vertical_ind(8, 0.01 * self.car.wheels[1].omega, (0.0, 0, 1))
        vertical_ind(9, 0.01 * self.car.wheels[2].omega, (0.2, 0, 1))
        vertical_ind(10, 0.01 * self.car.wheels[3].omega, (0.2, 0, 1))
        horiz_ind(20, -10.0 * self.car.wheels[0].joint.angle, (0, 1, 0))
        horiz_ind(30, -0.8 * self.car.hull.angularVelocity, (1, 0, 0))
        vl = pyglet.graphics.vertex_list(
            len(polygons) // 3,
            ("v3f", polygons),
            ("c4f", colors)  # gl.GL_QUADS,
        )
        vl.draw(gl.GL_QUADS)
        vl.delete()
        self.score_label.text = "%04i" % self.reward
        self.score_label.draw()

    def create_tournament(self, networks, grp_size=5):
        groups = []
        group = {'players': []}
        for i in range(len(networks)):
            group['players'].append(deepcopy(networks[i]))
            if len(group['players']) == grp_size:
                self.track_pack = None
                self.reset()
                group['track'] = self.track_pack
                print(self.track_pack['noises'][10])
                group['best_player'] = None
                groups.append(group)
                group = {'players': []}

        return groups