def reset(self):
    """Tear down the world, generate a new track and respawn every car.

    When ``self.init_pos`` is falsy each car is dropped at the centroid of a
    distinct, randomly chosen road polygon whose direction string is
    contained in ``"lrtb"``.  Otherwise each ``(i, j, angle)`` entry of
    ``self.init_pos`` is converted to world coordinates by scaling with
    ``EDGE_WIDTH`` and shifting by ``self.off_params``.  Angles are given in
    degrees and handed to ``Car`` in radians.

    Returns:
        The first element of ``self.step(...)`` called with one ``None``
        action per vehicle.
    """
    # Destroy all objects
    self._destroy()

    # TODO: decide on the reward
    self.reward = 0.0
    self.prev_reward = 0.0
    self.tot_reward = [0.0] * NUM_VEHICLES

    # Transition for zoom
    self.t = make_n_times(NUM_VEHICLES)

    # Rendering values
    self.road_poly = []
    self.human_render = False

    # Keep generating until the track builder reports success.
    while not self._create_track():
        print("retry to generate track (normal if there are not many of this messages)")

    if self.init_pos:
        # Explicit start poses were supplied.
        for car_idx, (i, j, angle) in enumerate(self.init_pos):
            x = (i - 0.5) * EDGE_WIDTH + self.off_params[0]
            y = (j + 0.5) * EDGE_WIDTH + self.off_params[1]
            self.cars[car_idx] = Car(self.world, angle*math.pi/180.0, x, y)
    else:
        # randomly init each car
        candidates = [
            idx for idx, tag in enumerate(self.directions) if tag in "lrtb"
        ]
        # replace=False: no two cars share a polygon.
        chosen = np.random.choice(candidates, NUM_VEHICLES, replace=False)
        # Heading in degrees per direction tag; anything else maps to 180.
        heading_by_direction = {"r": -90, "t": 0, "l": 90}
        for car_idx, rid in enumerate(chosen):
            corners = np.array(self.road_poly[rid][0])
            x = np.mean(corners[:, 0])
            y = np.mean(corners[:, 1])
            angle = heading_by_direction.get(self.directions[rid], 180)
            self.cars[car_idx] = Car(self.world, angle*math.pi/180.0, x, y)

    # return states after init
    return self.step([None] * NUM_VEHICLES)[0]
def reset(self):
    """Rebuild the track and the car, then return the initial state.

    Clears the accumulated reward, the elapsed time, the road polygons and
    the ``State`` object, regenerates the track (retrying until
    ``_create_track`` succeeds) and creates a ``Car`` from elements 1..3 of
    the first track entry.

    Returns:
        The first element of ``self.step(None)``.
    """
    self._destroy()

    self.reward = 0.0
    self.t = 0.0
    self.road_poly = []
    self.state = State()

    # Track generation can fail; keep trying until it reports success.
    while not self._create_track():
        print(
            "retry to generate track (normal if there are not many of this messages)"
        )

    start_pose = self.track[0][1:4]
    self.car = Car(self.world, *start_pose)

    first_step = self.step(None)
    return first_step[0]
def _reset(self):
    """Rebuild the track and the car, then return the initial state.

    Resets the reward bookkeeping, the visited-tile counter, the elapsed
    time, the road polygons and the ``human_render`` flag, regenerates the
    track (retrying until ``_create_track`` succeeds) and creates a ``Car``
    from elements 1..3 of the first track entry.

    Returns:
        The first element of ``self._step(None)``.
    """
    self._destroy()

    self.reward = 0.0
    self.prev_reward = 0.0
    self.tile_visited_count = 0
    self.t = 0.0
    self.road_poly = []
    self.human_render = False

    # Track generation can fail; keep trying until it reports success.
    while not self._create_track():
        print("retry to generate track (normal if there are not many of this messages)")

    start_pose = self.track[0][1:4]
    self.car = Car(self.world, *start_pose)

    first_step = self._step(None)
    return first_step[0]
def init_planner(prior_map):
    """Build a planner for ``prior_map`` and pickle it to ``PLANNER_FILE``.

    Vehicle parameters are read from ``CAR_PARAMS_FILE`` (keys used here:
    ``max_speed``, ``max_steer``, ``car_length``, ``wheel_radius``).  The
    function discretizes velocity, steering angle and heading, builds a
    simple ``Car`` model, calls ``create_planner`` and stores the result.

    Args:
        prior_map: Forwarded unchanged to ``create_planner``.

    Returns:
        None.  The planner is only written to ``PLANNER_FILE``.
    """
    # load in vehicle params
    with open(CAR_PARAMS_FILE, 'r') as f:
        # safe_load: yaml.load() without an explicit Loader is unsafe and is
        # rejected by recent PyYAML releases.
        # NOTE(review): assumes the params file holds plain YAML scalars and
        # mappings only (no Python-specific tags) -- confirm.
        car_params = yaml.safe_load(f)

    # define search params
    eps = 2.0
    dist_cost = 1
    time_cost = 1
    roughness_cost = 1
    cost_weights = (dist_cost, time_cost, roughness_cost)

    # Define action space
    dt = 0.1
    T = 1.0
    # velocities
    # TODO: Let handle negative velocities, but enforce basic dynamic windowing
    max_speed = car_params["max_speed"]
    num_speed = 3
    # NOTE(review): the speed grid is divided by dt as in the original code;
    # confirm the intended units with create_planner.
    velocities = np.linspace(start=0, stop=max_speed, num=num_speed) / dt
    dv = velocities[1] - velocities[0]

    # steer angles
    max_abs_steer = car_params["max_steer"]
    num_steer = 3
    steer_angles = np.linspace(-max_abs_steer, max_abs_steer, num=num_steer)

    # create simple car model for planning
    mid_to_wheel_length = car_params["car_length"] / 2.0
    car = Car(L=mid_to_wheel_length,
              max_v=max_speed,
              max_steer=max_abs_steer,
              wheel_radius=car_params["wheel_radius"])

    # define heading space (degrees, converted to radians below)
    start, stop, step = 0, 315, 45
    num_thetas = int((stop - start) / step) + 1
    thetas = np.linspace(start=start, stop=stop, num=num_thetas)
    thetas = thetas / RAD_TO_DEG  # convert to radians
    dtheta = step / RAD_TO_DEG

    # collective variables for discretizing C-space
    dy, dx = 1.0, 1.0
    miny, minx = 0, 0
    dstate = np.array([dx, dy, dtheta, dv, dt])
    min_state = np.array([minx, miny, min(thetas), min(velocities), 0])

    # get planners
    planner = create_planner(cost_weights=cost_weights,
                             thetas=thetas,
                             steer_angles=steer_angles,
                             velocities=velocities,
                             car=car,
                             min_state=min_state,
                             dstate=dstate,
                             prior_map=prior_map,
                             eps=eps,
                             T=T)

    # store planner in file to be used for planning
    # pickle emits bytes, so the file must be opened in binary mode.
    with open(PLANNER_FILE, "wb") as f:
        pickle.dump(planner, f)
def reset(self, track=None):
    """Rebuild the track and the car, then return the initial state.

    Args:
        track: Optional mapping with ``'noises'`` and ``'rads'`` entries.
            When given, the track is built once from those values.  When
            ``None``, ``_create_track([], [])`` is retried until it reports
            success.

    Returns:
        The first element of ``self.step(None)``.
    """
    self._destroy()

    self.reward = 0.0
    self.prev_reward = 0.0
    self.tile_visited_count = 0
    self.t = 0.0
    self.road_poly = []
    self.track_pack = None

    if track is not None:
        # Caller-supplied description: single attempt, result not checked.
        self._create_track(track['noises'], track['rads'])
    else:
        # Random track: retry until generation succeeds.
        while not self._create_track([], []):
            pass

    start_pose = self.track[0][1:4]
    self.car = Car(self.world, *start_pose)

    first_step = self.step(None)
    return first_step[0]
def reset(self):
    """Rebuild the track and the car, then return the initial state.

    Resets the reward bookkeeping, the visited-tile counter, the elapsed
    time and the road polygons, regenerates the track (retrying until
    ``_create_track`` succeeds) and creates a ``Car`` from elements 1..3 of
    the first track entry, forwarding ``self.sensors_activated``.

    Returns:
        The first element of ``self.step([0, 0.1, 0])``.
    """
    self._destroy()
    self.reward = 0.0
    self.prev_reward = 0.0
    self.tile_visited_count = 0
    self.t = 0.0
    self.road_poly = []

    # Track generation can fail; keep trying until it reports success.
    while not self._create_track():
        if self.verbose == 1:
            # The two literals are concatenated, so the first one must end
            # with a space ("many instances", not "manyinstances").
            print("retry to generate track (normal if there are not many "
                  "instances of this message)")

    self.car = Car(self.world,
                   *self.track[0][1:4],
                   sensors_activated=self.sensors_activated)

    # The first step uses a small non-None action rather than None.
    return self.step([0, 0.1, 0])[0]
class CarRacing(gym.Env):
    """Top-down car racing environment on a randomly generated closed track.

    Physics run in a Box2D world; rendering goes through a pyglet window and
    raw OpenGL calls.  The action is ``[steer, gas, brake]`` and the
    observation is an RGB image of shape ``(STATE_H, STATE_W, 3)``.
    """

    metadata = {
        'render.modes': ['human', 'rgb_array', 'state_pixels'],
        'video.frames_per_second' : FPS
    }

    def __init__(self):
        """Create the Box2D world and declare the action/observation spaces."""
        self._seed()
        self.world = Box2D.b2World((0,0), contactListener=FrictionDetector(self))
        self.viewer = None
        self.invisible_state_window = None
        self.invisible_video_window = None
        self.road = None
        self.car = None
        self.reward = 0.0
        self.prev_reward = 0.0

        self.action_space = spaces.Box( np.array([-1,0,0]), np.array([+1,+1,+1]))  # steer, gas, brake
        self.observation_space = spaces.Box(low=0, high=255, shape=(STATE_H, STATE_W, 3))

    def _seed(self, seed=None):
        """Seed ``self.np_random`` and return the seed in a one-element list."""
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    def _destroy(self):
        """Remove all road tiles and the car from the world (no-op if no road)."""
        if not self.road:
            return
        for t in self.road:
            self.world.DestroyBody(t)
        self.road = []
        self.car.destroy()

    def _create_track(self):
        """Generate a random closed track and its Box2D sensor tiles.

        Appends road and border polygons to ``self.road_poly``, tile bodies
        to ``self.road`` and stores the ``(alpha, beta, x, y)`` samples in
        ``self.track``.

        Returns:
            True on success.  False when no full lap through the start angle
            was found or when head and tail of the loop do not line up; the
            caller is expected to retry.
        """
        CHECKPOINTS = 12

        # Create checkpoints
        checkpoints = []
        for c in range(CHECKPOINTS):
            alpha = 2*math.pi*c/CHECKPOINTS + self.np_random.uniform(0, 2*math.pi*1/CHECKPOINTS)
            rad = self.np_random.uniform(TRACK_RAD/3, TRACK_RAD)
            if c==0:
                alpha = 0
                rad = 1.5*TRACK_RAD
            if c==CHECKPOINTS-1:
                alpha = 2*math.pi*c/CHECKPOINTS
                self.start_alpha = 2*math.pi*(-0.5)/CHECKPOINTS
                rad = 1.5*TRACK_RAD
            checkpoints.append( (alpha, rad*math.cos(alpha), rad*math.sin(alpha)) )

        self.road = []

        # Go from one checkpoint to another to create track
        x, y, beta = 1.5*TRACK_RAD, 0, 0
        dest_i = 0
        laps = 0
        track = []
        no_freeze = 2500  # hard cap on iterations so the loop cannot hang
        visited_other_side = False
        while True:
            alpha = math.atan2(y, x)
            if visited_other_side and alpha > 0:
                laps += 1
                visited_other_side = False
            if alpha < 0:
                visited_other_side = True
                alpha += 2*math.pi
            while True:  # Find destination from checkpoints
                failed = True
                while True:
                    dest_alpha, dest_x, dest_y = checkpoints[dest_i % len(checkpoints)]
                    if alpha <= dest_alpha:
                        failed = False
                        break
                    dest_i += 1
                    if dest_i % len(checkpoints) == 0:
                        break
                if not failed:
                    break
                alpha -= 2*math.pi
                continue
            r1x = math.cos(beta)
            r1y = math.sin(beta)
            p1x = -r1y
            p1y = r1x
            dest_dx = dest_x - x  # vector towards destination
            dest_dy = dest_y - y
            proj = r1x*dest_dx + r1y*dest_dy  # destination vector projected on rad
            while beta - alpha > 1.5*math.pi:
                beta -= 2*math.pi
            while beta - alpha < -1.5*math.pi:
                beta += 2*math.pi
            prev_beta = beta
            proj *= SCALE
            if proj > 0.3:
                beta -= min(TRACK_TURN_RATE, abs(0.001*proj))
            if proj < -0.3:
                beta += min(TRACK_TURN_RATE, abs(0.001*proj))
            x += p1x*TRACK_DETAIL_STEP
            y += p1y*TRACK_DETAIL_STEP
            track.append( (alpha,prev_beta*0.5 + beta*0.5,x,y) )
            if laps > 4:
                break
            no_freeze -= 1
            if no_freeze==0:
                break

        # Find closed loop range i1..i2, first loop should be ignored, second is OK
        i1, i2 = -1, -1
        i = len(track)
        while True:
            i -= 1
            if i==0:
                return False  # Failed
            pass_through_start = track[i][0] > self.start_alpha and track[i-1][0] <= self.start_alpha
            if pass_through_start and i2==-1:
                i2 = i
            elif pass_through_start and i1==-1:
                i1 = i
                break
        print("Track generation: %i..%i -> %i-tiles track" % (i1, i2, i2-i1))
        assert i1!=-1
        assert i2!=-1

        track = track[i1:i2-1]

        first_beta = track[0][1]
        first_perp_x = math.cos(first_beta)
        first_perp_y = math.sin(first_beta)
        # Length of perpendicular jump to put together head and tail
        well_glued_together = np.sqrt(
            np.square( first_perp_x*(track[0][2] - track[-1][2]) ) +
            np.square( first_perp_y*(track[0][3] - track[-1][3]) ))
        if well_glued_together > TRACK_DETAIL_STEP:
            return False

        # Red-white border on hard turns
        border = [False]*len(track)
        for i in range(len(track)):
            good = True
            oneside = 0
            for neg in range(BORDER_MIN_COUNT):
                beta1 = track[i-neg-0][1]
                beta2 = track[i-neg-1][1]
                good &= abs(beta1 - beta2) > TRACK_TURN_RATE*0.2
                oneside += np.sign(beta1 - beta2)
            good &= abs(oneside) == BORDER_MIN_COUNT
            border[i] = good
        # Extend each flagged tile backwards over the tiles that led to it.
        for i in range(len(track)):
            for neg in range(BORDER_MIN_COUNT):
                border[i-neg] |= border[i]

        # Create tiles
        for i in range(len(track)):
            alpha1, beta1, x1, y1 = track[i]
            alpha2, beta2, x2, y2 = track[i-1]  # i-1 wraps to the last sample for i == 0
            road1_l = (x1 - TRACK_WIDTH*math.cos(beta1), y1 - TRACK_WIDTH*math.sin(beta1))
            road1_r = (x1 + TRACK_WIDTH*math.cos(beta1), y1 + TRACK_WIDTH*math.sin(beta1))
            road2_l = (x2 - TRACK_WIDTH*math.cos(beta2), y2 - TRACK_WIDTH*math.sin(beta2))
            road2_r = (x2 + TRACK_WIDTH*math.cos(beta2), y2 + TRACK_WIDTH*math.sin(beta2))
            t = self.world.CreateStaticBody( fixtures = fixtureDef(
                shape=polygonShape(vertices=[road1_l, road1_r, road2_r, road2_l])
                ))
            t.userData = t
            c = 0.01*(i%3)
            t.color = [ROAD_COLOR[0] + c, ROAD_COLOR[1] + c, ROAD_COLOR[2] + c]
            t.road_visited = False
            t.road_friction = 1.0
            t.fixtures[0].sensor = True
            self.road_poly.append(( [road1_l, road1_r, road2_r, road2_l], t.color ))
            self.road.append(t)
            if border[i]:
                side = np.sign(beta2 - beta1)
                b1_l = (x1 + side* TRACK_WIDTH        *math.cos(beta1), y1 + side* TRACK_WIDTH        *math.sin(beta1))
                b1_r = (x1 + side*(TRACK_WIDTH+BORDER)*math.cos(beta1), y1 + side*(TRACK_WIDTH+BORDER)*math.sin(beta1))
                b2_l = (x2 + side* TRACK_WIDTH        *math.cos(beta2), y2 + side* TRACK_WIDTH        *math.sin(beta2))
                b2_r = (x2 + side*(TRACK_WIDTH+BORDER)*math.cos(beta2), y2 + side*(TRACK_WIDTH+BORDER)*math.sin(beta2))
                self.road_poly.append(( [b1_l, b1_r, b2_r, b2_l], (1,1,1) if i%2==0 else (1,0,0) ))
        self.track = track
        return True

    def _reset(self):
        """Rebuild track and car and return the first observation."""
        self._destroy()
        self.reward = 0.0
        self.prev_reward = 0.0
        self.tile_visited_count = 0
        self.t = 0.0
        self.road_poly = []
        self.human_render = False

        while True:
            success = self._create_track()
            if success:
                break
            print("retry to generate track (normal if there are not many of this messages)")
        self.car = Car(self.world, *self.track[0][1:4])

        return self._step(None)[0]

    def _step(self, action):
        """Advance the simulation by one frame.

        Args:
            action: ``[steer, gas, brake]`` or ``None`` (used by ``_reset``
                for the very first frame; no reward is computed then).

        Returns:
            ``(state, step_reward, done, info)`` where ``state`` is the
            ``"state_pixels"`` rendering and ``info`` is an empty dict.
        """
        if action is not None:
            self.car.steer(-action[0])
            self.car.gas(action[1])
            self.car.brake(action[2])

        self.car.step(1.0/FPS)
        self.world.Step(1.0/FPS, 6*30, 2*30)
        self.t += 1.0/FPS

        self.state = self._render("state_pixels")

        step_reward = 0
        done = False
        if action is not None:  # First step without action, called from reset()
            self.reward -= 0.1
            # We actually don't want to count fuel spent, we want car to be faster.
            self.car.fuel_spent = 0.0
            step_reward = self.reward - self.prev_reward
            self.prev_reward = self.reward
            if self.tile_visited_count==len(self.track):
                done = True
            x, y = self.car.hull.position
            if abs(x) > PLAYFIELD or abs(y) > PLAYFIELD:
                # Left the playfield: end the episode with a fixed penalty.
                done = True
                step_reward = -100

        return self.state, step_reward, done, {}

    def _render(self, mode='human', close=False):
        """Draw the scene and, for pixel modes, return it as an array.

        Args:
            mode: ``'human'``, ``'rgb_array'`` or ``'state_pixels'``.
            close: When true, close the viewer (if any) and return ``None``.

        Returns:
            For ``'rgb_array'`` / ``'state_pixels'`` a uint8 array of shape
            ``(VP_H, VP_W, 3)`` with rows flipped vertically; ``None`` for
            ``'human'`` or when ``_reset`` has not been called yet.
        """
        if close:
            if self.viewer is not None:
                self.viewer.close()
                self.viewer = None
            return

        if self.viewer is None:
            self.viewer = rendering.Viewer(WINDOW_W, WINDOW_H)
            self.score_label = pyglet.text.Label('0000', font_size=36,
                x=20, y=WINDOW_H*2.5/40.00, anchor_x='left', anchor_y='center',
                color=(255,255,255,255))
            self.transform = rendering.Transform()

        if "t" not in self.__dict__:
            return  # reset() not called yet

        zoom = 0.1*SCALE*max(1-self.t, 0) + ZOOM*SCALE*min(self.t, 1)   # Animate zoom first second
        scroll_x = self.car.hull.position[0]
        scroll_y = self.car.hull.position[1]
        angle = -self.car.hull.angle
        vel = self.car.hull.linearVelocity
        if np.linalg.norm(vel) > 0.5:
            # Once moving, orient the view along the velocity instead of the hull.
            angle = math.atan2(vel[0], vel[1])
        self.transform.set_scale(zoom, zoom)
        self.transform.set_translation(
            WINDOW_W/2 - (scroll_x*zoom*math.cos(angle) - scroll_y*zoom*math.sin(angle)),
            WINDOW_H/4 - (scroll_x*zoom*math.sin(angle) + scroll_y*zoom*math.cos(angle)) )
        self.transform.set_rotation(angle)

        self.car.draw(self.viewer, mode!="state_pixels")

        arr = None
        win = self.viewer.window
        if mode != 'state_pixels':
            win.switch_to()
            win.dispatch_events()
        if mode=="rgb_array" or mode=="state_pixels":
            win.clear()
            t = self.transform
            if mode=='rgb_array':
                VP_W = VIDEO_W
                VP_H = VIDEO_H
            else:
                VP_W = STATE_W
                VP_H = STATE_H
            glViewport(0, 0, VP_W, VP_H)
            t.enable()
            self._render_road()
            for geom in self.viewer.onetime_geoms:
                geom.render()
            t.disable()
            self._render_indicators(WINDOW_W, WINDOW_H)  # TODO: find why 2x needed, wtf
            image_data = pyglet.image.get_buffer_manager().get_color_buffer().get_image_data()
            # np.fromstring's binary mode (sep='') is deprecated; frombuffer is
            # the documented replacement.  The copy keeps the result writable
            # and independent of pyglet's buffer, as fromstring's result was.
            arr = np.frombuffer(image_data.data, dtype=np.uint8).copy()
            arr = arr.reshape(VP_H, VP_W, 4)
            arr = arr[::-1, :, 0:3]  # flip rows, drop the alpha channel

        if mode=="rgb_array" and not self.human_render:  # agent can call or not call env.render() itself when recording video.
            win.flip()

        if mode=='human':
            self.human_render = True
            win.clear()
            t = self.transform
            glViewport(0, 0, WINDOW_W, WINDOW_H)
            t.enable()
            self._render_road()
            for geom in self.viewer.onetime_geoms:
                geom.render()
            t.disable()
            self._render_indicators(WINDOW_W, WINDOW_H)
            win.flip()

        self.viewer.onetime_geoms = []
        return arr

    def _render_road(self):
        """Draw the playfield background, its checker pattern and ``road_poly``."""
        glBegin(GL_QUADS)
        glColor4f(0.4, 0.8, 0.4, 1.0)
        glVertex3f(-PLAYFIELD, +PLAYFIELD, 0)
        glVertex3f(+PLAYFIELD, +PLAYFIELD, 0)
        glVertex3f(+PLAYFIELD, -PLAYFIELD, 0)
        glVertex3f(-PLAYFIELD, -PLAYFIELD, 0)
        glColor4f(0.4, 0.9, 0.4, 1.0)
        k = PLAYFIELD/20.0
        for x in range(-20, 20, 2):
            for y in range(-20, 20, 2):
                glVertex3f(k*x + k, k*y + 0, 0)
                glVertex3f(k*x + 0, k*y + 0, 0)
                glVertex3f(k*x + 0, k*y + k, 0)
                glVertex3f(k*x + k, k*y + k, 0)
        for poly, color in self.road_poly:
            glColor4f(color[0], color[1], color[2], 1)
            for p in poly:
                glVertex3f(p[0], p[1], 0)
        glEnd()

    def _render_indicators(self, W, H):
        """Draw the bottom status bar (speed, wheel omegas, steering, rotation) and the score."""
        glBegin(GL_QUADS)
        s = W/40.0
        h = H/40.0
        glColor4f(0,0,0,1)
        glVertex3f(W, 0, 0)
        glVertex3f(W, 5*h, 0)
        glVertex3f(0, 5*h, 0)
        glVertex3f(0, 0, 0)

        def vertical_ind(place, val, color):
            glColor4f(color[0], color[1], color[2], 1)
            glVertex3f((place+0)*s, h + h*val, 0)
            glVertex3f((place+1)*s, h + h*val, 0)
            glVertex3f((place+1)*s, h, 0)
            glVertex3f((place+0)*s, h, 0)

        def horiz_ind(place, val, color):
            glColor4f(color[0], color[1], color[2], 1)
            glVertex3f((place+0)*s, 4*h , 0)
            glVertex3f((place+val)*s, 4*h, 0)
            glVertex3f((place+val)*s, 2*h, 0)
            glVertex3f((place+0)*s, 2*h, 0)

        true_speed = np.sqrt(np.square(self.car.hull.linearVelocity[0]) + np.square(self.car.hull.linearVelocity[1]))
        vertical_ind(5, 0.02*true_speed, (1,1,1))
        vertical_ind(7, 0.01*self.car.wheels[0].omega, (0.0,0,1))  # ABS sensors
        vertical_ind(8, 0.01*self.car.wheels[1].omega, (0.0,0,1))
        vertical_ind(9, 0.01*self.car.wheels[2].omega, (0.2,0,1))
        vertical_ind(10,0.01*self.car.wheels[3].omega, (0.2,0,1))
        horiz_ind(20, -10.0*self.car.wheels[0].joint.angle, (0,1,0))
        horiz_ind(30, -0.8*self.car.hull.angularVelocity, (1,0,0))
        glEnd()
        self.score_label.text = "%04i" % self.reward
        self.score_label.draw()
class CarRacing(gym.Env, EzPickle):
    """Car racing environment with random obstacles and distance sensors.

    Physics run in a Box2D world; rendering goes through a pyglet window and
    raw OpenGL calls.  The action is ``[steer, gas, brake]``.  ``step``
    returns a vector of ``SENSOR_NB`` sensor distances followed by the car's
    speed (not the image declared in ``observation_space``) and a 3-tuple
    ``(observation, reward, done)``.
    """

    metadata = {
        'render.modes': ['human', 'rgb_array', 'state_pixels'],
        'video.frames_per_second': FPS
    }

    def __init__(self, obstacle_prob, verbose=1, sensors_activated=True):
        """Create the world and configure the environment.

        Args:
            obstacle_prob: Per-tile probability of spawning an obstacle;
                stored in the module-level ``OBSTACLE_PROB``.
            verbose: When equal to 1, track-generation messages are printed.
            sensors_activated: Forwarded to ``Car`` on every ``reset``.
        """
        # Forward the constructor arguments so a pickled environment can be
        # rebuilt: obstacle_prob has no default value.
        # NOTE(review): assumes EzPickle is gym.utils.EzPickle, which records
        # *args/**kwargs and re-invokes the constructor with them -- confirm.
        EzPickle.__init__(self, obstacle_prob, verbose, sensors_activated)
        # Filled with 4-tuples holding the corner positions of each wall.
        self.obstacles_positions = []
        # Segments registered through addSensorBorder(); initialised here so
        # that method does not fail on a missing attribute.
        self.sensorBorder = []
        self.seed()
        self.contactListener_keepref = FrictionDetector(self)
        self.world = Box2D.b2World(
            (0, 0), contactListener=self.contactListener_keepref)
        self.viewer = None
        self.invisible_state_window = None
        self.invisible_video_window = None
        self.road = None
        self.car = None
        self.reward = 0.0
        self.prev_reward = 0.0
        self.times_succeeded = 0
        self.verbose = verbose
        self.sensors_activated = sensors_activated
        self.fd_tile = fixtureDef(shape=polygonShape(
            vertices=[(0, 0), (1, 0), (1, -1), (0, -1)]))

        self.action_space = spaces.Box(np.array([-1, 0, 0]),
                                       np.array([+1, +1, +1]),
                                       dtype=np.float32)  # steer, gas, brake
        self.observation_space = spaces.Box(low=0,
                                            high=255,
                                            shape=(STATE_H, STATE_W, 3),
                                            dtype=np.uint8)
        # Counter used to reseed the RNG once per generated tile.
        self.random_obs = 0
        global OBSTACLE_PROB
        OBSTACLE_PROB = obstacle_prob

    def seed(self, seed=None):
        """Seed ``self.np_random`` and return the seed in a one-element list."""
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    def _destroy(self):
        """Remove all road tiles and the car from the world (no-op if no road)."""
        if not self.road:
            return
        for t in self.road:
            self.world.DestroyBody(t)
        self.road = []
        self.car.destroy()

    def _create_track(self):
        """Generate a random closed track, its tiles and its obstacles.

        Appends road, border and obstacle polygons to ``self.road_poly``,
        tile bodies to ``self.road``, obstacle corners to
        ``self.obstacles_positions`` and stores the ``(alpha, beta, x, y)``
        samples in ``self.track``.

        Returns:
            True on success.  False when no full lap through the start angle
            was found or when head and tail of the loop do not line up; the
            caller is expected to retry.
        """
        CHECKPOINTS = 12  # 12 = number of turns (11) + the start (1)

        # Create checkpoints
        checkpoints = []
        self.obstacles_positions = []
        for c in range(CHECKPOINTS):
            noise = self.np_random.uniform(0, 2 * 3.14159 * 1 / CHECKPOINTS)
            alpha = 2 * 3.14159 * c / CHECKPOINTS + noise
            rad = self.np_random.uniform(TRACK_RAD / 3, TRACK_RAD)
            if c == 0:
                alpha = 0
                rad = 1.5 * TRACK_RAD
            if c == CHECKPOINTS - 1:
                alpha = 2 * 3.14159 * c / CHECKPOINTS
                self.start_alpha = 2 * 3.14159 * (-0.5) / CHECKPOINTS
                rad = 1.5 * TRACK_RAD
            checkpoints.append(
                (alpha, rad * math.cos(alpha), rad * math.sin(alpha)))
        self.road = []

        # Go from one checkpoint to another to create track
        x, y, beta = 1.5 * TRACK_RAD, 0, 0
        dest_i = 0
        laps = 0
        track = []
        no_freeze = 2500  # hard cap on iterations so the loop cannot hang
        visited_other_side = False
        while True:
            alpha = math.atan2(y, x)
            if visited_other_side and alpha > 0:
                laps += 1
                visited_other_side = False
            if alpha < 0:
                visited_other_side = True
                alpha += 2 * 3.14159
            while True:  # Find destination from checkpoints
                failed = True
                while True:
                    dest_alpha, dest_x, dest_y = checkpoints[dest_i % len(checkpoints)]
                    if alpha <= dest_alpha:
                        failed = False
                        break
                    dest_i += 1
                    if dest_i % len(checkpoints) == 0:
                        break
                if not failed:
                    break
                alpha -= 2 * 3.14159
                continue
            r1x = math.cos(beta)
            r1y = math.sin(beta)
            p1x = -r1y
            p1y = r1x
            dest_dx = dest_x - x  # vector towards destination
            dest_dy = dest_y - y
            # destination vector projected on rad:
            proj = r1x * dest_dx + r1y * dest_dy
            while beta - alpha > 1.5 * 3.14159:
                beta -= 2 * 3.14159
            while beta - alpha < -1.5 * 3.14159:
                beta += 2 * 3.14159
            prev_beta = beta
            proj *= SCALE
            if proj > 0.3:
                beta -= min(TRACK_TURN_RATE, abs(0.001 * proj))
            if proj < -0.3:
                beta += min(TRACK_TURN_RATE, abs(0.001 * proj))
            x += p1x * TRACK_DETAIL_STEP
            y += p1y * TRACK_DETAIL_STEP
            track.append((alpha, prev_beta * 0.5 + beta * 0.5, x, y))
            if laps > 4:
                break
            no_freeze -= 1
            if no_freeze == 0:
                break

        # Find closed loop range i1..i2, first loop should be ignored, second is OK
        i1, i2 = -1, -1
        i = len(track)
        while True:
            i -= 1
            if i == 0:
                return False  # Failed
            pass_through_start = track[i][0] > self.start_alpha and track[
                i - 1][0] <= self.start_alpha
            if pass_through_start and i2 == -1:
                i2 = i
            elif pass_through_start and i1 == -1:
                i1 = i
                break
        if self.verbose == 1:
            print("Track generation: %i..%i -> %i-tiles track" %
                  (i1, i2, i2 - i1))
        assert i1 != -1
        assert i2 != -1

        track = track[i1:i2 - 1]

        first_beta = track[0][1]
        first_perp_x = math.cos(first_beta)
        first_perp_y = math.sin(first_beta)
        # Length of perpendicular jump to put together head and tail
        well_glued_together = np.sqrt(
            np.square(first_perp_x * (track[0][2] - track[-1][2])) +
            np.square(first_perp_y * (track[0][3] - track[-1][3])))
        if well_glued_together > TRACK_DETAIL_STEP:
            return False

        # Red-white border on hard turns
        border = [False] * len(track)
        for i in range(len(track)):
            good = True
            oneside = 0
            for neg in range(BORDER_MIN_COUNT):
                beta1 = track[i - neg - 0][1]
                beta2 = track[i - neg - 1][1]
                good &= abs(beta1 - beta2) > TRACK_TURN_RATE * 0.2
                oneside += np.sign(beta1 - beta2)
            good &= abs(oneside) == BORDER_MIN_COUNT
            border[i] = good
        # Extend each flagged tile backwards over the tiles that led to it.
        for i in range(len(track)):
            for neg in range(BORDER_MIN_COUNT):
                border[i - neg] |= border[i]

        # Create tiles and obstacles
        last_obstacle = 15  # so that the start of the race is obstacle-free
        for i in range(len(track)):
            alpha1, beta1, x1, y1 = track[i]
            alpha2, beta2, x2, y2 = track[i - 1]  # wraps to the last sample for i == 0
            road1_l = (x1 - TRACK_WIDTH * math.cos(beta1),
                       y1 - TRACK_WIDTH * math.sin(beta1))
            road1_r = (x1 + TRACK_WIDTH * math.cos(beta1),
                       y1 + TRACK_WIDTH * math.sin(beta1))
            road2_l = (x2 - TRACK_WIDTH * math.cos(beta2),
                       y2 - TRACK_WIDTH * math.sin(beta2))
            road2_r = (x2 + TRACK_WIDTH * math.cos(beta2),
                       y2 + TRACK_WIDTH * math.sin(beta2))
            vertices = [road1_l, road1_r, road2_r, road2_l]
            self.fd_tile.shape.vertices = vertices
            t = self.world.CreateStaticBody(fixtures=self.fd_tile)
            t.userData = t
            t.color = [0, 0.128, 0.624, 0.019]
            t.road_visited = False
            t.road_friction = 1.0
            t.fixtures[0].sensor = True
            self.road_poly.append(([road1_l, road1_r, road2_r,
                                    road2_l], t.color))
            self.road.append(t)
            if border[i]:
                side = np.sign(beta2 - beta1)
                b1_l = (x1 + side * TRACK_WIDTH * math.cos(beta1),
                        y1 + side * TRACK_WIDTH * math.sin(beta1))
                b1_r = (x1 + side * (TRACK_WIDTH + BORDER) * math.cos(beta1),
                        y1 + side * (TRACK_WIDTH + BORDER) * math.sin(beta1))
                b2_l = (x2 + side * TRACK_WIDTH * math.cos(beta2),
                        y2 + side * TRACK_WIDTH * math.sin(beta2))
                b2_r = (x2 + side * (TRACK_WIDTH + BORDER) * math.cos(beta2),
                        y2 + side * (TRACK_WIDTH + BORDER) * math.sin(beta2))
                self.road_poly.append(([b1_l, b1_r, b2_r, b2_l],
                                       (1, 1, 1) if i % 2 == 0 else (1, 0, 0)))

            # The RNG is reseeded for every tile from a counter that keeps
            # growing across calls.
            self.random_obs += 1
            self.seed(self.random_obs)
            # last_obstacle <= 0 enforces a gap between obstacles (and after
            # the start) so that the course stays feasible.
            if (self.np_random.uniform(0, 1) < OBSTACLE_PROB) and (
                    last_obstacle <= 0):
                last_obstacle = 8
                # NOTE(review): the single positional argument is numpy's
                # `low` (with `high` defaulting to 1.0), not an upper bound --
                # confirm whether uniform(0, TRACK_WIDTH) was intended.
                deriv_left = self.np_random.uniform(TRACK_WIDTH)
                deriv_right = TRACK_WIDTH - deriv_left
                obs1_l = (x1 - (TRACK_WIDTH - deriv_left) * math.cos(beta1),
                          y1 - (TRACK_WIDTH - deriv_left) * math.sin(beta1))
                obs1_r = (x1 + (TRACK_WIDTH - deriv_right) * math.cos(beta1),
                          y1 + (TRACK_WIDTH - deriv_right) * math.sin(beta1))
                obs2_l = (x2 - (TRACK_WIDTH - deriv_left) * math.cos(beta2),
                          y2 - (TRACK_WIDTH - deriv_left) * math.sin(beta2))
                obs2_r = (x2 + (TRACK_WIDTH - deriv_right) * math.cos(beta2),
                          y2 + (TRACK_WIDTH - deriv_right) * math.sin(beta2))
                self.obstacles_positions.append(
                    (obs1_l, obs1_r, obs2_r, obs2_l))
                vertices = [obs1_l, obs1_r, obs2_r, obs2_l]
                # The fixture definition is not attached to any body here;
                # collisions are detected geometrically in step().
                obstacle = fixtureDef(shape=polygonShape(vertices=vertices))
                obstacle.userData = obstacle
                obstacle.color = [0.1568, 0.598, 0.2862, 0]
                self.road_poly.append(([obs1_l, obs1_r, obs2_r,
                                        obs2_l], obstacle.color))
            last_obstacle -= 1
        self.track = track
        return True

    def reset(self):
        """Rebuild track and car; return the observation of a first small-gas step."""
        self._destroy()
        self.reward = 0.0
        self.prev_reward = 0.0
        self.tile_visited_count = 0
        self.t = 0.0
        self.road_poly = []

        while True:
            success = self._create_track()
            if success:
                break
            if self.verbose == 1:
                # The two literals are concatenated, so the first one must
                # end with a space ("many instances", not "manyinstances").
                print("retry to generate track (normal if there are not many "
                      "instances of this message)")
        self.car = Car(self.world,
                       *self.track[0][1:4],
                       sensors_activated=self.sensors_activated)

        return self.step([0, 0.1, 0])[0]

    def _in_any_obstacle(self, point):
        """Return True when ``point`` lies inside one of the stored obstacles."""
        return any(
            self.isInsideObstacle(point, obs1_l, obs1_r, obs2_l, obs2_r)
            for obs1_l, obs1_r, obs2_r, obs2_l in self.obstacles_positions)

    def step(self, action):
        """Advance the simulation by one frame.

        Args:
            action: ``[steer, gas, brake]`` or ``None``.  With ``None`` no
                reward, termination or sensor reading is computed.

        Returns:
            ``(observation, step_reward, done)``.  ``observation`` holds
            ``SENSOR_NB`` distances (10000 where nothing was detected)
            followed by the car's speed.  The episode ends when every tile
            was visited, when the hull is inside an obstacle, or when any
            wheel has no tile contact.
        """
        if action is not None:
            self.car.steer(-action[0])
            self.car.gas(action[1])
            self.car.brake(action[2])

        self.car.step(1.0 / FPS)
        self.world.Step(1.0 / FPS, 6 * 30, 2 * 30)
        self.t += 1.0 / FPS

        step_reward = 0
        done = False
        INF = 10000  # reading of a sensor slot that detected nothing
        state = np.full(SENSOR_NB, INF, dtype=float)
        wall = [False] * len(state)
        if action is not None:  # First step without action, called from reset()
            self.car.fuel_spent = 0.0
            step_reward = self.reward - self.prev_reward
            self.prev_reward = self.reward
            if self.tile_visited_count == len(self.track):
                done = True
                self.times_succeeded += 1
            x, y = self.car.hull.position

            # Collision check against the obstacles.
            if self._in_any_obstacle((x, y)):
                done = True

            # A wheel without any tile contact means the car left the road.
            if any(len(w.contacts) == 0 for w in self.car.wheels):
                done = True

            # SENSORS: sensors map onto SENSOR_NB slots (index modulo
            # SENSOR_NB); the first sensor of a slot, in list order, that is
            # off the road or inside an obstacle fixes that slot's distance.
            car_pos = np.array([x, y])
            for i, sensor in enumerate(self.car.sensors):
                slot = i % SENSOR_NB
                sensor_x = sensor.position.x
                sensor_y = sensor.position.y
                sensor.color = (0, 1, 0)
                off_road = len(sensor.contacts) == 0
                if off_road or self._in_any_obstacle((sensor_x, sensor_y)):
                    sensor.color = (0, 0, 0)
                    if not wall[slot]:
                        sensor_pos = np.array([sensor_x, sensor_y])
                        state[slot] = np.linalg.norm(sensor_pos - car_pos)
                        wall[slot] = True

        true_speed = np.sqrt(
            np.square(self.car.hull.linearVelocity[0]) +
            np.square(self.car.hull.linearVelocity[1]))

        return np.append(state, true_speed), step_reward, done

    def isInsideObstacle(self, ref_pos, pos1, pos2, pos3, pos4):
        """Check whether ``ref_pos`` lies inside the quadrilateral pos1..pos4.

        The point counts as inside when the cross product is <= 0 for each
        of the edges pos1->pos2, pos3->pos1, pos4->pos3 and pos2->pos4, so
        the corners must be passed in that winding.
        """
        x, y = ref_pos
        x1, y1 = pos1
        x2, y2 = pos2
        x3, y3 = pos3
        x4, y4 = pos4
        return ((((x2 - x1) * (y - y1)) - ((x - x1) * (y2 - y1)) <= 0)
                and ((((x1 - x3) * (y - y3)) - ((x - x3) * (y1 - y3))) <= 0)
                and ((((x3 - x4) * (y - y4)) - ((x - x4) * (y3 - y4))) <= 0)
                and ((((x4 - x2) * (y - y2)) - ((x - x2) * (y4 - y2))) <= 0))

    def render(self, mode='human'):
        """Draw the scene.

        Args:
            mode: ``'human'``, ``'state_pixels'`` or ``'rgb_array'``.

        Returns:
            ``self.viewer.isopen`` for ``'human'``; otherwise a uint8 array
            of shape ``(VP_H, VP_W, 3)`` with rows flipped vertically.
            ``None`` when ``reset`` has not been called yet.
        """
        assert mode in ['human', 'state_pixels', 'rgb_array']
        if self.viewer is None:
            from gym.envs.classic_control import rendering
            self.viewer = rendering.Viewer(WINDOW_W, WINDOW_H)
            self.score_label = pyglet.text.Label('0000',
                                                 font_size=36,
                                                 x=20,
                                                 y=WINDOW_H * 2.5 / 40.00,
                                                 anchor_x='left',
                                                 anchor_y='center',
                                                 color=(255, 255, 255, 255))
            self.tile_label = pyglet.text.Label('000',
                                                font_size=36,
                                                x=1,
                                                y=WINDOW_H * 2 / 2.1,
                                                anchor_x='left',
                                                anchor_y='center',
                                                color=(255, 255, 255, 255))
            self.transform = rendering.Transform()

        if "t" not in self.__dict__:
            return  # reset() not called yet

        if ZOOM_FOLLOW:
            # Animate zoom first second:
            zoom = 0.1 * SCALE * max(1 - self.t, 0) + ZOOM * SCALE * min(
                self.t, 1)
        else:
            zoom = ZOOM
        scroll_x = self.car.hull.position[0]
        scroll_y = self.car.hull.position[1]
        angle = -self.car.hull.angle
        vel = self.car.hull.linearVelocity
        if np.linalg.norm(vel) > 0.5:
            # Once moving, orient the view along the velocity instead of the hull.
            angle = math.atan2(vel[0], vel[1])
        self.transform.set_scale(zoom, zoom)
        self.transform.set_translation(
            WINDOW_W / 2 - (scroll_x * zoom * math.cos(angle) -
                            scroll_y * zoom * math.sin(angle)),
            WINDOW_H / 4 - (scroll_x * zoom * math.sin(angle) +
                            scroll_y * zoom * math.cos(angle)))
        self.transform.set_rotation(angle)

        self.car.draw(self.viewer, mode != "state_pixels")

        arr = None
        win = self.viewer.window
        win.switch_to()
        win.dispatch_events()

        win.clear()
        t = self.transform
        if mode == 'rgb_array':
            VP_W = VIDEO_W
            VP_H = VIDEO_H
        elif mode == 'state_pixels':
            VP_W = STATE_W
            VP_H = STATE_H
        else:
            # Scale the viewport by the backing scale factor when the window
            # context exposes an `_nscontext`.
            pixel_scale = 1
            if hasattr(win.context, '_nscontext'):
                pixel_scale = win.context._nscontext.view().backingScaleFactor()  # pylint: disable=protected-access
            VP_W = int(pixel_scale * WINDOW_W)
            VP_H = int(pixel_scale * WINDOW_H)

        gl.glViewport(0, 0, VP_W, VP_H)
        t.enable()
        self.render_road()
        for geom in self.viewer.onetime_geoms:
            geom.render()
        self.viewer.onetime_geoms = []
        t.disable()
        self.render_indicators(WINDOW_W, WINDOW_H)

        if mode == 'human':
            win.flip()
            return self.viewer.isopen

        image_data = (pyglet.image.get_buffer_manager().get_color_buffer().
                      get_image_data())
        # np.fromstring's binary mode (sep="") is deprecated; frombuffer is
        # the documented replacement.  The copy keeps the result writable and
        # independent of pyglet's buffer, as fromstring's result was.
        arr = np.frombuffer(image_data.get_data(), dtype=np.uint8).copy()
        arr = arr.reshape(VP_H, VP_W, 4)
        arr = arr[::-1, :, 0:3]  # flip rows, drop the alpha channel

        return arr

    def close(self):
        """Close the viewer window, if one was opened."""
        if self.viewer is not None:
            self.viewer.close()
            self.viewer = None

    def render_road(self):
        """Draw the background grid squares and every polygon in ``road_poly``."""
        gl.glBegin(gl.GL_QUADS)
        gl.glColor4f(0, 0, 0, 0)
        k = PLAYFIELD / 20.0
        for x in range(-20, 20, 2):
            for y in range(-20, 20, 2):
                gl.glVertex3f(k * x + k, k * y + 0, 0)
                gl.glVertex3f(k * x + 0, k * y + 0, 0)
                gl.glVertex3f(k * x + 0, k * y + k, 0)
                gl.glVertex3f(k * x + k, k * y + k, 0)
        for poly, color in self.road_poly:
            gl.glColor4f(color[0], color[1], color[2], 1)
            for p in poly:
                gl.glVertex3f(p[0], p[1], 0)
        gl.glEnd()

    def render_indicators(self, W, H):
        """Draw the bottom status bar, the score label and the tile label.

        The tile label shows the module-level ``Amount_Left`` value.
        """
        global Amount_Left
        gl.glBegin(gl.GL_QUADS)
        s = W / 40.0
        h = H / 40.0
        gl.glColor4f(0, 0, 0, 1)
        gl.glVertex3f(W, 0, 0)
        gl.glVertex3f(W, 5 * h, 0)
        gl.glVertex3f(0, 5 * h, 0)
        gl.glVertex3f(0, 0, 0)

        def vertical_ind(place, val, color):
            gl.glColor4f(color[0], color[1], color[2], 1)
            gl.glVertex3f((place + 0) * s, h + h * val, 0)
            gl.glVertex3f((place + 1) * s, h + h * val, 0)
            gl.glVertex3f((place + 1) * s, h, 0)
            gl.glVertex3f((place + 0) * s, h, 0)

        def horiz_ind(place, val, color):
            gl.glColor4f(color[0], color[1], color[2], 1)
            gl.glVertex3f((place + 0) * s, 4 * h, 0)
            gl.glVertex3f((place + val) * s, 4 * h, 0)
            gl.glVertex3f((place + val) * s, 2 * h, 0)
            gl.glVertex3f((place + 0) * s, 2 * h, 0)

        true_speed = np.sqrt(
            np.square(self.car.hull.linearVelocity[0]) +
            np.square(self.car.hull.linearVelocity[1]))
        vertical_ind(5, 0.02 * true_speed, (1, 1, 1))
        vertical_ind(7, 0.01 * self.car.wheels[0].omega,
                     (0.0, 0, 1))  # ABS sensors
        vertical_ind(8, 0.01 * self.car.wheels[1].omega, (0.0, 0, 1))
        vertical_ind(9, 0.01 * self.car.wheels[2].omega, (0.2, 0, 1))
        vertical_ind(10, 0.01 * self.car.wheels[3].omega, (0.2, 0, 1))
        horiz_ind(20, -10.0 * self.car.wheels[0].joint.angle, (0, 1, 0))
        horiz_ind(30, -0.8 * self.car.hull.angularVelocity, (1, 0, 0))
        gl.glEnd()
        self.score_label.text = "%04i" % self.reward
        self.score_label.draw()
        self.tile_label.text = "%4i" % Amount_Left
        self.tile_label.draw()

    def addSensorBorder(self, x1, y1, x2, y2):
        """Register a road edge as a ``Segment2D`` in ``self.sensorBorder``.

        The segment from ``(x1, y1)`` to ``(x2, y2)`` is appended so that it
        can serve as a reference for the sensors.
        """
        pt1 = Point2D(x1, y1, evaluate=False)
        pt2 = Point2D(x2, y2, evaluate=False)
        self.sensorBorder.append(Segment2D(pt1, pt2, evaluate=False))

    def setAngleZero(self):
        """Set the car hull's angle to 0."""
        self.car.hull.angle = 0
class CarRacing(gym.Env):
    """Top-down car racing environment on a randomly generated closed track.

    Implements the ``_reset`` / ``_step`` / ``_render`` hooks.  The
    observation returned by ``_step`` is the ``state_pixels`` rendering, the
    action is a ``(steer, gas, brake)`` triple.
    """

    metadata = {
        'render.modes': ['human', 'rgb_array', 'state_pixels'],
        'video.frames_per_second': FPS
    }

    def __init__(self):
        """Create the physics world; the track and car are built in ``_reset``."""
        self.action_space = spaces.Box(
            np.array([-1, 0, 0]), np.array([+1, +1, +1]))  # steer, gas, brake
        self.observation_space = spaces.Box(
            low=0, high=255, shape=(STATE_H, STATE_W, 3))
        self.world = Box2D.b2World(
            (0, 0), contactListener=FrictionDetector(self))
        self.viewer = None
        self.invisible_state_window = None
        self.invisible_video_window = None
        self.road = None
        self.car = None
        self.reward = 0.0
        self.prev_reward = 0.0

    def _destroy(self):
        """Remove all road tiles and the car from the physics world.

        Does nothing (the car included) when no road has been created yet.
        """
        if not self.road:
            return
        for t in self.road:
            self.world.DestroyBody(t)
        self.road = []
        self.car.destroy()

    def _create_track(self):
        """Generate a random closed track and its road tiles.

        Returns:
            True on success (``self.track``, ``self.road`` and
            ``self.road_poly`` are populated), False when the generated path
            does not close properly and the caller should retry.
        """
        CHECKPOINTS = 12

        # Create checkpoints
        checkpoints = []
        for c in range(CHECKPOINTS):
            alpha = (2*math.pi*c/CHECKPOINTS
                     + np.random.uniform(0, 2*math.pi*1/CHECKPOINTS))
            rad = np.random.uniform(TRACK_RAD/3, TRACK_RAD)
            if c == 0:
                alpha = 0
                rad = 1.5*TRACK_RAD
            if c == CHECKPOINTS-1:
                alpha = 2*math.pi*c/CHECKPOINTS
                self.start_alpha = 2*math.pi*(-0.5)/CHECKPOINTS
                rad = 1.5*TRACK_RAD
            checkpoints.append(
                (alpha, rad*math.cos(alpha), rad*math.sin(alpha)))

        self.road = []

        # Go from one checkpoint to another to create track
        x, y, beta = 1.5*TRACK_RAD, 0, 0
        dest_i = 0
        laps = 0
        track = []
        no_freeze = 2500  # hard cap on the number of generated points
        visited_other_side = False
        while True:
            alpha = math.atan2(y, x)
            if visited_other_side and alpha > 0:
                laps += 1
                visited_other_side = False
            if alpha < 0:
                visited_other_side = True
                alpha += 2*math.pi
            while True:  # Find destination from checkpoints
                failed = True
                while True:
                    dest_alpha, dest_x, dest_y = checkpoints[
                        dest_i % len(checkpoints)]
                    if alpha <= dest_alpha:
                        failed = False
                        break
                    dest_i += 1
                    if dest_i % len(checkpoints) == 0:
                        break
                if not failed:
                    break
                alpha -= 2*math.pi
                continue
            r1x = math.cos(beta)
            r1y = math.sin(beta)
            p1x = -r1y
            p1y = r1x
            dest_dx = dest_x - x  # vector towards destination
            dest_dy = dest_y - y
            proj = r1x*dest_dx + r1y*dest_dy  # destination vector projected on rad
            while beta - alpha > 1.5*math.pi:
                beta -= 2*math.pi
            while beta - alpha < -1.5*math.pi:
                beta += 2*math.pi
            prev_beta = beta
            proj *= SCALE
            if proj > 0.3:
                beta -= min(TRACK_TURN_RATE, abs(0.001*proj))
            if proj < -0.3:
                beta += min(TRACK_TURN_RATE, abs(0.001*proj))
            x += p1x*TRACK_DETAIL_STEP
            y += p1y*TRACK_DETAIL_STEP
            track.append((alpha, prev_beta*0.5 + beta*0.5, x, y))
            if laps > 4:
                break
            no_freeze -= 1
            if no_freeze == 0:
                break

        # Find closed loop range i1..i2, first loop should be ignored, second is OK
        i1, i2 = -1, -1
        i = len(track)
        while True:
            i -= 1
            if i == 0:
                return False  # Failed
            pass_through_start = (track[i][0] > self.start_alpha
                                  and track[i-1][0] <= self.start_alpha)
            if pass_through_start and i2 == -1:
                i2 = i
            elif pass_through_start and i1 == -1:
                i1 = i
                break
        print("Track generation: %i..%i -> %i-tiles track" % (i1, i2, i2-i1))
        assert i1 != -1
        assert i2 != -1

        track = track[i1:i2-1]

        first_beta = track[0][1]
        first_perp_x = math.cos(first_beta)
        first_perp_y = math.sin(first_beta)
        # Length of perpendicular jump to put together head and tail
        well_glued_together = np.sqrt(
            np.square(first_perp_x*(track[0][2] - track[-1][2])) +
            np.square(first_perp_y*(track[0][3] - track[-1][3])))
        if well_glued_together > TRACK_DETAIL_STEP:
            return False

        # Red-white border on hard turns
        border = [False]*len(track)
        for i in range(len(track)):
            good = True
            oneside = 0
            for neg in range(BORDER_MIN_COUNT):
                beta1 = track[i-neg-0][1]
                beta2 = track[i-neg-1][1]
                good &= abs(beta1 - beta2) > TRACK_TURN_RATE*0.2
                oneside += np.sign(beta1 - beta2)
            good &= abs(oneside) == BORDER_MIN_COUNT
            border[i] = good
        for i in range(len(track)):
            for neg in range(BORDER_MIN_COUNT):
                border[i-neg] |= border[i]

        # Create tiles
        for i in range(len(track)):
            _, beta1, x1, y1 = track[i]
            _, beta2, x2, y2 = track[i-1]
            road1_l = (x1 - TRACK_WIDTH*math.cos(beta1),
                       y1 - TRACK_WIDTH*math.sin(beta1))
            road1_r = (x1 + TRACK_WIDTH*math.cos(beta1),
                       y1 + TRACK_WIDTH*math.sin(beta1))
            road2_l = (x2 - TRACK_WIDTH*math.cos(beta2),
                       y2 - TRACK_WIDTH*math.sin(beta2))
            road2_r = (x2 + TRACK_WIDTH*math.cos(beta2),
                       y2 + TRACK_WIDTH*math.sin(beta2))
            t = self.world.CreateStaticBody(fixtures=fixtureDef(
                shape=polygonShape(
                    vertices=[road1_l, road1_r, road2_r, road2_l])
            ))
            t.userData = t
            c = 0.01*(i % 3)
            t.color = [ROAD_COLOR[0] + c, ROAD_COLOR[1] + c, ROAD_COLOR[2] + c]
            t.road_visited = False
            t.road_friction = 1.0
            t.fixtures[0].sensor = True
            self.road_poly.append(
                ([road1_l, road1_r, road2_r, road2_l], t.color))
            self.road.append(t)
            if border[i]:
                side = np.sign(beta2 - beta1)
                b1_l = (x1 + side*TRACK_WIDTH*math.cos(beta1),
                        y1 + side*TRACK_WIDTH*math.sin(beta1))
                b1_r = (x1 + side*(TRACK_WIDTH+BORDER)*math.cos(beta1),
                        y1 + side*(TRACK_WIDTH+BORDER)*math.sin(beta1))
                b2_l = (x2 + side*TRACK_WIDTH*math.cos(beta2),
                        y2 + side*TRACK_WIDTH*math.sin(beta2))
                b2_r = (x2 + side*(TRACK_WIDTH+BORDER)*math.cos(beta2),
                        y2 + side*(TRACK_WIDTH+BORDER)*math.sin(beta2))
                self.road_poly.append(
                    ([b1_l, b1_r, b2_r, b2_l],
                     (1, 1, 1) if i % 2 == 0 else (1, 0, 0)))
        self.track = track
        return True

    def _reset(self):
        """Rebuild track and car and return the first observation."""
        self._destroy()
        self.reward = 0.0
        self.prev_reward = 0.0
        self.tile_visited_count = 0
        self.t = 0.0
        self.road_poly = []
        self.human_render = False

        while True:
            success = self._create_track()
            if success:
                break
            print("retry to generate track (normal if there are not many of this messages)")
        self.car = Car(self.world, *self.track[0][1:4])

        return self._step(None)[0]

    def _step(self, action):
        """Advance the simulation by one frame.

        Args:
            action: ``(steer, gas, brake)`` or None (used by ``_reset`` for
                the initial frame; no reward bookkeeping is done then).

        Returns:
            ``(state, step_reward, done, info)`` where ``info`` is an empty
            dict.
        """
        if action is not None:
            self.car.steer(-action[0])
            self.car.gas(action[1])
            self.car.brake(action[2])

        self.car.step(1.0/FPS)
        self.world.Step(1.0/FPS, 6*30, 2*30)
        self.t += 1.0/FPS

        self.state = self._render("state_pixels")

        step_reward = 0
        done = False
        if action is not None:  # First step without action, called from reset()
            self.reward -= 0.1
            # We actually don't want to count fuel spent, we want car to be faster.
            self.car.fuel_spent = 0.0
            step_reward = self.reward - self.prev_reward
            self.prev_reward = self.reward
            if self.tile_visited_count == len(self.track):
                done = True
            x, y = self.car.hull.position
            if abs(x) > PLAYFIELD or abs(y) > PLAYFIELD:
                done = True
                step_reward = -100

        return self.state, step_reward, done, {}

    def _draw_frame(self, win, vp_w, vp_h):
        """Clear the window and draw road, car geoms and indicators."""
        win.clear()
        t = self.transform
        glViewport(0, 0, vp_w, vp_h)
        t.enable()
        self._render_road()
        for geom in self.viewer.onetime_geoms:
            geom.render()
        t.disable()
        self._render_indicators(WINDOW_W, WINDOW_H)  # TODO: find why 2x needed

    def _render(self, mode='human', close=False):
        """Render the scene.

        Args:
            mode: 'human', 'rgb_array' or 'state_pixels'.
            close: when True, close the viewer (if any) and return None.

        Returns:
            An ``(H, W, 3)`` uint8 array for 'rgb_array' / 'state_pixels',
            otherwise None.  Also None when called before ``_reset``.
        """
        if close:
            if self.viewer is not None:
                self.viewer.close()
                self.viewer = None
            return

        if self.viewer is None:
            self.viewer = rendering.Viewer(WINDOW_W, WINDOW_H)
            self.score_label = pyglet.text.Label(
                '0000', font_size=36,
                x=20, y=WINDOW_H*2.5/40.00, anchor_x='left', anchor_y='center',
                color=(255, 255, 255, 255))
            self.transform = rendering.Transform()

        if "t" not in self.__dict__:
            return  # reset() not called yet

        # Animate zoom first second
        zoom = 0.1*SCALE*max(1-self.t, 0) + ZOOM*SCALE*min(self.t, 1)
        scroll_x = self.car.hull.position[0]
        scroll_y = self.car.hull.position[1]
        angle = -self.car.hull.angle
        vel = self.car.hull.linearVelocity
        if np.linalg.norm(vel) > 0.5:
            # Once moving, orient the camera along the velocity vector.
            angle = math.atan2(vel[0], vel[1])
        self.transform.set_scale(zoom, zoom)
        self.transform.set_translation(
            WINDOW_W/2 - (scroll_x*zoom*math.cos(angle)
                          - scroll_y*zoom*math.sin(angle)),
            WINDOW_H/4 - (scroll_x*zoom*math.sin(angle)
                          + scroll_y*zoom*math.cos(angle)))
        self.transform.set_rotation(angle)

        self.car.draw(self.viewer, mode != "state_pixels")

        arr = None
        win = self.viewer.window
        win.switch_to()
        win.dispatch_events()
        if mode == "rgb_array" or mode == "state_pixels":
            if mode == 'rgb_array':
                VP_W = VIDEO_W
                VP_H = VIDEO_H
            else:
                VP_W = STATE_W
                VP_H = STATE_H
            self._draw_frame(win, VP_W, VP_H)
            image_data = pyglet.image.get_buffer_manager().get_color_buffer().get_image_data()
            # frombuffer replaces the deprecated binary mode of np.fromstring;
            # the copy keeps the result writable and independent of the
            # pyglet buffer, as fromstring's result was.
            arr = np.frombuffer(image_data.data, dtype=np.uint8).copy()
            arr = arr.reshape(VP_H, VP_W, 4)
            arr = arr[::-1, :, 0:3]

        # agent can call or not call env.render() itself when recording video.
        if mode == "rgb_array" and not self.human_render:
            win.flip()

        if mode == 'human':
            self.human_render = True
            self._draw_frame(win, WINDOW_W, WINDOW_H)
            win.flip()

        self.viewer.onetime_geoms = []
        return arr

    def _render_road(self):
        """Draw the grass background, its checker pattern and all road polys."""
        glBegin(GL_QUADS)
        glColor4f(0.4, 0.8, 0.4, 1.0)
        glVertex3f(-PLAYFIELD, +PLAYFIELD, 0)
        glVertex3f(+PLAYFIELD, +PLAYFIELD, 0)
        glVertex3f(+PLAYFIELD, -PLAYFIELD, 0)
        glVertex3f(-PLAYFIELD, -PLAYFIELD, 0)
        glColor4f(0.4, 0.9, 0.4, 1.0)
        k = PLAYFIELD/20.0
        for x in range(-20, 20, 2):
            for y in range(-20, 20, 2):
                glVertex3f(k*x + k, k*y + 0, 0)
                glVertex3f(k*x + 0, k*y + 0, 0)
                glVertex3f(k*x + 0, k*y + k, 0)
                glVertex3f(k*x + k, k*y + k, 0)
        for poly, color in self.road_poly:
            glColor4f(color[0], color[1], color[2], 1)
            for p in poly:
                glVertex3f(p[0], p[1], 0)
        glEnd()

    def _render_indicators(self, W, H):
        """Draw the bottom status bar (speed, wheel, steering, yaw) and score.

        Args:
            W: width of the area the bar is laid out in.
            H: height of the area the bar is laid out in.
        """
        glBegin(GL_QUADS)
        s = W/40.0
        h = H/40.0
        glColor4f(0, 0, 0, 1)
        glVertex3f(W, 0, 0)
        glVertex3f(W, 5*h, 0)
        glVertex3f(0, 5*h, 0)
        glVertex3f(0, 0, 0)

        def vertical_ind(place, val, color):
            glColor4f(color[0], color[1], color[2], 1)
            glVertex3f((place+0)*s, h + h*val, 0)
            glVertex3f((place+1)*s, h + h*val, 0)
            glVertex3f((place+1)*s, h, 0)
            glVertex3f((place+0)*s, h, 0)

        def horiz_ind(place, val, color):
            glColor4f(color[0], color[1], color[2], 1)
            glVertex3f((place+0)*s, 4*h, 0)
            glVertex3f((place+val)*s, 4*h, 0)
            glVertex3f((place+val)*s, 2*h, 0)
            glVertex3f((place+0)*s, 2*h, 0)

        true_speed = np.sqrt(np.square(self.car.hull.linearVelocity[0])
                             + np.square(self.car.hull.linearVelocity[1]))
        vertical_ind(5, 0.02*true_speed, (1, 1, 1))
        vertical_ind(7, 0.01*self.car.wheels[0].omega, (0.0, 0, 1))  # ABS sensors
        vertical_ind(8, 0.01*self.car.wheels[1].omega, (0.0, 0, 1))
        vertical_ind(9, 0.01*self.car.wheels[2].omega, (0.2, 0, 1))
        vertical_ind(10, 0.01*self.car.wheels[3].omega, (0.2, 0, 1))
        horiz_ind(20, -10.0*self.car.wheels[0].joint.angle, (0, 1, 0))
        horiz_ind(30, -0.8*self.car.hull.angularVelocity, (1, 0, 0))
        glEnd()
        self.score_label.text = "%04i" % self.reward
        self.score_label.draw()
def main():
    """Load a map, build a lattice D* Lite planner and simulate a plan.

    The planner starts from an all-zero prior map of the same shape as the
    loaded map; the loaded map is handed to ``simulate_plan_execution`` as
    the true map.
    """
    # load in map
    map_file = "search_planning_algos/maps/map3.npy"
    true_map = np.load(map_file)  # not named `map`: that shadows the builtin
    Y, X = true_map.shape  # unpacking also rejects maps that are not 2-D
    dy, dx = 1.0, 1.0
    miny, minx = 0, 0

    # define car
    wheel_radius = 0  # anything non-zero is an obstacle

    # define search params
    eps = 4
    dist_cost = 1
    time_cost = 1
    roughness_cost = 1
    cost_weights = (dist_cost, time_cost, roughness_cost)

    # define action space
    dt = 0.1
    T = 1.0
    velocities = np.linspace(start=1, stop=2, num=2) / dt
    dv = velocities[1] - velocities[0]
    steer_angles = np.linspace(-math.pi / 32, math.pi / 32, num=5)

    # define heading space (degrees, converted to radians below)
    theta_start, theta_stop, theta_step = 0, 315, 45
    num_thetas = int((theta_stop - theta_start) / theta_step) + 1
    thetas = np.linspace(start=theta_start, stop=theta_stop, num=num_thetas)
    thetas = thetas / RAD_TO_DEG  # convert to radians
    dtheta = theta_step / RAD_TO_DEG

    # collective variables for discretizing C-space
    dstate = np.array([dx, dy, dtheta, dv, dt])
    min_state = np.array([minx, miny, min(thetas), min(velocities), 0])

    # create planner and graph
    prior_map = np.zeros_like(true_map)
    graph = Graph(map=prior_map, min_state=min_state, dstate=dstate,
                  thetas=thetas, wheel_radius=wheel_radius,
                  cost_weights=cost_weights)
    car = Car(max_steer=max(steer_angles), max_v=max(velocities))
    planner = LatticeDstarLite(graph=graph, car=car, min_state=min_state,
                               dstate=dstate, velocities=velocities,
                               steer_angles=steer_angles, thetas=thetas,
                               T=T, eps=eps, viz=True)

    # define start and goal (x,y) need to be made continuous
    # since I selected those points on image map of discrete space
    start = (np.array([60, 40, 0, velocities[0], 0]) *
             np.array([dx, dy, 1, 1, 1]))
    # looks like goal should face up, but theta is chosen
    # in image-frame as is the y-coordinates, so -90 faces
    # upwards on our screen and +90 faces down
    goal = (np.array([85, 65, -math.pi / 2, velocities[0], 0]) *
            np.array([dx, dy, 1, 1, 1]))

    # run planner
    simulate_plan_execution(start=start, goal=goal,
                            planner=planner, true_map=true_map)
class CarRacing(gym.Env, EzPickle):
    """Car racing environment with per-tile friction and replayable tracks.

    On top of the pixel observation, every step stores a handful of derived
    quantities on ``self.car`` (``offset``, ``angle``, ``curve1``,
    ``curve2``, ``slip_rate``, ``yaw_velocity``, ``speed``); see
    ``calcInputs``.  A generated track can be replayed by passing the
    ``track_pack`` dict of an earlier episode to ``reset``.
    """

    metadata = {
        "render.modes": ["human", "rgb_array", "state_pixels"],
        "video.frames_per_second": FPS,
    }

    def __init__(self, verbose=1):
        """Create the physics world, friction distribution and gym spaces.

        Args:
            verbose: stored on the instance; not read elsewhere in this class.
        """
        EzPickle.__init__(self)
        self.seed()
        self.contactListener_keepref = FrictionDetector(self)
        self.world = Box2D.b2World(
            (0, 0), contactListener=self.contactListener_keepref)
        self.viewer = None
        self.invisible_state_window = None
        self.invisible_video_window = None
        self.road = None
        self.car = None
        self.reward = 0.0
        self.prev_reward = 0.0
        self.verbose = verbose
        # Pool of per-tile friction values: gaussian around FRICTION,
        # restricted to the open interval (0.1, 1.0).
        gaussian = np.random.normal(FRICTION, 0.2, 1000)
        self.friction_values = gaussian[(gaussian > 0.1) & (gaussian < 1.0)]
        self.friction_change = random.randrange(5, 15) / 10.0
        self.fd_tile = fixtureDef(shape=polygonShape(
            vertices=[(0, 0), (1, 0), (1, -1), (0, -1)]))

        self.action_space = spaces.Box(np.array([-1, 0, 0]),
                                       np.array([+1, +1, +1]),
                                       dtype=np.float32)  # steer, gas, brake

        self.observation_space = spaces.Box(low=0,
                                            high=255,
                                            shape=(STATE_H, STATE_W, 3),
                                            dtype=np.uint8)

    def seed(self, seed=None):
        """Seed ``self.np_random`` and return the seed in a one-element list."""
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    def _destroy(self):
        """Remove all road tiles and the car from the physics world.

        Does nothing (the car included) when no road has been created yet.
        """
        if not self.road:
            return
        for t in self.road:
            self.world.DestroyBody(t)
        self.road = []
        self.car.destroy()

    def _create_track(self, noises=None, rads=None):
        """Generate a closed track, optionally replaying stored randomness.

        Args:
            noises: per-checkpoint angular noise values.  Missing entries are
                drawn from ``self.np_random`` and appended to the list that
                was passed in.  ``None`` means "start from an empty list".
            rads: per-checkpoint radii, handled the same way as ``noises``.

        Returns:
            True on success (``self.track``, ``self.track_pack``,
            ``self.road`` and ``self.road_poly`` are populated), False when
            the generated path does not close properly.
        """
        # None defaults instead of mutable [] defaults: the lists are
        # appended to, so shared default lists would leak between calls.
        if noises is None:
            noises = []
        if rads is None:
            rads = []

        CHECKPOINTS = 12

        # Create checkpoints
        checkpoints = []
        for c in range(CHECKPOINTS):
            if len(noises) <= c:
                noise = self.np_random.uniform(0,
                                               2 * math.pi * 1 / CHECKPOINTS)
                noises.append(noise)
            else:
                noise = noises[c]

            alpha = 2 * math.pi * c / CHECKPOINTS + noise

            if len(rads) <= c:
                rad = self.np_random.uniform(TRACK_RAD / 3, TRACK_RAD)
                rads.append(rad)
            else:
                rad = rads[c]

            if c == 0:
                alpha = 0
                rad = 1.5 * TRACK_RAD
            if c == CHECKPOINTS - 1:
                alpha = 2 * math.pi * c / CHECKPOINTS
                self.start_alpha = 2 * math.pi * (-0.5) / CHECKPOINTS
                rad = 1.5 * TRACK_RAD
            checkpoints.append(
                (alpha, rad * math.cos(alpha), rad * math.sin(alpha)))
        self.road = []

        # Go from one checkpoint to another to create track
        x, y, beta = 1.5 * TRACK_RAD, 0, 0
        dest_i = 0
        laps = 0
        track = []
        no_freeze = 2500  # hard cap on the number of generated points
        visited_other_side = False
        while True:
            alpha = math.atan2(y, x)
            if visited_other_side and alpha > 0:
                laps += 1
                visited_other_side = False
            if alpha < 0:
                visited_other_side = True
                alpha += 2 * math.pi
            while True:  # Find destination from checkpoints
                failed = True
                while True:
                    dest_alpha, dest_x, dest_y = checkpoints[
                        dest_i % len(checkpoints)]
                    if alpha <= dest_alpha:
                        failed = False
                        break
                    dest_i += 1
                    if dest_i % len(checkpoints) == 0:
                        break
                if not failed:
                    break
                alpha -= 2 * math.pi
                continue
            r1x = math.cos(beta)
            r1y = math.sin(beta)
            p1x = -r1y
            p1y = r1x
            dest_dx = dest_x - x  # vector towards destination
            dest_dy = dest_y - y
            # destination vector projected on rad:
            proj = r1x * dest_dx + r1y * dest_dy
            while beta - alpha > 1.5 * math.pi:
                beta -= 2 * math.pi
            while beta - alpha < -1.5 * math.pi:
                beta += 2 * math.pi
            prev_beta = beta
            proj *= SCALE
            if proj > 0.3:
                beta -= min(TRACK_TURN_RATE, abs(0.001 * proj))
            if proj < -0.3:
                beta += min(TRACK_TURN_RATE, abs(0.001 * proj))
            x += p1x * TRACK_DETAIL_STEP
            y += p1y * TRACK_DETAIL_STEP
            track.append((alpha, prev_beta * 0.5 + beta * 0.5, x, y))
            if laps > 4:
                break
            no_freeze -= 1
            if no_freeze == 0:
                break

        # Find closed loop range i1..i2, first loop should be ignored, second is OK
        i1, i2 = -1, -1
        i = len(track)
        while True:
            i -= 1
            if i == 0:
                return False  # Failed
            pass_through_start = (track[i][0] > self.start_alpha
                                  and track[i - 1][0] <= self.start_alpha)
            if pass_through_start and i2 == -1:
                i2 = i
            elif pass_through_start and i1 == -1:
                i1 = i
                break
        assert i1 != -1
        assert i2 != -1

        track = track[i1:i2 - 1]

        first_beta = track[0][1]
        first_perp_x = math.cos(first_beta)
        first_perp_y = math.sin(first_beta)
        # Length of perpendicular jump to put together head and tail
        well_glued_together = np.sqrt(
            np.square(first_perp_x * (track[0][2] - track[-1][2])) +
            np.square(first_perp_y * (track[0][3] - track[-1][3])))
        if well_glued_together > TRACK_DETAIL_STEP:
            return False

        # Red-white border on hard turns
        border = [False] * len(track)
        for i in range(len(track)):
            good = True
            oneside = 0
            for neg in range(BORDER_MIN_COUNT):
                beta1 = track[i - neg - 0][1]
                beta2 = track[i - neg - 1][1]
                good &= abs(beta1 - beta2) > TRACK_TURN_RATE * 0.2
                oneside += np.sign(beta1 - beta2)
            good &= abs(oneside) == BORDER_MIN_COUNT
            border[i] = good
        for i in range(len(track)):
            for neg in range(BORDER_MIN_COUNT):
                border[i - neg] |= border[i]

        # Create tiles
        for i in range(len(track)):
            _, beta1, x1, y1 = track[i]
            _, beta2, x2, y2 = track[i - 1]
            road1_l = (
                x1 - TRACK_WIDTH * math.cos(beta1),
                y1 - TRACK_WIDTH * math.sin(beta1),
            )
            road1_r = (
                x1 + TRACK_WIDTH * math.cos(beta1),
                y1 + TRACK_WIDTH * math.sin(beta1),
            )
            road2_l = (
                x2 - TRACK_WIDTH * math.cos(beta2),
                y2 - TRACK_WIDTH * math.sin(beta2),
            )
            road2_r = (
                x2 + TRACK_WIDTH * math.cos(beta2),
                y2 + TRACK_WIDTH * math.sin(beta2),
            )
            vertices = [road1_l, road1_r, road2_r, road2_l]
            self.fd_tile.shape.vertices = vertices
            t = self.world.CreateStaticBody(fixtures=self.fd_tile)
            t.road_id = i
            t.center_p = (x1, y1)
            t.userData = t
            c = 0.01 * (i % 3)
            t.color = [ROAD_COLOR[0] + c, ROAD_COLOR[1] + c, ROAD_COLOR[2] + c]
            t.road_visited = False
            # NOTE(review): friction uses the global `random` / `np.random`
            # generators, not self.np_random, so seed() does not make tile
            # friction reproducible -- confirm whether that is intended.
            if i % 10 == 0:
                self.friction_change = random.randrange(5, 15) / 10.0
            t.road_friction = np.random.choice(
                self.friction_values) * self.friction_change
            t.fixtures[0].sensor = True
            self.road_poly.append(([road1_l, road1_r, road2_r,
                                    road2_l], t.color))
            self.road.append(t)
            if border[i]:
                side = np.sign(beta2 - beta1)
                b1_l = (
                    x1 + side * TRACK_WIDTH * math.cos(beta1),
                    y1 + side * TRACK_WIDTH * math.sin(beta1),
                )
                b1_r = (
                    x1 + side * (TRACK_WIDTH + BORDER) * math.cos(beta1),
                    y1 + side * (TRACK_WIDTH + BORDER) * math.sin(beta1),
                )
                b2_l = (
                    x2 + side * TRACK_WIDTH * math.cos(beta2),
                    y2 + side * TRACK_WIDTH * math.sin(beta2),
                )
                b2_r = (
                    x2 + side * (TRACK_WIDTH + BORDER) * math.cos(beta2),
                    y2 + side * (TRACK_WIDTH + BORDER) * math.sin(beta2),
                )
                self.road_poly.append(([b1_l, b1_r, b2_r, b2_l],
                                       (1, 1, 1) if i % 2 == 0 else (1, 0, 0)))
        self.track = track
        self.track_pack = {'noises': noises, 'rads': rads}
        return True

    def reset(self, track=None):
        """Rebuild track and car and return the first observation.

        Args:
            track: optional dict with ``'noises'`` and ``'rads'`` lists (the
                shape of ``self.track_pack``) to replay.  When None, random
                tracks are generated until one succeeds.

        Raises:
            RuntimeError: if the supplied ``track`` does not yield a valid
                closed track.
        """
        self._destroy()
        self.reward = 0.0
        self.prev_reward = 0.0
        self.tile_visited_count = 0
        self.t = 0.0
        self.road_poly = []
        self.track_pack = None

        if track is None:
            while True:
                success = self._create_track([], [])
                if success:
                    break
        elif not self._create_track(track['noises'], track['rads']):
            # Previously the failure was ignored and surfaced later as an
            # IndexError on the empty road list.
            raise RuntimeError(
                "could not build a closed track from the given track pack")

        self.car = Car(self.world, *self.track[0][1:4])

        return self.step(None)[0]

    def step(self, action):
        """Advance the simulation by one frame.

        Args:
            action: ``(steer, gas, brake)`` or None (used by ``reset`` for
                the initial frame).  While at most 4 tiles have been visited
                and gas is below 0.1, the action is modified in place to
                gas 0.1 / brake 0.

        Returns:
            ``(state, step_reward, done, info)`` where ``info`` is an empty
            dict.
        """
        if action is not None:
            if self.tile_visited_count <= 4 and action[1] < 0.1:
                action[1] = 0.1
                action[2] = 0
            self.car.steer(-action[0])
            self.car.gas(action[1])
            self.car.brake(action[2])

        self.car.step(1.0 / FPS)
        self.world.Step(1.0 / FPS, 6 * 30, 2 * 30)
        self.t += 1.0 / FPS
        self.calcInputs()
        self.state = self.render("state_pixels")

        step_reward = 0
        done = False
        if action is not None:  # First step without action, called from reset()
            self.reward -= TIMEPENALTY
            # We actually don't want to count fuel spent, we want car to be faster.
            self.car.fuel_spent = 0.0
            step_reward = self.reward - self.prev_reward
            self.prev_reward = self.reward
            if self.tile_visited_count == len(self.track):
                done = True
            x, y = self.car.hull.position
            if abs(x) > PLAYFIELD or abs(y) > PLAYFIELD:
                done = True
                step_reward = -100
            elif abs(self.car.offset) > 1:
                done = True
            elif abs(self.car.angle) > 1:
                done = True
            # The original read an undefined name `speed` here (NameError).
            # NOTE(review): self.car.speed is normalised by MAX_SPEED in
            # calcInputs -- confirm MIN_SPEED is expressed in the same units.
            elif self.car.speed < MIN_SPEED:
                if self.tile_visited_count > 4:
                    done = True  # too slow

        return self.state, step_reward, done, {}

    def _track_reference_points(self):
        """Return the six track points used for heading and curve forecast.

        ``(p1, p2)`` is the local centre-line segment (tile ``curren_tile-2``
        to ``curren_tile``); ``(p3, p4)`` and ``(p5, p6)`` are the near and
        far look-ahead segments.  Close to the end of the road list both
        look-ahead segments collapse onto ``p2`` -> last tile.
        """
        tile = self.car.curren_tile
        p2 = self.road[tile].center_p
        p1 = self.road[tile - 2].center_p
        p3 = p2
        if (tile + FUTURE_SIGHT) < len(self.road):
            p4 = self.road[tile + int(FUTURE_SIGHT / 2)].center_p
            p5 = self.road[tile + int(FUTURE_SIGHT / 2)].center_p
            p6 = self.road[tile + FUTURE_SIGHT].center_p
        else:
            p4 = self.road[-1].center_p
            p5 = p3
            p6 = p4
        return p1, p2, p3, p4, p5, p6

    def calcInputs(self):
        """Compute the derived car quantities and store them on ``self.car``.

        Sets ``offset``, ``angle``, ``curve1``, ``curve2``, ``slip_rate``,
        ``yaw_velocity`` and ``speed``.
        """
        p1, p2, p3, p4, p5, p6 = self._track_reference_points()
        self.car.offset = self.calcOffset(p1, p2,
                                          self.car.hull.position) / TRACK_WIDTH
        # Car heading relative to the centre line, from wheel 2 to wheel 0.
        self.car.angle = self.calcAngle(
            p1, p2, self.car.wheels[2].position,
            self.car.wheels[0].position) / MAX_ANGLE
        self.car.curve1 = self.calcAngle(p1, p2, p3, p4) / MAX_ANGLE
        self.car.curve2 = self.calcAngle(p1, p2, p5, p6) / MAX_ANGLE
        self.car.slip_rate = self.calcSlipRate()
        self.car.yaw_velocity = self.car.hull.angularVelocity / 400.0  # tested
        self.car.speed = np.linalg.norm(
            self.car.hull.linearVelocity) / MAX_SPEED

    def calcSlipRate(self):
        """Return ``1 - f_speed / r_speed`` when wheels 2-3 outrun wheels 0-1.

        ``f_speed`` / ``r_speed`` are the mean ``omega`` of wheels 0-1 and
        2-3.  Returns 0 otherwise, including when ``r_speed`` is zero (which
        previously divided by zero).
        """
        f_speed = (self.car.wheels[0].omega + self.car.wheels[1].omega) / 2
        r_speed = (self.car.wheels[2].omega + self.car.wheels[3].omega) / 2
        if r_speed > f_speed and r_speed != 0:
            return 1 - f_speed / r_speed
        return 0

    def calcOffset(self, p1, p2, p3):
        """Return the signed distance of 2-D point ``p3`` from line p1->p2.

        Not normalised here; ``calcInputs`` divides by ``TRACK_WIDTH``.
        A point exactly on the line gives 0 (it used to give NaN).
        """
        p1 = np.array(p1)
        p2 = np.array(p2)
        p3 = np.array(p3)
        side = ((p3[0] - p2[0]) * (p2[1] - p1[1]) - (p2[0] - p1[0]) *
                (p3[1] - p2[1]))
        # `side` equals the 2-D cross product (p2 - p1) x (p1 - p3), so the
        # original |cross| / |p2 - p1| * (-side / |side|) reduces to the
        # expression below.  This avoids the 0/0 and the deprecated 2-D
        # np.cross call.
        return -side / np.linalg.norm(p2 - p1)

    def calcAngle(self, p1, p2, p3, p4):
        """Return the signed angle in degrees from p1->p2 to p3->p4."""
        v1 = -np.array(p1) + np.array(p2)
        v2 = -np.array(p3) + np.array(p4)
        signed_angle = math.atan2(v1[0] * v2[1] - v1[1] * v2[0],
                                  v1[0] * v2[0] + v1[1] * v2[1])
        return np.rad2deg(signed_angle)

    def debug_line(self, p1, p2, color=(0.0, 255, 0.0)):
        """Draw a 2-px debug polyline from ``p1`` to ``p2`` on the viewer."""
        self.viewer.draw_polyline([(p1[0], p1[1]), (p2[0], p2[1])],
                                  color=color,
                                  linewidth=2)

    def render(self, mode="human"):
        """Render the scene, including the debug centre/forecast lines.

        Args:
            mode: "human", "state_pixels" or "rgb_array".

        Returns:
            ``self.viewer.isopen`` for "human"; an ``(H, W, 3)`` uint8 array
            for the other modes; None when called before ``reset``.
        """
        assert mode in ["human", "state_pixels", "rgb_array"]
        if self.viewer is None:
            from gym.envs.classic_control import rendering

            self.viewer = rendering.Viewer(WINDOW_W, WINDOW_H)
            self.score_label = pyglet.text.Label(
                "0000",
                font_size=36,
                x=20,
                y=WINDOW_H * 2.5 / 40.00,
                anchor_x="left",
                anchor_y="center",
                color=(255, 255, 255, 255),
            )
            self.transform = rendering.Transform()

        if "t" not in self.__dict__:
            return  # reset() not called yet

        # Animate zoom first second:
        zoom = 0.1 * SCALE * max(1 - self.t, 0) + ZOOM * SCALE * min(
            self.t, 1)
        scroll_x = self.car.hull.position[0]
        scroll_y = self.car.hull.position[1]
        angle = -self.car.hull.angle
        vel = self.car.hull.linearVelocity
        if np.linalg.norm(vel) > 0.5:
            # Once moving, orient the camera along the velocity vector.
            angle = math.atan2(vel[0], vel[1])
        self.transform.set_scale(zoom, zoom)
        self.transform.set_translation(
            WINDOW_W / 2 - (scroll_x * zoom * math.cos(angle) -
                            scroll_y * zoom * math.sin(angle)),
            WINDOW_H / 4 - (scroll_x * zoom * math.sin(angle) +
                            scroll_y * zoom * math.cos(angle)),
        )
        self.transform.set_rotation(angle)

        self.car.draw(self.viewer, mode != "state_pixels")

        p1, p2, p3, p4, p5, p6 = self._track_reference_points()
        # middle line
        self.debug_line(p1, p2)
        # curve forecast
        self.debug_line(p3, p4, color=(50, 50, 0))
        self.debug_line(p5, p6, color=(0, 100, 50))

        win = self.viewer.window
        win.switch_to()
        win.dispatch_events()

        win.clear()
        t = self.transform
        if mode == "rgb_array":
            VP_W = VIDEO_W
            VP_H = VIDEO_H
        elif mode == "state_pixels":
            VP_W = STATE_W
            VP_H = STATE_H
        else:
            pixel_scale = 1
            if hasattr(win.context, "_nscontext"):
                pixel_scale = (
                    win.context._nscontext.view().backingScaleFactor())  # pylint: disable=protected-access
            VP_W = int(pixel_scale * WINDOW_W)
            VP_H = int(pixel_scale * WINDOW_H)

        gl.glViewport(0, 0, VP_W, VP_H)
        t.enable()
        self.render_road()
        for geom in self.viewer.onetime_geoms:
            geom.render()
        self.viewer.onetime_geoms = []
        t.disable()
        self.render_indicators(WINDOW_W, WINDOW_H)

        if mode == "human":
            win.flip()
            return self.viewer.isopen

        image_data = (pyglet.image.get_buffer_manager().get_color_buffer().
                      get_image_data())
        # frombuffer replaces the deprecated binary mode of np.fromstring;
        # the copy keeps the result writable, as fromstring's result was.
        arr = np.frombuffer(image_data.get_data(), dtype=np.uint8).copy()
        arr = arr.reshape(VP_H, VP_W, 4)
        arr = arr[::-1, :, 0:3]

        return arr

    def close(self):
        """Close the viewer, if one was opened."""
        if self.viewer is not None:
            self.viewer.close()
            self.viewer = None

    def render_road(self):
        """Draw the grass background, its checker pattern and all road polys."""
        colors = [0.4, 0.8, 0.4, 1.0] * 4
        polygons_ = [
            +PLAYFIELD,
            +PLAYFIELD,
            0,
            +PLAYFIELD,
            -PLAYFIELD,
            0,
            -PLAYFIELD,
            -PLAYFIELD,
            0,
            -PLAYFIELD,
            +PLAYFIELD,
            0,
        ]

        k = PLAYFIELD / 20.0
        colors.extend([0.4, 0.9, 0.4, 1.0] * 4 * 20 * 20)
        for x in range(-20, 20, 2):
            for y in range(-20, 20, 2):
                polygons_.extend([
                    k * x + k,
                    k * y + 0,
                    0,
                    k * x + 0,
                    k * y + 0,
                    0,
                    k * x + 0,
                    k * y + k,
                    0,
                    k * x + k,
                    k * y + k,
                    0,
                ])

        for poly, color in self.road_poly:
            colors.extend([color[0], color[1], color[2], 1] * len(poly))
            for p in poly:
                polygons_.extend([p[0], p[1], 0])

        vl = pyglet.graphics.vertex_list(
            len(polygons_) // 3,
            ("v3f", polygons_),
            ("c4f", colors)  # gl.GL_QUADS,
        )
        vl.draw(gl.GL_QUADS)
        vl.delete()

    def render_indicators(self, W, H):
        """Draw the bottom status bar (speed, wheel, steering, yaw) and score.

        Args:
            W: width of the area the bar is laid out in.
            H: height of the area the bar is laid out in.
        """
        s = W / 40.0
        h = H / 40.0
        colors = [0, 0, 0, 1] * 4
        polygons = [W, 0, 0, W, 5 * h, 0, 0, 5 * h, 0, 0, 0, 0]

        def vertical_ind(place, val, color):
            colors.extend([color[0], color[1], color[2], 1] * 4)
            polygons.extend([
                place * s,
                h + h * val,
                0,
                (place + 1) * s,
                h + h * val,
                0,
                (place + 1) * s,
                h,
                0,
                (place + 0) * s,
                h,
                0,
            ])

        def horiz_ind(place, val, color):
            colors.extend([color[0], color[1], color[2], 1] * 4)
            polygons.extend([
                (place + 0) * s,
                4 * h,
                0,
                (place + val) * s,
                4 * h,
                0,
                (place + val) * s,
                2 * h,
                0,
                (place + 0) * s,
                2 * h,
                0,
            ])

        true_speed = np.sqrt(
            np.square(self.car.hull.linearVelocity[0]) +
            np.square(self.car.hull.linearVelocity[1]))

        vertical_ind(5, 0.02 * true_speed, (1, 1, 1))
        vertical_ind(7, 0.01 * self.car.wheels[0].omega,
                     (0.0, 0, 1))  # ABS sensors
        vertical_ind(8, 0.01 * self.car.wheels[1].omega, (0.0, 0, 1))
        vertical_ind(9, 0.01 * self.car.wheels[2].omega, (0.2, 0, 1))
        vertical_ind(10, 0.01 * self.car.wheels[3].omega, (0.2, 0, 1))
        horiz_ind(20, -10.0 * self.car.wheels[0].joint.angle, (0, 1, 0))
        horiz_ind(30, -0.8 * self.car.hull.angularVelocity, (1, 0, 0))
        vl = pyglet.graphics.vertex_list(
            len(polygons) // 3,
            ("v3f", polygons),
            ("c4f", colors)  # gl.GL_QUADS,
        )
        vl.draw(gl.GL_QUADS)
        vl.delete()
        self.score_label.text = "%04i" % self.reward
        self.score_label.draw()

    def create_tournament(self, networks, grp_size=5):
        """Split deep copies of ``networks`` into groups sharing one track.

        Each full group of ``grp_size`` players triggers a ``reset`` and is
        given that episode's ``track_pack``.

        Args:
            networks: sequence of player objects; each is deep-copied.
            grp_size: number of players per group.

        Returns:
            List of dicts with keys ``'players'``, ``'track'`` and
            ``'best_player'`` (initially None).
        """
        # NOTE(review): players left over when len(networks) is not a
        # multiple of grp_size are not returned -- confirm this is intended.
        groups = []
        group = {'players': []}
        for network in networks:
            group['players'].append(deepcopy(network))
            if len(group['players']) == grp_size:
                self.track_pack = None
                self.reset()
                group['track'] = self.track_pack
                print(self.track_pack['noises'][10])
                group['best_player'] = None
                groups.append(group)
                group = {'players': []}
        return groups