import gym
import numpy as np
from gym import spaces
from gym.utils import seeding
from imageio import imread
from skimage.transform import resize

# project-local modules; the import paths below are assumed from the asset
# path "environments/obstacle_car/assets/car.png" used in this file,
# adjust them to the actual repo layout
import environments.obstacle_car.params as params
from environments.obstacle_car.car import Car
from environments.obstacle_car.sprite import Sprite
class Environment_Graphical(gym.Env):
    def __init__(self):
        # fill color for empty canvas regions
        self.fillvalue = 255

        # set up numpy arrays to be drawn to
        self.canvas = np.zeros((*params.screen_size, 3), dtype=np.uint8)
        self.obstacle_layer = np.zeros((*params.screen_size, 3), dtype=np.uint8)
        self.obstacle_mask = np.zeros(params.screen_size, dtype=bool)
        self.goal_layer = np.zeros((*params.screen_size, 3), dtype=np.uint8)
        self.goal_mask = np.zeros(params.screen_size, dtype=bool)
        self.background = np.zeros((*params.screen_size, 3), dtype=np.uint8)
        self.car_layer = np.zeros((*params.screen_size, 3), dtype=np.uint8)
        self.car_mask = np.zeros(params.screen_size, dtype=bool)

        # load the car image and set up its transparency mask
        car_img_transp = imread("environments/obstacle_car/assets/car.png")
        car_img_transp = np.transpose(car_img_transp, [1, 0, 2])
        car_img_transp = resize(car_img_transp, params.car_size)
        car_img = car_img_transp[:, :, :3]  # cut away alpha
        # resize returns floats in [0, 1], scale back to uint8
        car_img = (car_img * 255).astype(np.uint8)
        car_mask = car_img_transp[:, :, 3] > 0

        # procedural striped textures for obstacle and goal
        # (np.abs keeps the sine pattern inside the uint8 range instead of
        # casting negative floats to uint8)
        obstacle_img = np.zeros((*params.obstacle_size, 3), dtype=np.uint8)
        obstacle_img[:, :, 0] = (255 * np.abs(
            np.sin(np.linspace(0, 2 * np.pi, params.obstacle_size[0])))
            .reshape((-1, 1))).astype(np.uint8)
        goal_img = np.zeros((*params.goal_size, 3), dtype=np.uint8)
        goal_img[:, :, 1] = (255 * np.abs(
            np.sin(np.linspace(0, 4 * np.pi, params.goal_size[1])))).astype(np.uint8)

        obstacle_mask = np.ones(params.obstacle_size, dtype=bool)
        goal_mask = np.ones(params.goal_size, dtype=bool)

        # the positions will be overwritten in reset()
        default_pos = np.zeros((2,))
        self.car_sprite = Sprite(car_img, car_mask, default_pos, 0)
        self.obstacle_sprite = Sprite(obstacle_img, obstacle_mask, default_pos, 0)
        self.goal_sprite = Sprite(goal_img, goal_mask, default_pos, 0)

        # car and car_sprite are not the same:
        # the sprite is just for graphics, the Car handles the driving dynamics
        self.car = Car(default_pos, 0, 0, params)

        self.actions = [[0, 0], [0, -1], [0, 1], [1, 0]]
        self.num_actions = len(self.actions)
        self.action_space = spaces.Discrete(self.num_actions)

        self.seed()

    def seed(self, seed=None):
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    def reset(self):
        self.steps = 0

        # reset the dynamics
        self.car_sprite.set_rotation(0)
        self.car.rot = 0
        self.car.speed = 0

        # place the car at a random x position along the bottom edge
        car_position = np.array([0, 0], dtype=np.float64)
        car_position[0] = self.np_random.uniform(
            params.car_size[0] / 2,
            params.screen_size[0] - params.car_size[0] / 2)
        car_position[1] = params.screen_size[1] - params.car_size[1] / 2
        self.car_sprite.set_position(car_position)
        self.car.pos = car_position

        # place the goal at a random x position along the top edge
        # (float dtype, otherwise the assignments below would truncate to int)
        goal_position = np.array([0, 0], dtype=np.float64)
        goal_position[0] = self.np_random.uniform(
            0, params.screen_size[0] - params.goal_size[0])
        goal_position[1] = params.goal_size[1] / 2
        self.goal_sprite.set_position(goal_position)

        # minimum distance an obstacle must keep from car and goal
        min_dist = 1.5 * self.car_sprite.dim + min(self.goal_sprite.size)

        self.obstacle_positions = []
        for i in range(params.num_obstacles):
            while True:
                obs_x = self.np_random.rand() * params.screen_size[0]
                obs_y = params.screen_size[1] / 3 * (1 + self.np_random.rand())
                obstacle_position = np.array([obs_x, obs_y])

                # obstacle must be away from car and goal
                car_dist = np.linalg.norm(obstacle_position - self.car.pos)
                goal_dist = np.linalg.norm(obstacle_position - self.goal_sprite.pos)
                if car_dist > min_dist and goal_dist > min_dist:
                    self.obstacle_positions.append(obstacle_position)
                    break

        # render the static scene to the background
        self.goal_layer[:] = self.fillvalue
        self.goal_mask[:] = False
        self.goal_sprite.render(self.goal_layer, self.goal_mask)

        self.obstacle_layer[:] = self.fillvalue
        self.obstacle_mask[:] = False
        for obstacle_position in self.obstacle_positions:
            self.obstacle_sprite.set_position(obstacle_position)
            self.obstacle_sprite.render(self.obstacle_layer, self.obstacle_mask)

        self.background[:] = self.fillvalue
        self.background[self.obstacle_mask] = self.obstacle_layer[self.obstacle_mask]
        self.background[self.goal_mask] = self.goal_layer[self.goal_mask]

        return self.render()

    def render(self):
        # TODO: under the gym.Env interface, render() is supposed to behave
        # differently (mode argument etc.); refactor to the gym interface

        # reset canvas and foreground; the background is not rerendered
        self.canvas[:] = self.background
        self.car_layer[:] = self.fillvalue
        self.car_mask[:] = False

        # plot the car
        self.car_sprite.render(self.car_layer, self.car_mask)

        # overlay the foreground onto the canvas
        self.canvas[self.car_mask] = self.car_layer[self.car_mask]
        return self.canvas

    def step(self, action):
        assert self.action_space.contains(action)
        # internally an action is not a number but a combination of
        # acceleration and steering
        action = self.actions[action]
        obs, rew, done = self.make_action(action)
        return obs, rew, done, {}

    def make_action(self, action):
        acceleration, steering_angle = action

        old_dist = np.linalg.norm(self.car.pos - self.goal_sprite.pos)
        self.car.update(acceleration, steering_angle)
        new_dist = np.linalg.norm(self.car.pos - self.goal_sprite.pos)

        # if params.reward_distance != 0, the environment rewards you
        # for moving closer to the goal
        dist_reward = (old_dist - new_dist) * params.reward_distance

        # clip the car to the screen and stop it on border contact
        x, y = self.car.pos
        border_collision = False
        if x > params.screen_size[0]:
            border_collision = True
            self.car.pos[0] = params.screen_size[0]
        elif x < 0:
            border_collision = True
            self.car.pos[0] = 0
        if y > params.screen_size[1]:
            border_collision = True
            self.car.pos[1] = params.screen_size[1]
        elif y < 0:
            border_collision = True
            self.car.pos[1] = 0
        if border_collision:
            self.car.speed = 0

        # sync dynamics and graphics
        self.car_sprite.set_position(self.car.pos)
        self.car_sprite.set_rotation(-self.car.rot)

        # update the rendering
        # attention, important side effect: this also updates the
        # occupation masks for foreground and background
        observation = self.render()

        if border_collision and params.stop_on_border_collision:
            return observation, params.reward_collision, True

        reward, collides = self.check_collisions()
        if collides:
            return observation, reward, True

        self.steps += 1
        if self.steps > params.timeout:
            return observation, params.reward_timestep + dist_reward, True

        return observation, params.reward_timestep + dist_reward, False

    def check_collisions(self):
        # a collision happens wherever the car mask overlaps another mask
        if np.any(self.car_mask[self.obstacle_mask]):
            return params.reward_collision, True
        if np.any(self.car_mask[self.goal_mask]):
            return params.reward_goal, True
        return 0, False

    def sample_action(self):
        # as in the atari environments, actions are plain integer indices
        return self.np_random.choice(self.num_actions)
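# A minimal random-rollout sketch for the graphical environment. This helper
# is not part of the original module; it assumes the hedged project-local
# imports above resolve and only exercises the public reset/step interface.
def _demo_graphical_rollout(num_steps=100):
    env = Environment_Graphical()
    frame = env.reset()  # uint8 canvas of shape (*params.screen_size, 3)
    total_reward = 0.0
    for _ in range(num_steps):
        frame, reward, done, info = env.step(env.sample_action())
        total_reward += reward
        if done:
            break
    return frame, total_reward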
class Environment_Vec(gym.Env):
    def __init__(self, params, polar_coords=True):
        self.params = params
        self.polar_coords = polar_coords

        # the position will be overwritten in reset()
        default_pos = np.zeros((2,))
        self.car = Car(default_pos, 0, 0, self.params)

        # diagonals of the bounding boxes, used as collision radii
        self.car_dim = np.linalg.norm(params.car_size)
        self.goal_pos = default_pos
        self.goal_dim = np.linalg.norm(params.goal_size)
        self.obs_dim = np.linalg.norm(params.obstacle_size)

        self.actions = [[0, 0], [0, -1], [0, 1], [1, 0]]
        self.num_actions = len(self.actions)
        self.action_space = spaces.Discrete(self.num_actions)

        # the observation is [speed, then one (distance, angle) or (x, y)
        # pair for the goal and each obstacle], hence num_obstacles + 1 pairs
        float_max = np.finfo(np.float32).max
        if self.polar_coords:
            low = np.array(
                [params.min_speed, *[0, -np.pi] * (params.num_obstacles + 1)])
            high = np.array(
                [params.max_speed, *[float_max, +np.pi] * (params.num_obstacles + 1)])
        else:
            low = np.array(
                [params.min_speed, *[-float_max, -float_max] * (params.num_obstacles + 1)])
            high = np.array(
                [params.max_speed, *[float_max, float_max] * (params.num_obstacles + 1)])
        self.observation_space = spaces.Box(low, high)

        self.seed()

    def seed(self, seed=None):
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    def render_to_canvas(self, canvas):
        # coordinate grid over the canvas, used to shade the allowed area
        x_coords = np.arange(canvas.shape[0])
        y_coords = np.arange(canvas.shape[1])
        x, y = np.meshgrid(x_coords, y_coords)
        coords = np.stack([x, y], axis=-1)

        observation = self.get_observation()

        # the first entry is the speed, throw it away;
        # the rest has to be rescaled to the original range
        observation = observation[1:]
        # organize the remainder as 2D vectors
        observation = observation.reshape((-1, 2))

        # if the environment is based on polar coordinates,
        # transform them into cartesian
        if self.polar_coords:
            distances = observation[:, 0]
            angles = observation[:, 1]
            x = distances * np.cos(angles)
            y = distances * np.sin(angles)
            observation = np.stack([y, x], axis=-1)

        observation = observation * self.params.distance_rescale
        offset = np.array([canvas.shape[0] // 2, canvas.shape[1] // 2])
        observation = (observation + offset).astype(int)

        goal = observation[0]
        obstacles = observation[1:]
        goal = np.array(canvas.shape[:2]) - goal
        obstacles = np.array(canvas.shape[:2]) - obstacles

        # shade the area the car may move in before the episode is aborted
        dist = coords - goal
        dist = np.linalg.norm(dist, axis=-1)
        area = np.where(dist < self.initial_dist * self.params.max_dist)
        canvas[area[1], area[0], 2] = 0.5

        if np.all(goal > 0) and np.all(goal < canvas.shape[:2]):
            canvas[goal[0] - 5:goal[0] + 5, goal[1] - 5:goal[1] + 5, :] = 1

        for obstacle in obstacles:
            if np.all(obstacle > 0) and np.all(obstacle < canvas.shape[:2]):
                canvas[obstacle[0] - 5:obstacle[0] + 5,
                       obstacle[1] - 5:obstacle[1] + 5, 0] = 1

        # a green dot at the center of the canvas, for our car
        canvas[offset[0] - 5:offset[0] + 5, offset[1] - 5:offset[1] + 5, 1] = 1
        return canvas

    def reset(self):
        self.steps = 0

        # reset the dynamics
        self.car.rot = 0
        self.car.speed = 0

        # the car starts centered at the bottom edge; a randomized
        # alternative would draw x from
        # uniform(car_size[0] / 2, screen_size[0] - car_size[0] / 2)
        car_position = np.array([0, 0], dtype=np.float64)
        car_position[0] = self.params.screen_size[0] // 2
        car_position[1] = self.params.screen_size[1] - self.params.car_size[1] / 2
        self.car.pos = car_position

        # the goal sits centered at the top edge; float dtype, otherwise
        # the assignments below would truncate to int
        goal_position = np.array([0, 0], dtype=np.float64)
        goal_position[0] = self.params.screen_size[0] // 2
        goal_position[1] = self.params.goal_size[1] / 2
        self.goal_pos = goal_position

        # if the car gets too far away from the goal, we stop the simulation;
        # this stop is based on the initial distance
        self.initial_dist = np.linalg.norm(self.car.pos - self.goal_pos)

        # minimum distance an obstacle needs to have from car and goal
        min_dist = 1.5 * self.car_dim + self.goal_dim

        self.obstacle_positions = []
        for i in range(self.params.num_obstacles):
            while True:
                obs_x = self.params.screen_size[0] // 2 + (
                    self.np_random.rand() - 0.5) * self.params.obs_x_spread
                obs_y = self.params.screen_size[1] * self.np_random.rand()
                obstacle_position = np.array([obs_x, obs_y])

                # obstacle must be away from car and goal
                car_dist = np.linalg.norm(obstacle_position - self.car.pos)
                goal_dist = np.linalg.norm(obstacle_position - self.goal_pos)
                if car_dist > min_dist and goal_dist > min_dist:
                    self.obstacle_positions.append(obstacle_position)
                    break

        return self.get_observation()

    def get_observation(self, rotated=True):
        # set up a rotation matrix that turns world coordinates
        # into the car's frame of reference
        if rotated:
            theta = self.car.rot / 180 * np.pi
        else:
            theta = 0
        c, s = np.cos(theta), np.sin(theta)
        mat = np.array(((c, -s), (s, c)))

        # stack goal and obstacle positions
        targets = np.vstack([self.goal_pos, *self.obstacle_positions])
        # origin is the car position
        targets = self.car.pos - targets
        # rotate to face the car
        targets = (mat @ targets.T).T

        if self.polar_coords:
            distances = np.linalg.norm(targets, axis=1)
            distances = distances / self.params.distance_rescale
            angles = np.arctan2(targets[:, 0], targets[:, 1])
            distance_angles = np.array(list(zip(distances, angles)))
            # sort the obstacles (all rows after the goal) by distance
            idx_sorted = np.argsort(distance_angles[1:], axis=0)[:, 0]
            distance_angles[1:] = distance_angles[1:][idx_sorted]
            observation_vector = np.stack(
                [self.car.speed, *distance_angles.flatten()])
        else:
            targets = targets / self.params.distance_rescale
            observation_vector = np.stack([self.car.speed, *targets.flatten()])

        return observation_vector

    def step(self, action):
        assert self.action_space.contains(action)
        # internally an action is not a number but a combination of
        # acceleration and steering
        action = self.actions[action]
        obs, rew, done = self.make_action(action)
        return obs, rew, done, {}

    def make_action(self, action):
        acceleration, steering_angle = action

        old_dist = np.linalg.norm(self.car.pos - self.goal_pos)
        self.car.update(acceleration, steering_angle)
        new_dist = np.linalg.norm(self.car.pos - self.goal_pos)

        # if self.params.reward_distance != 0, the environment rewards you
        # for moving closer to the goal
        dist_reward = (old_dist - new_dist) * self.params.reward_distance

        observation_vector = self.get_observation()
        targets = observation_vector[1:].reshape((-1, 2))

        # abort if the car strays too far sideways from the goal
        relative_goal_position = self.car.pos - self.goal_pos
        x_distance = abs(relative_goal_position[0])
        if x_distance > self.params.x_tolerance:
            return observation_vector, self.params.reward_collision, True

        if self.polar_coords:
            distances = targets[:, 0]
            distances = distances * self.params.distance_rescale
        else:
            targets = targets * self.params.distance_rescale
            distances = np.linalg.norm(targets, axis=1)

        # we have moved out of the simulation domain
        if new_dist > self.params.max_dist * self.initial_dist:
            return observation_vector, self.params.reward_collision, True

        # the goal is reached once the bounding circles overlap
        rel_goal_dist = distances[0]
        if rel_goal_dist < 1 / 2 * (self.car_dim + self.goal_dim):
            return observation_vector, self.params.reward_goal, True

        rel_obs_dist = distances[1:]
        if np.any(rel_obs_dist < 1 / 2 * (self.car_dim + self.obs_dim)):
            return observation_vector, self.params.reward_collision, True

        self.steps += 1
        if self.steps > self.params.timeout:
            return observation_vector, self.params.reward_timestep + dist_reward, True

        return observation_vector, self.params.reward_timestep + dist_reward, False

    def sample_action(self):
        # as in the atari environments, actions are plain integer indices
        return self.np_random.choice(self.num_actions)
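# A minimal random-rollout sketch for the vector environment, assuming the
# hedged params import at the top of the file resolves. It is not part of the
# original module and only exercises the public reset/step interface.
if __name__ == "__main__":
    env = Environment_Vec(params, polar_coords=True)
    observation = env.reset()
    # observation layout: [speed, (distance, angle) of the goal,
    # then (distance, angle) of each obstacle, sorted by distance]
    print("observation shape:", observation.shape)
    done, episode_return = False, 0.0
    while not done:  # terminates via goal, collision, domain exit, or timeout
        observation, reward, done, info = env.step(env.sample_action())
        episode_return += reward
    print("episode return:", episode_return)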