def set_env_config(self, robot, layout): self.nsmr.set_config(robot, layout) self.set_gym_space() self.renderer = Renderer(self.nsmr.dimentions, self.nsmr.layout['resolution'], self.nsmr.robot) self.reset()
def __init__(self, layout=SIMPLE_MAP, reward_params={"goal_reward": 5.0, "collision_penalty": 5.0, "alpha": 0.3, "beta": 0.01, "stop_penalty": 0.05}, ): # simulator self.nsmr = NSMR(layout) # gym space self.observation_space = spaces.Dict(dict( lidar=spaces.Box(low=MIN_RANGE, high=MAX_RANGE, shape=(NUM_LIDAR,), dtype=np.float32), target=spaces.Box(np.array([MIN_TARGET_DISTANCE,-1.0,-1.0]), np.array([MAX_TARGET_DISTANCE,1.0,1.0]), dtype=np.float32) )) self.action_space = spaces.Box( low = np.array([MIN_LINEAR_VELOCITY,MIN_ANGULAR_VELOCITY]), high = np.array([MAX_LINEAR_VELOCITY,MAX_ANGULAR_VELOCITY]), dtype = np.float32 ) # renderer self.renderer = Renderer(self.nsmr.dimentions) # reward params self.reward_params = reward_params self.mode='rgb_array' self.reset()
def __init__( self, robot="robot", layout="simple_map", reward_params={ "goal_reward": 5.0, "collision_penalty": 5.0, "alpha": 0.3, "beta": 0.01, "stop_penalty": 0.05 }, max_steps=500): # simulator self.nsmr = NSMR(robot=robot, layout=layout) # gym space self.set_gym_space() # renderer self.renderer = Renderer(self.nsmr.dimentions, self.nsmr.layout['resolution'], self.nsmr.robot) # reward params self.reward_params = reward_params self.max_steps = max_steps self.reset()
def __init__(self, layout=NO_OBJECT, reward_params={"goal_reward": 5.0, "alpha": 0.3, "beta": 0.01, "stop_penalty": 0.05}, ): # simulator self.nsmr = NSMR(layout) # gym space self.observation_space = spaces.Dict(dict( pose=spaces.Box(np.array([-10,-10,-3.141592]), np.array([10,10,3.141592])), target=spaces.Box(np.array([-10,-10,-3.141592]), np.array([10,10,3.141592])) )) self.action_space = spaces.Box( np.array([MIN_LINEAR_VELOCITY,MIN_ANGULAR_VELOCITY]), np.array([MAX_LINEAR_VELOCITY,MAX_ANGULAR_VELOCITY])) # renderer self.renderer = Renderer(self.nsmr.dimentions) # reward params self.reward_params = reward_params self.reset()
class NsmrGymEnv(gym.Env): metadata = { 'render.modes': ['human', 'rgb_array'], 'video.frames_per_second': 50 } def __init__( self, robot="robot", layout="simple_map", reward_params={ "goal_reward": 5.0, "collision_penalty": 5.0, "alpha": 0.3, "beta": 0.01, "stop_penalty": 0.05 }, max_steps=500): # simulator self.nsmr = NSMR(robot=robot, layout=layout) # gym space self.set_gym_space() # renderer self.renderer = Renderer(self.nsmr.dimentions, self.nsmr.layout['resolution'], self.nsmr.robot) # reward params self.reward_params = reward_params self.max_steps = max_steps self.reset() def set_reward_params(self, reward_params): self.reward_params = reward_params self.reset() def set_env_config(self, robot, layout): self.nsmr.set_config(robot, layout) self.set_gym_space() self.renderer = Renderer(self.nsmr.dimentions, self.nsmr.layout['resolution'], self.nsmr.robot) self.reset() def reset(self): self.t = 0 self.nsmr.reset_pose() self.nsmr.reset_noise_param() observation = self.get_observation() self.pre_dis = observation["target"][0] self.goal = False return observation def step(self, action): self.t += 1 self.nsmr.update(action) observation = self.get_observation() reward = self.get_reward(observation) done = self.is_done() info = { "pose": self.nsmr.pose, "target": self.nsmr.target, "goal": self.goal } return observation, reward, done, info def render(self, mode='human'): return self.renderer.render(self.nsmr, mode) def get_observation(self): observation = {} observation["lidar"] = self.nsmr.get_lidar() observation["target"] = self.nsmr.get_relative_target_position() return observation def get_reward(self, observation): dis = observation["target"][0] ddis = self.pre_dis - dis theta = np.arccos(observation["target"][2]) if dis < self.nsmr.robot["radius"]: reward = self.reward_params["goal_reward"] self.goal = True elif self.nsmr.is_collision(): reward = -self.reward_params["collision_penalty"] else: reward = self.reward_params["alpha"] * ddis if abs(ddis) < 1e-6: reward -= self.reward_params["stop_penalty"] reward -= self.reward_params["beta"] / (2 * np.pi) * abs(theta) self.pre_dis = dis return reward def is_done(self): done = False if self.t >= self.max_steps: done = True if self.nsmr.is_collision(): done = True if self.goal: done = True return done def close(self): self.renderer.close() def seed(self, seed=None): self.np_random, seed = seeding.np_random(seed) return [seed] def set_gym_space(self): # gym space self.observation_space = spaces.Dict( dict(lidar=spaces.Box( low=self.nsmr.robot["lidar"]["min_range"], high=self.nsmr.robot["lidar"]["max_range"], shape=(self.nsmr.robot["lidar"]["num_range"], ), dtype=np.float32), target=spaces.Box(np.array([0.0, -1.0, -1.0]), np.array([100.0, 1.0, 1.0]), dtype=np.float32))) self.action_space = spaces.Box( low=np.array([ self.nsmr.robot["min_linear_velocity"], self.nsmr.robot["min_angular_velocity"] ]), high=np.array([ self.nsmr.robot["max_linear_velocity"], self.nsmr.robot["max_angular_velocity"] ]), dtype=np.float32)
class NsmrGymEnv2(gym.Env): def __init__(self, layout=SIMPLE_MAP, reward_params={"goal_reward": 5.0, "collision_penalty": 5.0, "alpha": 0.3, "beta": 0.01, "stop_penalty": 0.05}, ): # simulator self.nsmr = NSMR(layout) # gym space self.observation_space = spaces.Dict(dict( lidar=spaces.Box(low=MIN_RANGE, high=MAX_RANGE, shape=(NUM_LIDAR,), dtype=np.float32), target=spaces.Box(np.array([MIN_TARGET_DISTANCE,-1.0,-1.0]), np.array([MAX_TARGET_DISTANCE,1.0,1.0]), dtype=np.float32) )) self.action_space = spaces.Box( low = np.array([MIN_LINEAR_VELOCITY,MIN_ANGULAR_VELOCITY]), high = np.array([MAX_LINEAR_VELOCITY,MAX_ANGULAR_VELOCITY]), dtype = np.float32 ) # renderer self.renderer = Renderer(self.nsmr.dimentions) # reward params self.reward_params = reward_params self.mode='rgb_array' self.reset() def set_reward_params(self, reward_params): self.reward_params = reward_params self.reset() def set_layout(self, layout): self.nsmr.set_layout(layout) self.renderer = Renderer(self.nsmr.dimentions) self.reset() def reset(self): self.t = 0 self.nsmr.reset_pose() self.nsmr.reset_noise_param() observation = self.get_observation_() self.pre_dis = observation["target"][0] self.goal = False self.final_target = self.nsmr.target.copy() self.theta = 0 self.pre_pose = self.nsmr.pose return observation def step(self, action): self.t += 1 self.pre_pose = self.nsmr.pose self.pre_dis = self.nsmr.get_relative_target_position()[0] self.nsmr.update(action) observation = self.get_observation() self.reward = self.get_reward(observation) done = self.is_done(observation) info = {"pose": self.nsmr.pose, "target": self.nsmr.target} return observation, self.reward, done, info def render(self, target, mode='human'): self.renderer.render(self.nsmr, self.mode, target) def get_observation(self): observation = {} observation["lidar"] = self.nsmr.get_lidar() #observation["target"] = self.nsmr.get_relative_target_position() observation["target"] = self.nsmr.get_subgoal2() return observation def get_observation_(self): observation = {} observation["lidar"] = self.nsmr.get_lidar() observation["target"] = self.nsmr.get_relative_target_position() #observation["target"] = self.nsmr.get_subgoal() return observation def get_reward(self, observation): #self.theta = np.arctan2(observation["target"][2], observation["target"][3]) relative_target = self.nsmr.get_relative_target_position() #ddis = self.nsmr.get_relative_target_position(self.pre_pose, self.nsmr.pose)[0] ddis = self.pre_dis - relative_target[0] theta2 = np.arctan2(relative_target[1], relative_target[2]) #reward = 0.1 * np.sqrt((observation["target"][0])**2 + (observation["target"][1])**2 + (theta)**2) reward = 0 if relative_target[0] < ROBOT_RADIUS: reward += 5 else: reward += 0.15 * ddis + 0.01 * abs(theta2)/(2*np.pi) if(abs(ddis)<1e-6): reward -= 0.05 self.pre_dis = relative_target[0] return reward def is_done(self, observation): done = False if self.t >= MAX_STEPS: done = True elif self.nsmr.is_collision(): done = True #if self.goal: # done = True #if self.reward < 0.5: #if self.nsmr.get_relative_target_position()[0] < 0.2 and np.rad2deg(np.abs(self.theta)) < 5: elif self.nsmr.get_relative_target_position()[0] < ROBOT_RADIUS: done = True elif self.nsmr.get_relative_target_position(target=self.final_target)[0] < ROBOT_RADIUS: done = True return done def close(self): self.renderer.close() def seed(self, seed=None): self.np_random, seed = seeding.np_random(seed) return [seed]
def set_layout(self, layout): self.nsmr.set_layout(layout) self.renderer = Renderer(self.nsmr.dimentions) self.reset()
class NsmrSimpleGymEnv(gym.Env): def __init__(self, robot="robot", layout="no_object", reward_params={"goal_reward": 5.0, "alpha": 0.3, "beta": 0.01, "stop_penalty": 0.05}, max_steps=500 ): # simulator self.nsmr = NSMR(robot=robot, layout=layout) # gym space self.set_gym_space() # renderer self.renderer = Renderer(self.nsmr.dimentions, self.nsmr.layout['resolution'], self.nsmr.robot) # reward params self.reward_params = reward_params self.max_steps = max_steps self.reset() def set_reward_params(self, reward_params): self.reward_params = reward_params self.reset() def set_env_config(self, robot, layout): self.nsmr.set_config(robot, layout) self.set_gym_space() self.renderer = Renderer(self.nsmr.dimentions, self.nsmr.layout['resolution'], self.nsmr.robot) self.reset() def reset(self): self.t = 0 self.nsmr.reset_pose() self.nsmr.reset_noise_param() observation = self.get_observation() self.pre_dis = observation["target"][0] self.goal = False return observation def step(self, action): self.t += 1 self.nsmr.update(action) observation = self.get_observation() reward = self.get_reward(observation) done = self.is_done() info = {} return observation, reward, done, info def render(self, mode='human'): self.renderer.render(self.nsmr, mode) def get_observation(self): observation = {} observation["pose"] = self.nsmr.pose observation["target"] = self.nsmr.target return observation def get_reward(self, observation): target_info = self.nsmr.get_relative_target_position() dis = target_info[0] ddis = self.pre_dis - dis theta = np.arccos(target_info[2]) if dis < self.nsmr.robot["radius"]: reward = self.reward_params["goal_reward"] self.goal = True else: reward = self.reward_params["alpha"] * ddis if abs(ddis) < 1e-6: reward -= self.reward_params["stop_penalty"] reward -= self.reward_params["beta"]/(2*np.pi)*abs(theta) self.pre_dis = dis return reward def is_done(self): done = False if self.t >= self.max_steps: done = True if self.goal: done = True return done def close(self): self.renderer.close() def seed(self, seed=None): self.np_random, seed = seeding.np_random(seed) return [seed] def set_gym_space(self): # gym space self.observation_space = spaces.Dict(dict( pose=spaces.Box(np.array([-10,-10,-3.141592]), np.array([10,10,3.141592])), target=spaces.Box(np.array([-10,-10,-3.141592]), np.array([10,10,3.141592])) )) self.action_space = spaces.Box( low = np.array([self.nsmr.robot["min_linear_velocity"], self.nsmr.robot["min_angular_velocity"]]), high = np.array([self.nsmr.robot["max_linear_velocity"], self.nsmr.robot["max_angular_velocity"]]), dtype = np.float32 )
class NsmrSimpleGymEnv(gym.Env): def __init__(self, layout=NO_OBJECT, reward_params={"goal_reward": 5.0, "alpha": 0.3, "beta": 0.01, "stop_penalty": 0.05}, ): # simulator self.nsmr = NSMR(layout) # gym space self.observation_space = spaces.Dict(dict( pose=spaces.Box(np.array([-10,-10,-3.141592]), np.array([10,10,3.141592])), target=spaces.Box(np.array([-10,-10,-3.141592]), np.array([10,10,3.141592])) )) self.action_space = spaces.Box( np.array([MIN_LINEAR_VELOCITY,MIN_ANGULAR_VELOCITY]), np.array([MAX_LINEAR_VELOCITY,MAX_ANGULAR_VELOCITY])) # renderer self.renderer = Renderer(self.nsmr.dimentions) # reward params self.reward_params = reward_params self.reset() def set_reward_params(self, reward_params): self.reward_params = reward_params self.reset() def set_layout(self, layout): self.nsmr.set_layout(layout) self.renderer = Renderer(self.nsmr.dimentions) self.reset() def reset(self): self.t = 0 self.nsmr.reset_pose() self.nsmr.reset_noise_param() observation = self.get_observation() self.pre_dis = observation["target"][0] self.goal = False return observation def step(self, action): self.t += 1 self.nsmr.update(action) observation = self.get_observation() reward = self.get_reward(observation) done = self.is_done() info = {} return observation, reward, done, info def render(self, mode='human'): self.renderer.render(self.nsmr, mode) def get_observation(self): observation = {} observation["pose"] = self.nsmr.pose observation["target"] = self.nsmr.target return observation def get_reward(self, observation): target_info = self.nsmr.get_relative_target_position() dis = target_info[0] ddis = self.pre_dis - dis theta = np.arccos(target_info[2]) if dis < ROBOT_RADIUS: reward = self.reward_params["goal_reward"] self.goal = True else: reward = self.reward_params["alpha"] * ddis if abs(ddis) < 1e-6: reward -= self.reward_params["stop_penalty"] reward -= self.reward_params["beta"]/(2*np.pi)*abs(theta) self.pre_dis = dis return reward def is_done(self): done = False if self.t >= MAX_STEPS: done = True if self.goal: done = True return done def close(self): self.renderer.close() def seed(self, seed=None): self.np_random, seed = seeding.np_random(seed) return [seed]