import os
import os.path as osp
import random

import cv2
import gym
import numpy as np
from gym import spaces

# NOTE: the imports below are assumptions. agents, controllers, sensors,
# Engine and PlaygroundRegister come from simple-playgrounds, but the exact
# module paths vary across versions of the library. get_sensor_params is a
# project-local helper defined elsewhere in this repository.
from simple_playgrounds.agents import agents, controllers, sensors
from simple_playgrounds.engine import Engine
from simple_playgrounds.playgrounds import PlaygroundRegister


class PlaygroundEnv(gym.Env):
    """Custom Environment that follows gym interface"""
    metadata = {'render.modes': ['human', 'rgb_array']}

    def __init__(self, config):
        playground_name = config['playground_name']
        agent_type = config['agent_type']
        sensors_name = config['sensors_name']
        seed = config.get('seed', 0)
        continuous_action_space = config.get('continuous_action_space', True)
        multisteps = config.get('multisteps')

        # Derive a per-instance seed so parallel envs do not share RNG state.
        seed = (seed + id(self)) % (2**32)
        random.seed(seed)
        np.random.seed(seed)

        self.video_dir = config.get('video_dir')

        self.playground = PlaygroundRegister.playgrounds[playground_name[0]][
            playground_name[1]]()
        self.playground.time_limit = 1000
        self.time_limit = self.playground.time_limit
        self.episodes = 0

        self._create_agent(agent_type, sensors_name)
        self._set_action_space(continuous_action_space)
        self._set_obs_space()

        self.multisteps = None
        if multisteps is not None:
            assert isinstance(multisteps, int)
            self.multisteps = multisteps

    def _set_obs_space(self):
        d = {}
        for sensor in self.agent.sensors:
            if isinstance(sensor.shape, int):
                shape = (sensor.shape, 1)
            else:
                shape = sensor.shape
            d[sensor.name] = spaces.Box(low=0,
                                        high=1,
                                        shape=shape,
                                        dtype=np.float32)
        self.observation_space = spaces.Dict(d)

    def _set_action_space(self, continuous_action_space):
        actuators = self.agent.controller.controlled_actuators
        self.continuous_action_space = continuous_action_space

        if self.continuous_action_space:
            lows = []
            highs = []
            for actuator in actuators:
                lows.append(actuator.min)
                highs.append(actuator.max)
            self.action_space = spaces.Box(
                low=np.array(lows).astype(np.float32),
                high=np.array(highs).astype(np.float32))
        else:
            # TODO: raise NotImplementedError()
            dims = []
            for actuator in actuators:
                dims.append(2)
            self.action_space = spaces.MultiDiscrete(dims)

    def _create_agent(self, agent_type, sensors_name):
        if agent_type == 'base':
            agent_cls = agents.BaseAgent
        elif agent_type == 'arm':
            agent_cls = agents.FullAgent
        else:
            raise ValueError(f"Wrong agent_type: {agent_type}")

        agent = agent_cls(controller=controllers.External())

        for sensor_name, sensor_params in get_sensor_params(sensors_name):
            if sensor_name == 'depth':
                sensor_cls = sensors.Lidar
                sensor_name = 'depth_0'
            elif sensor_name == 'rgb':
                sensor_cls = sensors.RgbCamera
                sensor_name = 'rgb_0'
            elif sensor_name == 'touch':
                sensor_cls = sensors.Touch
                sensor_name = 'touch_0'
            elif sensor_name == 'blind':
                sensor_cls = sensors.BlindCamera
                sensor_name = 'blind_0'
            else:
                raise NotImplementedError(
                    f'Sensor {sensor_name} not implemented')

            agent.add_sensor(
                sensor_cls(anchor=agent.base_platform,
                           normalize=True,
                           invisible_elements=agent.parts,
                           name=sensor_name,
                           **sensor_params))

        self.playground.add_agent(agent)
        self.game = Engine(self.playground, screen=False)
        self.agent = agent
        assert self.agent in self.game.agents

    @property
    def engine(self):
        return self.game

    def get_current_timestep(self):
        return self.game.elapsed_time

    def step(self, actions):
        actions_to_game_engine = {}
        actions_dict = {}

        # Actions are applied directly to the actuators; the engine then
        # receives an empty dict for this agent.
        actuators = self.agent.controller.controlled_actuators
        for actuator, action in zip(actuators, actions):
            actuator.apply_action(action)

        actions_to_game_engine[self.agent] = actions_dict

        # Generate actions for other agents
        for agent in self.game.agents:
            if agent is not self.agent:
                actions_to_game_engine[agent] = \
                    agent.controller.generate_actions()

        if self.multisteps is None:
            self.game.step(actions_to_game_engine)
        else:
            self.game.multiple_steps(actions_to_game_engine,
                                     n_steps=self.multisteps)
        self.game.update_observations()

        reward = self.agent.reward
        done = self.playground.done or not self.game.game_on

        return (self.observations, reward, done, {})

    @property
    def observations(self):
        sensor_values = {}
        for sensor in self.agent.sensors:
            sensor_values[sensor.name] = sensor.sensor_values
        return sensor_values

    def reset(self):
        self.game.reset()
        self.game.elapsed_time = 0
        self.episodes += 1
        self.game.update_observations()
        return self.observations

    def render(self, mode='human'):
        if self.video_dir is None:
            return None

        img = self.game.generate_agent_image(self.agent)
        img = (255 * img).astype(np.uint8)

        step_id = self.game.elapsed_time
        video_dir = osp.join(self.video_dir, str(id(self)),
                             str(self.episodes))
        frame_path = osp.join(video_dir, f"f-{step_id:03d}.png")
        if not osp.exists(video_dir):
            os.makedirs(video_dir, exist_ok=True)
        cv2.imwrite(frame_path, img)
        return img

    def close(self):
        self.game.terminate()
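# ---------------------------------------------------------------------------
# Usage sketch (an assumption, not part of the original file): driving the
# dict-observation env above with random actions. The playground key
# ('foraging', 'candy_collect') and the sensors_name preset are hypothetical
# placeholders; valid values depend on what is registered in your
# installation of simple-playgrounds and on get_sensor_params.
# ---------------------------------------------------------------------------
if __name__ == '__main__':
    config = {
        'playground_name': ('foraging', 'candy_collect'),  # hypothetical key
        'agent_type': 'base',
        'sensors_name': 'rgb',  # hypothetical preset name
        'seed': 42,
    }
    env = PlaygroundEnv(config)
    obs = env.reset()
    for _ in range(10):
        action = env.action_space.sample()
        obs, reward, done, info = env.step(action)
        if done:
            obs = env.reset()
    env.close()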
# Excerpt from a standalone simulation script. 'agent' comes from an
# enclosing loop over engine.agents, and 'IR_sensor', 'actions', 'SAVE_SIMU'
# and 'SHOW_ENVIRONMENT' are defined earlier in the original file. The
# leading condition and the final append were reconstructed, since the
# fragment began and ended mid-statement.
if engine.total_elapsed_time == 0 and SAVE_SIMU:
    # First step: create one list per (agent, part, action).
    for part in agent.parts:
        available_actions = part.get_available_actions()
        for action in available_actions:
            all_actions[agent.name][part.name][action.action.name] = [
                actions[agent.name][part.name][action.action]
            ]
elif engine.total_elapsed_time > 0 and SAVE_SIMU:
    # Subsequent steps: append to the existing lists.
    for part in agent.parts:
        available_actions = part.get_available_actions()
        for action in available_actions:
            all_actions[agent.name][part.name][
                action.action.name].append(
                    actions[agent.name][part.name][action.action])

if engine.total_elapsed_time == 0:
    # Workaround for a small temporary bug
    engine.step(actions)
else:
    engine.multiple_steps(actions, n_steps=2)

if SAVE_SIMU or SHOW_ENVIRONMENT:
    engine.update_observations()
    observation = IR_sensor.sensor_value

if SAVE_SIMU:
    for agent in engine.agents:
        for sensor in agent.sensors:
            observation = sensor.sensor_value
            sensor_name = sensor.name
            if engine.total_elapsed_time == 1:
                all_observations[agent.name][sensor_name] = [observation]
            else:
                all_observations[agent.name][sensor_name].append(observation)
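# Sketch (an assumption, not in the original file): one way the logging
# containers used above could be initialized before the simulation loop.
# The defaultdict nesting mirrors the agent -> part -> action-name indexing
# of all_actions and the agent -> sensor-name indexing of all_observations.
from collections import defaultdict

# agent name -> part name -> action name -> list of values per step
all_actions = defaultdict(lambda: defaultdict(dict))
# agent name -> sensor name -> list of observations per step
all_observations = defaultdict(dict)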
# Alternative version of PlaygroundEnv that flattens all sensor readings into
# a single array for Stable-Baselines. It relies on the same assumed imports
# as above, plus ActionTypes, SensorTypes and ForwardBackwardPlatform from
# simple-playgrounds (exact module paths again depend on the library version).
class PlaygroundEnv(gym.Env):
    """Custom Environment that follows gym interface"""
    metadata = {'render.modes': ['human']}

    def __init__(self, config):
        super().__init__()

        playground_name = config['playground_name']
        agent_type = config['agent_type']
        sensors_name = config['sensors_name']
        seed = config.get('seed', 0)
        continuous_action_space = config.get('continuous_action_space', True)
        multisteps = config.get('multisteps')
        controller = config.get('controller', controllers.External())

        self.playground = PlaygroundRegister.playgrounds[playground_name[0]][
            playground_name[1]]()

        # Derive a per-instance seed so parallel envs do not share RNG state.
        seed = (seed + id(self)) % (2**32)
        random.seed(seed)
        np.random.seed(seed)

        if agent_type == 'base':
            agent_cls = agents.BaseAgent
        elif agent_type == 'arm':
            agent_cls = agents.FullAgent
        else:
            raise ValueError(f"Wrong agent_type: {agent_type}")

        agent = agent_cls(platform=ForwardBackwardPlatform,
                          controller=controller)

        for sensor_name, sensor_params in get_sensor_params(sensors_name):
            if sensor_name == 'depth':
                agent.add_sensor(
                    sensors.Lidar(anchor=agent.base_platform,
                                  normalize=True,
                                  **sensor_params))
            elif sensor_name == 'rgb':
                agent.add_sensor(
                    sensors.RgbCamera(anchor=agent.base_platform,
                                      normalize=True,
                                      **sensor_params))
            elif sensor_name == 'touch':
                agent.add_sensor(
                    sensors.Touch(anchor=agent.base_platform,
                                  normalize=True,
                                  **sensor_params))

        self.playground.add_agent(agent)

        self.time_limit = self.playground.time_limit
        self.game = Engine(self.playground, screen=False)
        self.agent = agent
        assert self.agent in self.game.agents

        # Define action space
        actuators = self.agent.get_all_actuators()
        self.continuous_action_space = continuous_action_space
        self.actions_dict = {}

        if self.continuous_action_space:
            lows = []
            highs = []
            for actuator in actuators:
                if actuator.action_range is ActionTypes.DISCRETE:
                    lows.append(-1)
                    highs.append(1)
                elif actuator.action_range is ActionTypes.CONTINUOUS_CENTERED:
                    lows.append(actuator.min)
                    highs.append(actuator.max)
                elif actuator.action_range is ActionTypes.CONTINUOUS_NOT_CENTERED:
                    lows.append(actuator.min)
                    highs.append(actuator.max)
                else:
                    raise ValueError(
                        f"Action type {actuator.action} unknown")

            self.action_space = spaces.Box(
                low=np.array(lows).astype(np.float32),
                high=np.array(highs).astype(np.float32))
        else:
            dims = []
            for actuator in actuators:
                if actuator.action_range is ActionTypes.DISCRETE:
                    dims.append(2)
                elif actuator.action_range is ActionTypes.CONTINUOUS_NOT_CENTERED:
                    dims.append(2)
                else:
                    raise ValueError(
                        f"Action type {actuator.action} unknown")

            self.action_space = spaces.MultiDiscrete(dims)

        # Define observation space.
        # Normalize all sensors to make sure they are in the same range.
        height_all_sensors, width_all_sensors, depth_all_sensors = 1, 0, 0
        for sensor in self.agent.sensors:
            if sensor.sensor_modality is SensorTypes.SEMANTIC:
                raise ValueError('Semantic sensors not supported')
            sensor.normalize = True

            if isinstance(sensor.shape, int):
                width_all_sensors = max(width_all_sensors, sensor.shape)
                depth_all_sensors += 1
            elif len(sensor.shape) == 2:
                width_all_sensors = max(width_all_sensors, sensor.shape[0])
                depth_all_sensors += sensor.shape[1]
            else:
                raise NotImplementedError

        self.observation_space = spaces.Box(low=0,
                                            high=1,
                                            shape=(1, width_all_sensors,
                                                   depth_all_sensors),
                                            dtype=np.float32)
        self.observations = np.zeros(
            (height_all_sensors, width_all_sensors, depth_all_sensors))

        # Multisteps
        self.multisteps = None
        if multisteps is not None:
            assert isinstance(multisteps, int)
            self.multisteps = multisteps

    @property
    def engine(self):
        return self.game

    def get_current_timestep(self):
        return self.game.elapsed_time

    def step(self, actions):
        # First, send the actions to the game engine.
        actions_to_game_engine = {}
        actions_dict = {}

        # Convert Stable-Baselines actions into game engine actions.
        for actuator, action in zip(self.agent.get_all_actuators(), actions):
            action_type = actuator.action
            converted_action = action

            # Convert discrete actions to binary.
            if self.continuous_action_space and action_type is ActionTypes.DISCRETE:
                converted_action = 0 if converted_action < 0 else 1

            # Convert continuous actions into [-1, 1].
            elif (not self.continuous_action_space) and (
                    action_type is ActionTypes.CONTINUOUS_CENTERED):
                converted_action = converted_action - 1

            actions_dict[actuator] = converted_action

        actions_to_game_engine[self.agent] = actions_dict

        # Generate actions for the other agents.
        for agent in self.game.agents:
            if agent is not self.agent:
                actions_to_game_engine[agent] = \
                    agent.controller.generate_actions()

        # Now that we have all actions, run the engine and get the observations.
        if self.multisteps is None:
            terminate = self.game.step(actions_to_game_engine)
        else:
            terminate = self.game.multiple_steps(actions_to_game_engine,
                                                 n_steps=self.multisteps)
        self.game.update_observations()

        # Concatenate the observations in a format that Stable-Baselines
        # understands: 1D sensors become single channels, 2D sensors keep
        # their channels; everything is stacked along the last axis.
        sensor_values = []
        for sensor in self.agent.sensors:
            if isinstance(sensor.shape, int):
                sensor_values.append(
                    sensor.sensor_values[np.newaxis, :, np.newaxis])
            elif len(sensor.shape) == 2:
                sensor_values.append(sensor.sensor_values[np.newaxis, :])
            else:
                raise NotImplementedError
        self.observations = np.concatenate(sensor_values, axis=2)

        reward = self.agent.reward
        done = self.playground.done or terminate

        return (self.observations, reward, done, {})

    def reset(self):
        self.game.reset()
        self.game.elapsed_time = 0
        # Note: returns a zero observation rather than real sensor values.
        return np.zeros(self.observations.shape)

    def render(self, mode='human'):
        img = self.game.generate_playground_image()
        return img

    def close(self):
        self.game.terminate()
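# ---------------------------------------------------------------------------
# Smoke-test sketch (an assumption, not part of the original file): checking
# the flat-observation env against the Gym API. check_env comes from
# Stable-Baselines3; the original code targeted an earlier Stable-Baselines,
# so the checker may flag differences (e.g. reset() returning zeros instead
# of real observations). The config values are hypothetical placeholders.
# ---------------------------------------------------------------------------
if __name__ == '__main__':
    from stable_baselines3.common.env_checker import check_env

    env = PlaygroundEnv({
        'playground_name': ('foraging', 'candy_collect'),  # hypothetical key
        'agent_type': 'base',
        'sensors_name': 'depth',  # hypothetical preset name
    })
    check_env(env, warn=True)
    env.close()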