class MadrasEnv(TorcsEnv, gym.Env): """Definition of the Gym Madras Environment.""" def __init__(self, cfg_path=None): # If `visualise` is set to False torcs simulator will run in headless mode self.step_num = 0 self._config = MadrasConfig() self._config.update(parse_yaml(cfg_path)) self.torcs_proc = None self.observation_handler = oh.ObservationHandler( self._config.observations, self._config.vision) self.set_observation_and_action_spaces() self.reward_handler = rh.RewardHandler(self._config.rewards) self.done_handler = dh.DoneHandler(self._config.dones) self.torcs_server_port = self._config.torcs_server_port self.state_dim = self.observation_handler.get_state_dim( ) # No. of sensors input self.env_name = 'Madras_Env' self.client_type = 0 # Snakeoil client type self.initial_reset = True if self._config.pid_assist: self.PID_controller = PIDController(self._config.pid_settings) self.ob = None self.seed() self.torcs_server_config = torcs_config.TorcsConfig( self._config.server_config, randomize=self._config.randomize_env) self.start_torcs_process() def validate_config(self): num_traffic_agents_in_sim_options = len( self._config.traffic) if self._config.traffic else 0 assert self.torcs_server_config.max_cars == ( num_traffic_agents_in_sim_options + 1) def seed(self, seed=None): self.np_random, seed = seeding.np_random(seed) return [seed] @property def config(self): return self._config def set_observation_and_action_spaces(self): if self._config.pid_assist: self.action_dim = 2 # LanePos, Velocity self.action_space = gym.spaces.Box( low=np.asarray([-1.0, -140.0]), high=np.asarray( [1.0, 140.0])) # Max speed of 140 kmph is allowed in TORCS else: self.action_dim = 3 # Steer, Accel, Brake self.action_space = gym.spaces.Box(low=np.asarray([-1.0, 0.0, 0.0]), high=np.asarray([1.0, 1.0, 1.0])) if self._config.normalize_actions: self.action_space = gym.spaces.Box(low=-np.ones(self.action_dim), high=np.ones(self.action_dim)) self.observation_space = self.observation_handler.get_observation_space( ) def test_torcs_server_port(self): udp = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) try: udp.bind(('', self.torcs_server_port)) except: logging.info("Specified torcs_server_port {} is not available. " "Searching for alternative...".format( self.torcs_server_port)) udp.bind(('', 0)) _, self.torcs_server_port = udp.getsockname() logging.info("torcs_server_port has been reassigned to {}".format( self.torcs_server_port)) udp.close() def start_torcs_process(self): if self.torcs_proc is not None: os.killpg(os.getpgid(self.torcs_proc.pid), signal.SIGKILL) time.sleep(0.5) self.torcs_proc = None self.test_torcs_server_port() if self._config.traffic: self.traffic_manager = traffic.MadrasTrafficHandler( self.torcs_server_port, 1, self._config.traffic) TorcsEnv.__init__( self, vision=self._config.vision, throttle=self._config.throttle, gear_change=self._config.gear_change, visualise=self._config.visualise, no_of_visualisations=self._config.no_of_visualisations, torcs_server_port=self.torcs_server_port, noisy_observations=self._config.noisy_observations) command = None rank = MPI.COMM_WORLD.Get_rank() self.torcs_server_config.generate_torcs_server_config() self.madras_agent_port = self.torcs_server_port + self.torcs_server_config.num_traffic_cars if rank < self._config.no_of_visualisations and self._config.visualise: command = 'export TORCS_PORT={} && vglrun torcs -t 10000000 -nolaptime'.format( self.torcs_server_port) else: command = 'export TORCS_PORT={} && torcs -t 10000000 -r ~/.torcs/config/raceman/quickrace.xml -nolaptime'.format( self.torcs_server_port) if self._config.vision is True: command += ' -vision' if self._config.noisy_observations: command += ' -noisy' self.torcs_proc = subprocess.Popen([command], shell=True, preexec_fn=os.setsid) time.sleep(1) def reset(self): """Reset Method to be called at the end of each episode.""" self.step_num = 0 if not self.initial_reset: self.torcs_server_config.generate_torcs_server_config() self.madras_agent_port = self.torcs_server_port + self.torcs_server_config.num_traffic_cars self.client.port = self.madras_agent_port # This is very bad code! But we wont need it any way in v2 logging.info("Num traffic cars in server {}".format( self.torcs_server_config.num_traffic_cars)) if self._config.traffic: self.traffic_manager.reset( self.torcs_server_config.num_traffic_cars) self._config.track_len = self.torcs_server_config.track_length if self.initial_reset: self.wait_for_observation() self.initial_reset = False else: while True: try: self.ob, self.client = TorcsEnv.reset(self, client=self.client, relaunch=True) except Exception: self.wait_for_observation() if not np.any(np.asarray(self.ob.track) < 0): break else: logging.info( "Reset: Reset failed as agent started off track. Retrying..." ) self.distance_traversed = 0 s_t = self.observation_handler.get_obs(self.ob, self._config) if self._config.pid_assist: self.PID_controller.reset() self.reward_handler.reset() self.done_handler.reset() logging.info("Reset: Starting new episode") if np.any(np.asarray(self.ob.track) < 0): logging.info("Reset produced bad track values.") return s_t def wait_for_observation(self): """Refresh client and wait for a valid observation to come in.""" self.ob = None while self.ob is None: logging.debug("{} Still waiting for observation".format(self.name)) try: self.client = snakeoil3.Client( p=self.madras_agent_port, vision=self._config.vision, visualise=self._config.visualise) # Open new UDP in vtorcs self.client.MAX_STEPS = self._config.client_max_steps self.client.get_servers_input(0) # Get the initial input from torcs raw_ob = self.client.S.d # Get the current full-observation from torcs self.ob = self.make_observation(raw_ob) except: pass def step_vanilla(self, action): """Execute single step with steer, acceleration, brake controls.""" if self._config.normalize_actions: action[1] = (action[1] + 1) / 2.0 # acceleration back to [0, 1] action[2] = (action[2] + 1) / 2.0 # brake back to [0, 1] r = 0.0 try: self.ob, r, done, info = TorcsEnv.step(self, 0, self.client, action, self._config.early_stop) except Exception as e: logging.debug("Exception {} caught at port {}".format( str(e), self.torcs_server_port)) self.wait_for_observation() game_state = { "torcs_reward": r, "torcs_done": done, "distance_traversed": self.client.S.d['distRaced'], "angle": self.client.S.d["angle"], "damage": self.client.S.d["damage"], "trackPos": self.client.S.d["trackPos"], "racePos": self.client.S.d["racePos"], "track": self.client.S.d["track"] } reward = self.reward_handler.get_reward(self._config, game_state) done = self.done_handler.get_done_signal(self._config, game_state) next_obs = self.observation_handler.get_obs(self.ob, self._config) if done: if self._config.traffic: self.traffic_manager.kill_all_traffic_agents() self.client.R.d["meta"] = True # Terminate the episode logging.info('Terminating PID {}'.format(self.client.serverPID)) info["distRaced"] = self.client.S.d["distRaced"] info["racePos"] = self.client.S.d["racePos"] return next_obs, reward, done, info def step_pid(self, desire): """Execute single step with lane_pos, velocity controls.""" lane_pos_scale = self._config.track_limits[ 'high'] - self._config.track_limits['low'] desire[ 0] = self._config.track_limits['low'] + lane_pos_scale * desire[0] if self._config.normalize_actions: # [-1, 1] should correspond to [-self._config.target_speed, # self._config.target_speed] speed_scale = self._config.target_speed desire[1] *= speed_scale # Now in m/s # convert to km/hr desire[1] *= 3600 / 1000 # Now in km/hr # Normalize to gym_torcs specs desire[1] /= self.default_speed reward = 0.0 for PID_step in range(self._config.pid_settings['pid_latency']): a_t = self.PID_controller.get_action(desire) try: self.ob, r, done, info = TorcsEnv.step(self, PID_step, self.client, a_t, self._config.early_stop) except Exception as e: logging.debug("Exception {} caught at port {}".format( str(e), self.torcs_server_port)) self.wait_for_observation() game_state = { "torcs_reward": r, "torcs_done": done, "distance_traversed": self.client.S.d["distRaced"], "angle": self.client.S.d["angle"], "damage": self.client.S.d["damage"], "trackPos": self.client.S.d["trackPos"], "racePos": self.client.S.d["racePos"], "track": self.client.S.d["track"] } reward += self.reward_handler.get_reward(self._config, game_state) if self._config.pid_assist: self.PID_controller.update(self.ob) done = self.done_handler.get_done_signal(self._config, game_state) if done: self.client.R.d["meta"] = True # Terminate the episode logging.info('Terminating PID {}'.format( self.client.serverPID)) break next_obs = self.observation_handler.get_obs(self.ob, self._config) info["distRaced"] = self.client.S.d["distRaced"] info["racePos"] = self.client.S.d["racePos"] return next_obs, reward, done, info def step(self, action): self.step_num += 1 if self._config.add_noise_to_actions: noise = np.random.normal(scale=self._config.action_noise_std, size=self.action_dim) action += noise if self._config.pid_assist: return self.step_pid(deepcopy(action)) else: return self.step_vanilla(deepcopy(action))
class MadrasTrafficAgent(object): def __init__(self, port, cfg, name='MadrasTraffic'): self.cfg = cfg self.name = name self.steer = 0.0 self.accel = 0.0 self.brake = 0.0 self.env = TorcsEnv( vision=(cfg["vision"] if "vision" in cfg else False), throttle=(cfg["throttle"] if "throttle" in cfg else True), gear_change=(cfg["gear_change"] if "gear_change" in cfg else False), visualise=(self.cfg["visualise"] if "visualise" in self.cfg else False), name=self.name) self.PID_controller = PIDController(cfg["pid_settings"]) self.port = port self.min_safe_dist = 0.005 * ( cfg["min_safe_dist"] if "min_safe_dist" in cfg else 1) # 1 meter def wait_for_observation(self): """Refresh client and wait for a valid observation to come in.""" self.ob = None while self.ob is None: logging.debug("{} Still waiting for observation at {}".format( self.name, self.port)) try: self.client = snakeoil3.Client( p=self.port, vision=(self.cfg["vision"] if "vision" in self.cfg else False), visualise=(self.cfg["visualise"] if "visualise" in self.cfg else False), name=self.name) # Open new UDP in vtorcs self.client.MAX_STEPS = self.cfg[ "client_max_steps"] if "client_max_steps" in self.cfg else np.inf self.client.get_servers_input(0) # Get the initial input from torcs raw_ob = self.client.S.d # Get the current full-observation from torcs self.ob = self.env.make_observation(raw_ob) except: pass def get_action(self): raise NotImplementedError( "Successor classes must implement this method") def flag_off(self, random_seed=0): del random_seed self.wait_for_observation() logging.debug("[{}]: My server is PID: {}".format( self.name, self.client.serverPID)) self.is_alive = True while True: if self.is_alive: action = self.get_action() try: self.ob, _, done, info = self.env.step( 0, self.client, action) except Exception as e: logging.debug( "Exception {} caught by {} traffic agent at port {}". format(str(e), self.name, self.port)) self.wait_for_observation() self.detect_and_prevent_imminent_crash_out_of_track() self.PID_controller.update(self.ob) if done: self.is_alive = False logging.debug("{} died.".format(self.name)) def get_front_opponents(self): return np.array([ self.ob.opponents[16], self.ob.opponents[17], self.ob.opponents[18], ]) def detect_and_prevent_imminent_crash_out_of_track(self): while True: min_dist_from_track = np.min(self.ob.track) if min_dist_from_track <= self.min_safe_dist: closest_dist_sensor_id = np.argmin(self.ob.track) if closest_dist_sensor_id < 9: action = [1, 0, 1] elif closest_dist_sensor_id > 9: action = [-1, 0, 1] else: action = [0, 0, 1] self.ob, _, _, _ = self.env.step(0, self.client, action) else: break def get_collision_cone_radius(self): speed = self.ob.speedX * self.env.default_speed * (1000.0 / 3600.0 ) # speed in m/sec collision_time_window = self.cfg[ "collision_time_window"] if "collision_time_window" in self.cfg else 1 collision_cone_radius = collision_time_window * speed return collision_cone_radius / 200.0 # Normalizing def avoid_impending_collision(self): # If the opponent in front is too close, brake opponents_in_front = self.get_front_opponents() closest_front = np.min(opponents_in_front) frontal_distance_threshold = self.get_collision_cone_radius() if closest_front < frontal_distance_threshold: self.brake = 1 else: self.brake = 0