def reset(self, prev_step_info=None):
    """Reset the environment. To be called at the end of each episode.

    On the first call (``self.initial_reset`` is true) a client is opened
    and an initial observation is read, unless ``self.ob`` is already
    populated. On later calls the reset is delegated to
    ``TorcsEnv.reset`` with ``relaunch=True``; if that raises, a fresh
    client is opened instead. In both cases the method retries until an
    observation is available.

    Args:
        prev_step_info: ``info`` dict from the previous ``step`` call.
            Accepted for interface compatibility and may be omitted:
            every termination cause (including ``'hardReset'``) leads to
            the same relaunching reset.

    Returns:
        1-D array stacking angle, track, trackPos, speedX, speedY,
        speedZ, wheelSpinVel / 100 and rpm of the new observation.
    """
    if not self.initial_reset:
        # Previously a missing ``prev_step_info`` raised inside this
        # ``try`` (``None.keys()``) and silently skipped the reset; both
        # branches of the old check issued the identical call below.
        try:
            self.ob, self.client = TorcsEnv.reset(
                self, client=self.client, relaunch=True)
        except Exception:
            # Fall through to the reconnect loop.
            self.ob = None

    # Retry until the server answers with a usable observation. Only
    # ``Exception`` is swallowed so Ctrl-C / SystemExit still get through.
    while self.ob is None:
        try:
            # Open new UDP in vtorcs
            self.client = snakeoil3.Client(p=self.port, vision=self.vision)
            self.client.MAX_STEPS = self.CLIENT_MAX_STEPS
            # Get the initial input from torcs
            self.client.get_servers_input(step=0)
            raw_ob = self.client.S.d  # Get the current full-observation
            self.ob = self.make_observation(raw_ob)
        except Exception:
            pass
    self.initial_reset = False

    # NOTE(review): ``step`` computes progress from ``self.prev_dist``,
    # which is not reset here -- confirm it is reset elsewhere.
    self.distance_traversed = 0
    s_t = np.hstack((self.ob.angle, self.ob.track, self.ob.trackPos,
                     self.ob.speedX, self.ob.speedY, self.ob.speedZ,
                     self.ob.wheelSpinVel / 100.0, self.ob.rpm))
    return s_t
def step(self, desire):
    """Step method to be called at each time step.

    Runs ``self.PID_latency`` low-level simulator steps. With
    ``self.pid_assist`` enabled, ``desire[0]`` is treated as the target
    track position and ``desire[1]`` as the target velocity, and PID
    controllers turn them into a ``[steer, accel, brake]`` action;
    otherwise ``desire`` is sent to the simulator unchanged.

    Args:
        desire: target ``(trackPos, velocity)`` pair when PID assist is
            on, else the raw action passed to ``TorcsEnv.step``.

    Returns:
        Tuple ``(s_t1, r_t, done, info)``: the stacked observation, the
        reward accumulated over the sub-steps (simulator reward plus the
        fraction of ``self.track_len`` covered), the termination flag and
        the info of the last successful sub-step. If no sub-step
        completed, ``done`` is ``False`` and ``info`` is ``{}``.
    """
    r_t = 0
    # Defaults so the return below is well-defined even when no sub-step
    # completes (PID_latency == 0, or every sub-step raised).
    done = False
    info = {}
    for PID_step in range(self.PID_latency):
        # Implement the desired trackpos and velocity using PID
        if self.pid_assist:
            self.accel_PID.update_error((desire[1] - self.prev_vel))
            self.steer_PID.update_error(
                (-(self.prev_lane - desire[0]) / 10 + self.prev_angle))
            if self.accel_PID.output() < 0.0:
                brake = 1
            else:
                brake = 0
            a_t = np.asarray([self.steer_PID.output(),
                              self.accel_PID.output(), brake])
        else:
            a_t = desire
        try:
            self.ob, r, done, info = TorcsEnv.step(
                self, PID_step, self.client, a_t, self.early_stop)
        except Exception as e:
            print(("Exception caught at port " + str(e)))
            self.ob = None
            # Reconnect until an observation is available. Only
            # ``Exception`` is swallowed so Ctrl-C still interrupts.
            while self.ob is None:
                try:
                    # Open new UDP in vtorcs
                    self.client = snakeoil3.Client(p=self.port,
                                                   vision=self.vision)
                    self.client.MAX_STEPS = self.CLIENT_MAX_STEPS
                    # Get the initial input from torcs
                    self.client.get_servers_input(0)
                    # Get the current full-observation from torcs
                    raw_ob = self.client.S.d
                    self.ob = self.make_observation(raw_ob)
                except Exception:
                    pass
            # This sub-step produced no reward; try the next one.
            continue
        self.prev_vel = self.ob.speedX
        self.prev_angle = self.ob.angle
        self.prev_lane = self.ob.trackPos
        if (math.isnan(r)):
            r = 0.0
        r_t += r  # accumulate rewards over all the time steps
        self.distance_traversed = self.client.S.d['distRaced']
        # Progress bonus: fraction of the track covered in this sub-step.
        r_t += (self.distance_traversed - self.prev_dist) / self.track_len
        self.prev_dist = deepcopy(self.distance_traversed)
        if self.distance_traversed >= self.track_len:
            done = True
        if done:
            break
    s_t1 = np.hstack((self.ob.angle, self.ob.track, self.ob.trackPos,
                      self.ob.speedX, self.ob.speedY, self.ob.speedZ,
                      self.ob.wheelSpinVel / 100.0, self.ob.rpm))
    return s_t1, r_t, done, info
def reset(self, client, relaunch=False):
    """Start a new episode and hand back the first observation.

    Unless this is the very first reset, the ``meta`` flag is set on the
    outgoing message of the old ``client`` and sent to the server
    (presumably the restart request -- confirm against snakeoil3), and
    TORCS is relaunched when ``relaunch`` is exactly ``True``. A new
    client is then opened on the old client's port and its first server
    message becomes ``self.observation``.

    Args:
        client: the client used during the previous episode; only its
            ``port`` is reused for the replacement client.
        relaunch: relaunch TORCS via ``self.reset_torcs()`` (ignored on
            the first reset).

    Returns:
        Tuple ``(self.get_obs(), new_client)``.
    """
    udp_port = client.port
    self.time_step = 0

    first_reset = self.initial_reset is True
    if not first_reset:
        client.R.d['meta'] = True
        client.respond_to_server()

        ## TENTATIVE. Restarting TORCS every episode suffers the memory leak bug!
        if relaunch is True:
            self.reset_torcs()
            print("### TORCS is RELAUNCHED ###")

    # Modify here if you use multiple tracks in the environment
    new_client = snakeoil3.Client(p=udp_port, vision=self.vision)  # Open new UDP in vtorcs
    new_client.MAX_STEPS = np.inf

    new_client.get_servers_input(-1)  # Get the initial input from torcs
    raw_obs = new_client.S.d  # Get the current full-observation from torcs
    self.observation = self.make_observation(raw_obs)

    state_parts = (
        self.observation.angle,
        self.observation.track,
        self.observation.trackPos,
        self.observation.speedX,
        self.observation.speedY,
        self.observation.speedZ,
        self.observation.wheelSpinVel/100.0,
        self.observation.rpm,
    )
    self.currState = np.hstack(state_parts)

    self.last_u = None
    self.initial_reset = False
    return self.get_obs(), new_client
def _connect_traffic_client(env, port, failure_msg=None):
    """Open clients on ``port`` until one yields an observation.

    Retries forever on any ``Exception`` (KeyboardInterrupt/SystemExit are
    not swallowed). If ``failure_msg`` is given it is printed after each
    failed attempt.

    Returns:
        Tuple ``(ob, client)`` with a non-``None`` observation.
    """
    while True:
        try:
            # Open new UDP in vtorcs
            client = snakeoil3.Client(p=port, vision=False)
            client.MAX_STEPS = np.inf
            client.get_servers_input(0)  # Get the initial input from torcs
            # Get the current full-observation from torcs
            ob = env.make_observation(client.S.d)
        except Exception:
            if failure_msg is not None:
                print(failure_msg)
            continue
        if ob is not None:
            return ob, client


def playTraffic(port=3101, target_vel=50.0, angle=0.0, sleep=0):
    """Traffic Play function.

    Drives a single car with two PID controllers for
    ``cfg['traffic']['max_eps']`` episodes of at most
    ``cfg['traffic']['max_steps_eps']`` steps each.

    Args:
        port: port the snakeoil3 client connects to.
        target_vel: desired speed; divided by 300 before being compared
            with ``ob.speedX``.
        angle: offset added to ``ob.trackPos`` in the steering error.
        sleep: for steps ``0..sleep`` of every episode the controls are
            left at their current values (the simulator is still stepped).
    """
    env = TorcsEnv(vision=False, throttle=True, gear_change=False)
    ob, client = _connect_traffic_client(env, port)

    episode_count = cfg['traffic']['max_eps']
    max_steps = cfg['traffic']['max_steps_eps']
    early_stop = 0
    velocity = target_vel / 300.0
    accel_pid = PID(np.array([10.5, 0.05, 2.8]))
    steer_pid = PID(np.array([5.1, 0.001, 0.000001]))
    print(velocity)

    for i in range(episode_count):
        info = {'termination_cause': 0}
        steer = 0.0
        accel = 0.0
        brake = 0
        for step in range(max_steps):
            a_t = np.asarray([steer, accel, brake])  # [steer, accel, brake]
            try:
                ob, r_t, done, info = env.step(step, client, a_t, early_stop)
                if done:
                    break
            except Exception as e:
                print("Exception caught at port " + str(i) + str(e))
                ob, client = _connect_traffic_client(env, port)
                continue
            if step <= sleep:
                print("WAIT" + str(port))
                continue

            opp = ob.opponents
            # Smallest reading among opponent sensors 15..19.
            closest_front = np.min([opp[k] for k in range(15, 20)])
            print(ob.speedX * 300)

            vel_error = velocity - ob.speedX
            # Random +/-0.05 perturbation is added to the steering error.
            angle_error = (-(ob.trackPos + angle) / 10 + ob.angle +
                           random.choice([1, -1]) * 0.05)
            steer_pid.update_error(angle_error)
            accel_pid.update_error(vel_error)
            accel = accel_pid.output()
            steer = steer_pid.output()

            # NOTE(review): the original also set ``brake`` from the sign
            # of ``accel``, but that value was unconditionally overwritten
            # by this proximity check, so only the proximity rule has ever
            # taken effect. Confirm whether braking on negative PID output
            # was intended before changing it.
            safe_gap = (madras.floatX(0.5 * ob.speedX * 100) + 10.0) / 200.0
            brake = 1 if closest_front < safe_gap else 0

        try:
            if info.get('termination_cause') == 'hardReset':
                print('Hard reset by some agent')
                ob, client = env.reset(client=client, relaunch=True)
        except Exception as e:
            print("Exception caught at point B at port " + str(i) + str(e))
            ob, client = _connect_traffic_client(
                env, port,
                failure_msg=("Exception caught at at point C at port " +
                             str(i) + str(e)))
def _connect_agent_client(env, port, failure_msg=None):
    """Open clients on ``port`` until one yields an observation.

    Retries forever on any ``Exception`` (KeyboardInterrupt/SystemExit are
    not swallowed). If ``failure_msg`` is given it is printed after each
    failed attempt.

    Returns:
        Tuple ``(ob, client)`` with a non-``None`` observation.
    """
    while True:
        try:
            # Open new UDP in vtorcs
            client = snakeoil3.Client(p=port, vision=False)
            client.MAX_STEPS = np.inf
            client.get_servers_input(0)  # Get the initial input from torcs
            # Get the current full-observation from torcs
            ob = env.make_observation(client.S.d)
        except Exception:
            if failure_msg is not None:
                print(failure_msg)
            continue
        if ob is not None:
            return ob, client


def _agent_state(ob):
    """Stack the observation fields fed to the agent into one 1-D array."""
    return np.hstack(
        (ob.angle, ob.track, ob.trackPos, ob.speedX, ob.speedY, ob.speedZ,
         ob.wheelSpinVel / 100.0, ob.rpm, ob.opponents))


def playGame(f_diagnostics, train_indicator, port=3101, config_dict=None):
    """Run (and optionally train) a DDPG agent in TORCS.

    Args:
        f_diagnostics: not used by this function.
        train_indicator: 1 means Train, 0 means simply Run. It is
            overridden by ``config_dict['train_indicator']``.
        port: port the snakeoil3 client connects to; also used to build
            the weights directory ``save_location + str(port) + "/"``.
        config_dict: required settings. Keys read: ``total_explore``,
            ``max_eps``, ``max_steps_eps``, ``epsilon_start``,
            ``save_location`` and ``train_indicator``.

    Raises:
        ValueError: if ``config_dict`` is ``None`` (previously this
            surfaced as an UnboundLocalError further down).
    """
    if config_dict is None:
        raise ValueError("playGame requires a config_dict")
    total_explore = config_dict['total_explore']
    max_eps = config_dict['max_eps']
    max_steps_eps = config_dict['max_steps_eps']
    epsilon_start = config_dict['epsilon_start']
    save_location = config_dict['save_location']
    train_indicator = config_dict['train_indicator']

    action_dim = 3  # Steering/Acceleration/Brake
    state_dim = 65  # of sensors input
    env_name = 'Torcs_Env'
    weights_location = save_location + str(port) + "/"
    agent = DDPG(env_name, state_dim, action_dim, weights_location)

    # Generate a Torcs environment
    print("I have been asked to use port: ", port)
    env = TorcsEnv(vision=False, throttle=True, gear_change=False)
    ob, client = _connect_agent_client(env, port)
    # Built outside the retry loop so ``s_t`` is always bound.
    s_t = _agent_state(ob)

    EXPLORE = total_explore
    episode_count = max_eps
    max_steps = max_steps_eps
    epsilon = epsilon_start
    done = False
    epsilon_steady_state = 0.01  # This is used for early stopping.
    totalSteps = 0
    running_avg_reward = 0.

    print("TORCS Experiment Start.")
    for i in range(episode_count):
        early_stop = 1
        total_reward = 0.
        info = {'termination_cause': 0}
        # Per-episode statistics; collected but not consumed here.
        distance_traversed = 0.
        speed_array = []
        trackPos_array = []
        print('\n\nStarting new episode...\n')

        for step in range(max_steps):
            # Take noisy actions during training
            if (train_indicator):
                epsilon -= 1.0 / EXPLORE
                epsilon = max(epsilon, epsilon_steady_state)
                a_t = agent.noise_action(s_t, epsilon)
            else:
                a_t = agent.action(s_t)  # [steer, accel, brake]
            try:
                ob, r_t, done, info = env.step(step, client, a_t, early_stop)
                # NOTE(review): breaking here means the terminal transition
                # is never passed to ``agent.perceive`` and its reward is
                # not added to ``total_reward``; the ``if done`` at the end
                # of the loop body is then unreachable. Confirm intent.
                if done:
                    break
                analyse_info(info, printing=False)
                s_t1 = _agent_state(ob)
                # Assuming 1 step = 1 second
                distance_traversed += ob.speedX * np.cos(ob.angle)
                speed_array.append(ob.speedX * np.cos(ob.angle))
                trackPos_array.append(ob.trackPos)
                # Checking for nan rewards: abandon the episode.
                if (math.isnan(r_t)):
                    r_t = 0.0
                    for bad_r in range(50):
                        print('Bad Reward Found')
                    break
                # Add to replay buffer only if training
                if (train_indicator):
                    # Add experience to replay buffer
                    agent.perceive(s_t, a_t, r_t, s_t1, done)
            except Exception as e:
                print("Exception caught after training, at port " + str(i) +
                      str(e))
                ob, client = _connect_agent_client(env, port)
                continue
            total_reward += r_t
            s_t = s_t1

            # Displaying progress every 15 steps.
            if ((np.mod(step, 15) == 0)):
                print("Episode", i, "Step", step, "Epsilon", epsilon,
                      "Action", a_t, "Reward", r_t)

            totalSteps += 1
            if done:
                break

        if ((train_indicator == 1)):
            if (i % 300 == 0):
                agent.saveNetwork(i)

        running_avg_reward = running_average(running_avg_reward, i + 1,
                                             total_reward)

        print("TOTAL REWARD @ " + str(i) + "-th Episode  :  Num_Steps= " +
              str(step) + "; Max_steps= " + str(max_steps) + "; Reward= " +
              str(total_reward) + "; Running average reward= " +
              str(running_avg_reward))
        print("Total Step: " + str(totalSteps))
        print("")

        print(info)
        try:
            # Both outcomes relaunch; a hard reset is only announced.
            if info.get('termination_cause') == 'hardReset':
                print('Hard reset by some agent')
            ob, client = env.reset(client=client, relaunch=True)
        except Exception as e:
            print("Exception caught after episode end, at port " + str(i) +
                  str(e))
            ob, client = _connect_agent_client(
                env, port,
                failure_msg=(
                    "Another exception caught while handling exception, at port "
                    + str(i) + str(e)))

        s_t = _agent_state(ob)

    env.end()  # This is for shutting down TORCS
    print("Finish.")
if S['speedX'] > 50: R['gear'] = 2 if S['speedX'] > 80: R['gear'] = 3 if S['speedX'] > 110: R['gear'] = 4 if S['speedX'] > 140: R['gear'] = 5 if S['speedX'] > 170: R['gear'] = 6 # Throttle Control if S['speedX'] < target_speed - (R['steer'] * 50): R['accel'] += .01 else: R['accel'] -= .01 if S['speedX'] < 10: R['accel'] += 1 / (S['speedX'] + .1) print(S['speedX'], R['gear']) return if __name__ == "__main__": C = snakeoil3.Client(p=PORT, e=maxSteps, vision=False) print(sys.argv[1]) for step in range(C.maxSteps, 0, -1): C.get_servers_input(0) drive_traffic(C, sys.argv[1]) C.respond_to_server() C.shutdown()