예제 #1
0
    def reset(self, prev_step_info=None):
        """Reset the environment; to be called at the end of each episode.

        On the first call (``self.initial_reset`` is truthy) a client is
        connected unless ``self.ob`` already holds an observation. On later
        calls ``TorcsEnv.reset`` is invoked with ``relaunch=True``; if that
        call raises, a fresh client is connected instead.

        Args:
            prev_step_info: info mapping returned by the previous ``step``
                call. When it is ``None`` (the default) ``TorcsEnv.reset`` is
                not called and the client is simply reconnected.

        Returns:
            The state vector obtained by stacking angle, track, trackPos,
            speedX/Y/Z, wheelSpinVel / 100 and rpm of the current observation.
        """

        def connect_until_observed():
            """Open new clients until ``self.ob`` holds an observation."""
            while self.ob is None:
                try:
                    # Open new UDP in vtorcs
                    self.client = snakeoil3.Client(p=self.port,
                                                   vision=self.vision)
                    self.client.MAX_STEPS = self.CLIENT_MAX_STEPS
                    # Get the initial input from torcs
                    self.client.get_servers_input(step=0)
                    # Build the full observation from the raw server data
                    self.ob = self.make_observation(self.client.S.d)
                except Exception:
                    # Deliberate best-effort retry. Only ``Exception`` is
                    # caught so that Ctrl-C / SystemExit can still stop the
                    # otherwise endless loop.
                    continue

        if self.initial_reset:
            connect_until_observed()
            self.initial_reset = False
        else:
            restarted = False
            # NOTE(review): the previous code reached the reconnect fallback
            # for ``prev_step_info=None`` only through an AttributeError on
            # ``None.keys()``; that outcome is kept but made explicit. It also
            # issued the very same relaunch for the 'hardReset' termination
            # cause and for every other cause, so the cause is not inspected.
            if prev_step_info is not None:
                try:
                    self.ob, self.client = TorcsEnv.reset(
                        self, client=self.client, relaunch=True)
                    restarted = True
                except Exception:
                    restarted = False
            if not restarted:
                self.ob = None
                connect_until_observed()

        self.distance_traversed = 0
        s_t = np.hstack((self.ob.angle, self.ob.track, self.ob.trackPos,
                         self.ob.speedX, self.ob.speedY, self.ob.speedZ,
                         self.ob.wheelSpinVel / 100.0, self.ob.rpm))

        return s_t
예제 #2
0
    def step(self, desire):
        """Step method to be called at each time step.

        Runs up to ``self.PID_latency`` low-level ``TorcsEnv.step`` calls and
        accumulates their rewards.

        Args:
            desire: when ``self.pid_assist`` is truthy, ``desire[0]`` is
                compared with the previous track position and ``desire[1]``
                with the previous velocity to drive the steering and
                acceleration PID controllers; otherwise it is passed on
                unchanged as the action.

        Returns:
            Tuple ``(s_t1, r_t, done, info)``. ``done`` and ``info`` default
            to ``False`` and ``{}`` when no low-level step completed.
        """
        r_t = 0
        # Defaults keep the return value well defined when the loop body never
        # runs or when every low-level step raised.
        done = False
        info = {}

        for PID_step in range(self.PID_latency):
            # Implement the desired trackpos and velocity using PID
            if self.pid_assist:
                self.accel_PID.update_error((desire[1] - self.prev_vel))
                self.steer_PID.update_error((-(self.prev_lane - desire[0]) / 10 +
                                            self.prev_angle))
                if self.accel_PID.output() < 0.0:
                    brake = 1
                else:
                    brake = 0
                a_t = np.asarray([self.steer_PID.output(),
                                 self.accel_PID.output(), brake])
            else:
                a_t = desire
            try:
                self.ob, r, done, info = TorcsEnv.step(self, PID_step,
                                                       self.client, a_t,
                                                       self.early_stop)
            except Exception as e:
                print(("Exception caught at port " + str(e)))
                # The failed step produced no reward; without this ``r`` would
                # be unbound (first step) or stale (later steps) below.
                r = 0.0
                self.ob = None
                while self.ob is None:
                    try:
                        # Open new UDP in vtorcs
                        self.client = snakeoil3.Client(p=self.port,
                                                       vision=self.vision)
                        self.client.MAX_STEPS = self.CLIENT_MAX_STEPS
                        # Get the initial input from torcs
                        self.client.get_servers_input(0)
                        # Get the current full-observation from torcs
                        self.ob = self.make_observation(self.client.S.d)
                    except Exception:
                        # Best-effort retry; KeyboardInterrupt/SystemExit are
                        # deliberately not swallowed.
                        continue
            self.prev_vel = self.ob.speedX
            self.prev_angle = self.ob.angle
            self.prev_lane = self.ob.trackPos
            if (math.isnan(r)):
                r = 0.0
            r_t += r  # accumulate rewards over all the time steps

            # Extra reward: distance covered in this step as a fraction of the
            # track length.
            self.distance_traversed = self.client.S.d['distRaced']
            r_t += (self.distance_traversed - self.prev_dist) /\
                self.track_len
            self.prev_dist = deepcopy(self.distance_traversed)
            if self.distance_traversed >= self.track_len:
                done = True
            if done:
                break

        s_t1 = np.hstack((self.ob.angle, self.ob.track, self.ob.trackPos,
                          self.ob.speedX, self.ob.speedY, self.ob.speedZ,
                          self.ob.wheelSpinVel / 100.0, self.ob.rpm))

        return s_t1, r_t, done, info
예제 #3
0
    def reset(self, client, relaunch=False):
        """Start a new episode and return ``(self.get_obs(), new_client)``.

        Unless this is the initial reset, ``meta`` is set on the old client's
        response and sent to the server, and TORCS is relaunched through
        ``self.reset_torcs()`` when ``relaunch`` is exactly ``True``. A new
        client is then opened on the old client's port and its first
        observation is stored in ``self.observation`` / ``self.currState``.
        """
        server_port = client.port
        self.time_step = 0

        already_reset_once = self.initial_reset is not True
        if already_reset_once:
            client.R.d['meta'] = True
            client.respond_to_server()

            # TENTATIVE. Restarting TORCS every episode suffers the memory
            # leak bug!
            if relaunch is True:
                self.reset_torcs()
                print("### TORCS is RELAUNCHED ###")

        # Modify here if you use multiple tracks in the environment
        # Open new UDP in vtorcs
        client = snakeoil3.Client(p=server_port, vision=self.vision)
        client.MAX_STEPS = np.inf

        # Get the initial input from torcs
        client.get_servers_input(-1)

        # Turn the raw server data into the current full observation
        raw_server_data = client.S.d
        self.observation = self.make_observation(raw_server_data)
        current = self.observation
        self.currState = np.hstack((
            current.angle,
            current.track,
            current.trackPos,
            current.speedX,
            current.speedY,
            current.speedZ,
            current.wheelSpinVel/100.0,
            current.rpm,
        ))

        self.last_u = None
        self.initial_reset = False

        return self.get_obs(), client
예제 #4
0
def playTraffic(port=3101, target_vel=50.0, angle=0.0, sleep=0):
    """Traffic Play function.

    Drives one car with two PID controllers (speed and steering) for
    ``cfg['traffic']['max_eps']`` episodes of at most
    ``cfg['traffic']['max_steps_eps']`` steps each.

    Args:
        port: port handed to ``snakeoil3.Client``.
        target_vel: target speed; divided by 300 before it is compared with
            ``ob.speedX``.
        angle: offset added to ``ob.trackPos`` in the steering error.
        sleep: the controls are left untouched while ``step <= sleep`` in
            every episode.
    """
    env = TorcsEnv(vision=False, throttle=True, gear_change=False)

    def connect(report=None):
        """Open clients until one yields an observation.

        Returns ``(client, observation)``. When ``report`` is given, each
        failed attempt prints ``report`` followed by the exception that was
        just raised.
        """
        new_client = None
        fresh_ob = None
        while fresh_ob is None:
            try:
                # Open new UDP in vtorcs
                new_client = snakeoil3.Client(p=port, vision=False)
                new_client.MAX_STEPS = np.inf
                # Get the initial input from torcs
                new_client.get_servers_input(step=0)
                # Get the current full-observation from torcs
                fresh_ob = env.make_observation(new_client.S.d)
            except Exception as err:
                # Best-effort retry; only ``Exception`` is caught so that
                # Ctrl-C / SystemExit can still break the loop.
                if report is not None:
                    print(report + str(err))
        return new_client, fresh_ob

    client, ob = connect()
    episode_count = cfg['traffic']['max_eps']
    max_steps = cfg['traffic']['max_steps_eps']
    early_stop = 0
    velocity = target_vel / 300.0
    accel_pid = PID(np.array([10.5, 0.05, 2.8]))
    steer_pid = PID(np.array([5.1, 0.001, 0.000001]))
    print(velocity)
    for i in range(episode_count):
        info = {'termination_cause': 0}
        steer = 0.0
        accel = 0.0
        brake = 0

        for step in range(max_steps):
            a_t = np.asarray([steer, accel, brake])  # [steer, accel, brake]
            try:
                ob, _, done, info = env.step(step, client, a_t, early_stop)
            except Exception as e:
                print("Exception caught at port " + str(i) + str(e))
                # Carry on with the observation of the fresh connection.
                client, ob = connect()
            else:
                if done:
                    break
            if (step <= sleep):
                print("WAIT" + str(port))
                continue
            opp = ob.opponents
            front = np.array([opp[15], opp[16], opp[17], opp[18], opp[19]])
            closest_front = np.min(front)
            print(ob.speedX * 300)
            vel_error = velocity - ob.speedX
            angle_error = -(ob.trackPos + angle
                            ) / 10 + ob.angle + random.choice([1, -1]) * 0.05
            steer_pid.update_error(angle_error)
            accel_pid.update_error(vel_error)
            accel = accel_pid.output()
            steer = steer_pid.output()
            # NOTE(review): the previous code first set ``brake`` from the
            # sign of ``accel`` and then unconditionally overwrote it with the
            # proximity check below, so only the proximity check ever took
            # effect. That effective behaviour is kept; confirm whether
            # braking on ``accel < 0`` was intended as well.
            too_close = closest_front < (
                (madras.floatX(0.5 * ob.speedX * 100) + 10.0) / 200.0)
            brake = 1 if too_close else 0

        try:
            if info.get('termination_cause') == 'hardReset':
                print('Hard reset by some agent')
                ob, client = env.reset(client=client, relaunch=True)

        except Exception as e:
            print("Exception caught at point B at port " + str(i) + str(e))
            # Each failed reconnect reports its own exception (previously the
            # already-handled outer exception was printed again).
            client, ob = connect(
                "Exception caught at at point C at port " + str(i))
예제 #5
0
def playGame(f_diagnostics,
             train_indicator,
             port=3101,
             config_dict=None):  # 1 means Train, 0 means simply Run
    """Run a DDPG agent in TORCS, training it when requested.

    Args:
        f_diagnostics: accepted for interface compatibility; not used here.
        train_indicator: overridden by ``config_dict['train_indicator']``.
            A truthy value selects noisy actions and fills the replay
            buffer; weights are saved every 300 episodes when it equals 1.
        port: port handed to ``snakeoil3.Client``; also appended to
            ``config_dict['save_location']`` to form the weights directory.
        config_dict: required mapping providing ``total_explore``,
            ``max_eps``, ``max_steps_eps``, ``epsilon_start``,
            ``save_location`` and ``train_indicator``.

    Raises:
        ValueError: if ``config_dict`` is ``None`` (this previously failed
            later with an UnboundLocalError on the first setting read).
    """
    if config_dict is None:
        raise ValueError("playGame requires a config_dict")

    total_explore = config_dict['total_explore']
    episode_count = config_dict['max_eps']
    max_steps = config_dict['max_steps_eps']
    epsilon = config_dict['epsilon_start']
    save_location = config_dict['save_location']
    train_indicator = config_dict['train_indicator']

    action_dim = 3  # Steering/Acceleration/Brake
    state_dim = 65  # of sensors input

    env_name = 'Torcs_Env'
    weights_location = save_location + str(port) + "/"
    agent = DDPG(env_name, state_dim, action_dim, weights_location)

    # Generate a Torcs environment
    print("I have been asked to use port: ", port)
    env = TorcsEnv(vision=False, throttle=True, gear_change=False)

    def build_state(observation):
        """Stack the observation fields into the agent's state vector."""
        return np.hstack(
            (observation.angle, observation.track, observation.trackPos,
             observation.speedX, observation.speedY, observation.speedZ,
             observation.wheelSpinVel / 100.0, observation.rpm,
             observation.opponents))

    def connect(report=None):
        """Open clients until one yields an observation.

        Returns ``(client, observation)``. When ``report`` is given, each
        failed attempt prints ``report`` followed by the exception that was
        just raised.
        """
        new_client = None
        fresh_ob = None
        while fresh_ob is None:
            try:
                # Open new UDP in vtorcs
                new_client = snakeoil3.Client(p=port, vision=False)
                new_client.MAX_STEPS = np.inf
                # Get the initial input from torcs
                new_client.get_servers_input(0)
                # Get the current full-observation from torcs
                fresh_ob = env.make_observation(new_client.S.d)
            except Exception as err:
                # Best-effort retry; only ``Exception`` is caught so that
                # Ctrl-C / SystemExit can still break the loop.
                if report is not None:
                    print(report + str(err))
        return new_client, fresh_ob

    client, ob = connect()
    s_t = build_state(ob)

    done = False
    epsilon_steady_state = 0.01  # This is used for early stopping.

    totalSteps = 0
    running_avg_reward = 0.

    print("TORCS Experiment Start.")
    for i in range(episode_count):

        early_stop = 1
        total_reward = 0.
        info = {'termination_cause': 0}
        # Per-episode diagnostics; collected but not read anywhere in this
        # function.
        distance_traversed = 0.
        speed_array = []
        trackPos_array = []

        print('\n\nStarting new episode...\n')

        for step in range(max_steps):

            # Take noisy actions during training
            if (train_indicator):
                epsilon -= 1.0 / total_explore
                epsilon = max(epsilon, epsilon_steady_state)
                a_t = agent.noise_action(s_t, epsilon)
            else:
                a_t = agent.action(s_t)  # [steer, accel, brake]
            try:
                ob, r_t, done, info = env.step(step, client, a_t, early_stop)
                # NOTE(review): leaving here means the terminal transition is
                # never passed to ``agent.perceive`` and its reward is not
                # added to ``total_reward``. Kept as is because training
                # results depend on it; confirm whether this is intended.
                if done:
                    break
                analyse_info(info, printing=False)

                s_t1 = build_state(ob)
                forward_speed = ob.speedX * np.cos(ob.angle)
                distance_traversed += forward_speed  # Assuming 1 step = 1 second
                speed_array.append(forward_speed)
                trackPos_array.append(ob.trackPos)

                # Checking for nan rewards:
                if (math.isnan(r_t)):
                    r_t = 0.0
                    for _ in range(50):
                        print('Bad Reward Found')
                    break

                # Add to replay buffer only if training
                if (train_indicator):
                    agent.perceive(s_t, a_t, r_t, s_t1,
                                   done)  # Add experience to replay buffer

            except Exception as e:
                print("Exception caught after training, at port " + str(i) +
                      str(e))
                client, ob = connect()
                # The transition of this step is lost. Resume from the fresh
                # observation instead of reusing a stale (or still unbound)
                # reward and next state.
                s_t = build_state(ob)
                continue
            total_reward += r_t
            s_t = s_t1

            # Displaying progress every 15 steps.
            if ((np.mod(step, 15) == 0)):
                print("Episode", i, "Step", step, "Epsilon", epsilon, "Action",
                      a_t, "Reward", r_t)

            totalSteps += 1

        if train_indicator == 1 and i % 300 == 0:
            agent.saveNetwork(i)

        running_avg_reward = running_average(running_avg_reward, i + 1,
                                             total_reward)

        print("TOTAL REWARD @ " + str(i) + "-th Episode  : Num_Steps= " +
              str(step) + "; Max_steps= " + str(max_steps) + "; Reward= " +
              str(total_reward) + "; Running average reward= " +
              str(running_avg_reward))
        print("Total Step: " + str(totalSteps))
        print("")

        print(info)
        try:
            # A hard reset is only announced; every episode end relaunches
            # TORCS in exactly the same way.
            if info.get('termination_cause') == 'hardReset':
                print('Hard reset by some agent')
            ob, client = env.reset(client=client, relaunch=True)
        except Exception as e:
            print("Exception caught after episode end, at port " + str(i) +
                  str(e))
            # Each failed reconnect reports its own exception (previously the
            # already-handled outer exception was printed again).
            client, ob = connect(
                "Another exception caught while handling exception, at port "
                + str(i))

        s_t = build_state(ob)

    env.end()  # This is for shutting down TORCS
    print("Finish.")
예제 #6
0
        if S['speedX'] > 50:
            R['gear'] = 2
        if S['speedX'] > 80:
            R['gear'] = 3
        if S['speedX'] > 110:
            R['gear'] = 4
        if S['speedX'] > 140:
            R['gear'] = 5
        if S['speedX'] > 170:
            R['gear'] = 6
        # Throttle Control
        if S['speedX'] < target_speed - (R['steer'] * 50):
            R['accel'] += .01
        else:
            R['accel'] -= .01
        if S['speedX'] < 10:
            R['accel'] += 1 / (S['speedX'] + .1)

    print(S['speedX'], R['gear'])
    return


if __name__ == "__main__":
    # Script entry point: connect one client and call drive_traffic once per
    # step, passing the first command-line argument through to it.
    C = snakeoil3.Client(p=PORT, e=maxSteps, vision=False)
    try:
        print(sys.argv[1])
        for step in range(C.maxSteps, 0, -1):
            C.get_servers_input(0)
            drive_traffic(C, sys.argv[1])
            C.respond_to_server()
    finally:
        # Shut the client down even when the loop (or a missing command-line
        # argument) raises, so the connection is not left open.
        C.shutdown()