Example #1
def test_sac(sess, args, sac, saver):

    saver.restore(sess, 'ckpt/model')

    env = TorcsEnv(vision=False, throttle=True, gear_change=False)

    ob = env.reset(
        relaunch=True
    )  # relaunch TORCS because of the memory leak error

    s = np.hstack((ob.angle, ob.track, ob.trackPos, ob.speedX, ob.speedY,
                   ob.speedZ, ob.wheelSpinVel / 100.0, ob.rpm))

    done = False
    ep_rew = 0.0
    ep_len = 0

    while (not done):

        # deterministic actions at test time
        a = sac.get_action(s, True)

        ob, r, done, _ = env.step(a)
        s = np.hstack((ob.angle, ob.track, ob.trackPos, ob.speedX, ob.speedY,
                       ob.speedZ, ob.wheelSpinVel / 100.0, ob.rpm))

        ep_rew += r
        ep_len += 1

        if (ep_len >= args.max_ep_len):
            done = True

    print('test time performance: | rewards: ', ep_rew, ' | length: ', ep_len)
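test_sac above assembles the 29-dimensional TORCS state vector by hand with np.hstack, a pattern that recurs in many of the examples below. A small helper that factors it out might look like the following sketch; it only reuses the observation fields already shown, and the name obs_to_state is ours, not part of any of these repositories.

import numpy as np

def obs_to_state(ob):
    # angle (1) + track (19) + trackPos (1) + speedX/Y/Z (3) + wheelSpinVel/100 (4) + rpm (1) = 29 values
    return np.hstack((ob.angle, ob.track, ob.trackPos,
                      ob.speedX, ob.speedY, ob.speedZ,
                      ob.wheelSpinVel / 100.0, ob.rpm))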
Example #2
def train_sac(sess, args, sac, saver):

    env = TorcsEnv(vision=False, throttle=True, gear_change=False)

    replay_buffer = ReplayBuffer(args.s_dim, args.a_dim, args.buff_size)

    for ep in range(args.total_ep):

        if np.mod(ep, 100) == 0:
            ob = env.reset(
                relaunch=True
            )  #relaunch TORCS every N episode because of the memory leak error
        else:
            ob = env.reset()

        s = np.hstack((ob.angle, ob.track, ob.trackPos, ob.speedX, ob.speedY,
                       ob.speedZ, ob.wheelSpinVel / 100.0, ob.rpm))

        done = False
        ep_rew = 0.0
        ep_len = 0

        while (not done):

            # warm-up: for episodes 0-10, just step on the gas and drive straight
            if ep > 10:
                a = sac.get_action(s)
            else:
                a = np.array([0.0, 1.0, 0.0])

            ob, r, done, _ = env.step(a)
            s2 = np.hstack(
                (ob.angle, ob.track, ob.trackPos, ob.speedX, ob.speedY,
                 ob.speedZ, ob.wheelSpinVel / 100.0, ob.rpm))

            ep_rew += r
            ep_len += 1

            if (ep_len >= args.max_ep_len):
                done = True

            replay_buffer.store(s, a, r, s2, float(done))

            s = s2

            batch = replay_buffer.sample_batch(args.batch_size)
            outs = sac.train(batch)

        print('episode: ', ep, ' | episode rewards: ', round(ep_rew, 4),
              ' | episode length: ', ep_len, ' | alpha/temperature: ', outs[9])
        with open("performance.txt", "a") as myfile:
            myfile.write(
                str(ep) + " " + str(ep_len) + " " + str(round(ep_rew, 4)) +
                " " + str(round(outs[9], 4)) + "\n")

        if (ep % 10 == 0):
            # save model
            saver.save(sess, 'ckpt/model')
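train_sac assumes a ReplayBuffer with the interface ReplayBuffer(s_dim, a_dim, size), store(s, a, r, s2, d) and sample_batch(batch_size), whose implementation is not shown here. A minimal NumPy sketch compatible with those calls follows; the dictionary keys returned by sample_batch are an assumption, since the real sac.train may expect different names.

import numpy as np

class ReplayBuffer:
    # fixed-size FIFO experience replay, sketched to match the calls in train_sac
    def __init__(self, s_dim, a_dim, size):
        self.s = np.zeros((size, s_dim), dtype=np.float32)
        self.s2 = np.zeros((size, s_dim), dtype=np.float32)
        self.a = np.zeros((size, a_dim), dtype=np.float32)
        self.r = np.zeros(size, dtype=np.float32)
        self.d = np.zeros(size, dtype=np.float32)
        self.ptr, self.count, self.max_size = 0, 0, size

    def store(self, s, a, r, s2, d):
        self.s[self.ptr], self.a[self.ptr], self.r[self.ptr] = s, a, r
        self.s2[self.ptr], self.d[self.ptr] = s2, d
        self.ptr = (self.ptr + 1) % self.max_size
        self.count = min(self.count + 1, self.max_size)

    def sample_batch(self, batch_size=64):
        idx = np.random.randint(0, self.count, size=batch_size)
        return dict(s=self.s[idx], a=self.a[idx], r=self.r[idx],
                    s2=self.s2[idx], d=self.d[idx])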
Example #3
 def __init__(self, port=3101):
     self.loss = None
     self.name = 'car'
     self.env = TorcsEnv(vision=False, throttle=False, port=port)
     ob = self.env.reset(relaunch=False)
     obs_shape = self.process_obs(ob)
     self.max_timesteps = 1000
     self.demos_loaded = False
     super(ImitationCar, self).__init__(self.env, dim_action=1, dim_obs=2)
Example #4
    def __init__(self, load_from=None, will_train=True):
        self.env = TorcsEnv(
            path='/usr/local/share/games/torcs/config/raceman/quickrace.xml')
        self.args = SAC_args()
        self.buffer = ReplayBuffer(self.args.buffer_size)

        action_dim = self.env.action_space.shape[0]
        state_dim = self.env.observation_space.shape[0]
        hidden_dim = 256

        self.action_size = action_dim
        self.state_size = state_dim

        self.value_net = ValueNetwork(state_dim,
                                      hidden_dim).to(self.args.device)
        self.target_value_net = ValueNetwork(state_dim,
                                             hidden_dim).to(self.args.device)

        self.soft_q_net1 = SoftQNetwork(state_dim, action_dim,
                                        hidden_dim).to(self.args.device)
        self.soft_q_net2 = SoftQNetwork(state_dim, action_dim,
                                        hidden_dim).to(self.args.device)

        self.policy_net = PolicyNetwork(state_dim, action_dim,
                                        hidden_dim).to(self.args.device)

        self.target_value_net.load_state_dict(self.value_net.state_dict())

        self.value_criterion = nn.MSELoss()
        self.soft_q_loss1 = nn.MSELoss()
        self.soft_q_loss2 = nn.MSELoss()

        self.value_opt = optim.Adam(self.value_net.parameters(),
                                    lr=self.args.lr)
        self.soft_q_opt1 = optim.Adam(self.soft_q_net1.parameters(),
                                      lr=self.args.lr)
        self.soft_q_opt2 = optim.Adam(self.soft_q_net2.parameters(),
                                      lr=self.args.lr)
        self.policy_opt = optim.Adam(self.policy_net.parameters(),
                                     lr=self.args.lr)

        if will_train:
            current_time = time.strftime('%d-%b-%y-%H.%M.%S', time.localtime())
            self.plot_folder = f'plots/{current_time}'
            self.model_save_folder = f'model/{current_time}'
            make_sure_dir_exists(self.plot_folder)
            make_sure_dir_exists(self.model_save_folder)
            self.cp = Checkpoint(self.model_save_folder)

        if load_from is not None:
            try:
                self.load_checkpoint(load_from)
            except FileNotFoundError:
                print(f'{load_from} not found. Running default.')
        else:
            print('Starting from scratch.')
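The constructor above copies value_net into target_value_net once; during SAC training the target network is usually tracked with a Polyak (soft) update after each gradient step. A minimal sketch of such a helper, which is not part of the code shown and whose soft_tau default is only a common choice:

import torch
import torch.nn as nn

def soft_update(target_net: nn.Module, online_net: nn.Module, soft_tau: float = 0.005):
    # Polyak averaging: target <- soft_tau * online + (1 - soft_tau) * target
    with torch.no_grad():
        for t_param, param in zip(target_net.parameters(), online_net.parameters()):
            t_param.data.copy_(soft_tau * param.data + (1.0 - soft_tau) * t_param.data)

Called once per update, e.g. soft_update(self.target_value_net, self.value_net).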
Example #5
 def __init__(self):
     self.n = 4
     #self.n2 = 0
     self.env = TorcsEnv(vision=False, throttle=True, gear_change=False)
     self.observation_space = self.env.observation_space  # basically this is one agents' action space
     self.action_space = self.env.action_space
     self.step_count = 0
     self.agent_list = []
     #self.agent2_list = []
     self.initialize_agents()
Example #6
 def __init__(self, rank, global_net, counter, lock, args):
     super(A3C_Agent, self).__init__()
     self.network = A3C_Network(29, 3, lock)
     self.global_net = global_net
     self.counter = counter
     self.lock = lock
     self.args = args
     self.done = True
     self.name = ''
     self.env = TorcsEnv(
         port=3101 + rank,
         path='/usr/local/share/games/torcs/config/raceman/quickrace.xml')
     self.reset()
Example #7
def main():
    count = 0

    env = TorcsEnv(vision=False, throttle=True, gear_change=False, text=False)
    # action_fd = open('./trajectory/action15.csv', 'w', newline='')
    # observation_fd = open('./trajectory/observation15.csv', 'w', newline='')

    # action_writer = csv.writer(action_fd, delimiter=',')
    # observation_writer = csv.writer(observation_fd, delimiter=',')

    for ep in range(1, 5):
        done = False
        step, score = 0, 0
        if np.mod(ep, 5) == 0:
            ob = env.reset(relaunch=True)
            state = np.hstack(
                (ob.angle, ob.track, ob.trackPos, ob.speedY, ob.speedX,
                 ob.speedZ, ob.wheelSpinVel / 100.0, ob.rpm))
            #observation_writer.writerow(state.tolist())

        else:
            ob = env.reset()
            state = np.hstack(
                (ob.angle, ob.track, ob.trackPos, ob.speedY, ob.speedX,
                 ob.speedZ, ob.wheelSpinVel / 100.0, ob.rpm))
            #observation_writer.writerow(state.tolist())

        while not done:
            keyboard.hook(get_action)
            time.sleep(0.05)
            # write action value in action.csv

            # observation_writer.writerow(state.tolist())
            # action_writer.writerow([action[0], action[1], action[2]])

            next_ob, reward, done, info = env.step(action)

            next_state = np.hstack(
                (next_ob.angle, next_ob.track, next_ob.trackPos,
                 next_ob.speedY, next_ob.speedX, next_ob.speedZ,
                 next_ob.wheelSpinVel / 100.0, next_ob.rpm))
            #observation_writer.writerow(state.tolist())
            state = next_state
            # print(step, score)
            print(next_ob.lastLapTime)
            score += reward
            step += 1

    # the file handles above are commented out, so there is nothing to close
    # action_fd.close()
    # observation_fd.close()
    sys.exit()
Example #8
    def __init__(self, eval_inst, seed=10, train_steps=10):
        super().__init__(seed=seed)

        self.env = TorcsEnv()

        self.eval_inst = eval_inst

        self.train_steps = train_steps

        self.state_size = (128, 128, 1)
        print('state size', self.state_size)

        self.action_size = self.env.action_space.n
        print('action space', self.env.action_space)
        print('action size', self.env.action_space.n)
Example #9
def env_setup(joystick_id):
    #### Generate a Torcs environment
    # enable vision input, the action is steering only (1 dim continuous action)
    env = TorcsEnv(vision=True, throttle=False)

    # without vision input, the action is steering and throttle (2 dim continuous action)
    # env = TorcsEnv(vision=False, throttle=True)

    ob = env.reset(
        relaunch=True)  # with torcs relaunch (avoid memory leak bug in torcs)
    # ob = env.reset()  # without torcs relaunch

    #### Initialize a joystick controller
    joystick_id = 0
    joypad = Controller(joystick_id)
Example #10
    def _thunk():
        # env = gym.make(ENV_ID)
        # env = Pend2.PendulumEnv()
        #        env = AliengoGym.AlienGoEnv(render = False)
        env = TorcsEnv(vision=True, throttle=True, gear_change=False)

        return env
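_thunk is a zero-argument environment factory, the shape that vectorized-environment wrappers expect. The sketch below shows how such factories are typically built and consumed; passing a port keyword to TorcsEnv is an assumption borrowed from the other examples on this page (parallel TORCS instances generally need distinct ports), and the wrapper itself is left abstract.

from gym_torcs import TorcsEnv

def make_env(port=3101):
    # zero-argument factory, one per worker
    def _thunk():
        return TorcsEnv(vision=True, throttle=True, gear_change=False, port=port)
    return _thunk

env_fns = [make_env(3101 + i) for i in range(4)]
envs = [fn() for fn in env_fns]  # or hand env_fns to a SubprocVecEnv-style wrapper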
Example #11
def test_policy(track_name, seed):

    vision = False

    env = TorcsEnv(vision=vision,
                   throttle=True,
                   gear_change=False,
                   track_name=track_name)
    nn_agent = NeuralAgent(track_name=track_name)
    #Now load the weight
    logging.info("Now we load the weight")
    try:
        nn_agent.actor.model.load_weights("./model_1343/actormodel_" +
                                          str(seed) + '_' + str(900) + ".h5")
        nn_agent.critic.model.load_weights("./model_1343/criticmodel_" +
                                           str(seed) + '_' + str(900) + ".h5")
        nn_agent.actor.target_model.load_weights("./model_1343/actormodel_" +
                                                 str(seed) + '_' + str(900) +
                                                 ".h5")
        nn_agent.critic.target_model.load_weights("./model_1343/criticmodel_" +
                                                  str(seed) + '_' + str(900) +
                                                  ".h5")
        logging.info("Weight load successfully")
    except:
        logging.info("Cannot find the weight")
    nn_agent.rollout(env)
    return None
Example #12
def run():
   """Build networks, create environment and train agent."""

   # Generate a Torcs environment
   env = TorcsEnv(vision=False, throttle=True, gear_change=False)

   with tf.Session() as sess:
  
      np.random.seed(args['seed'])
      tf.set_random_seed(args['seed'])
      
      # Actor and actor target
      n_params = 0
      actor = ActorNetwork(sess=sess, scope='actor_net', state_size=args['state_size'], 
                           action_size=args['action_size'], batch_size=args['batch_size'], 
                           lr=args['actor_lr'], n_params=n_params)
      n_params += actor.get_num_params()
      actor_target = ActorNetwork(sess=sess, scope='actor_net_target', state_size=args['state_size'], 
                           action_size=args['action_size'], batch_size=args['batch_size'], 
                           lr=args['actor_lr'], n_params=n_params)
      
      # Critic and critic target
      n_params += actor_target.get_num_params()
      critic = CriticNetwork(sess=sess, scope='critic_net', state_size=args['state_size'], 
                           action_size=args['action_size'], lr=args['critic_lr'], n_params=n_params)
      n_params += critic.get_num_params()
      critic_target = CriticNetwork(sess=sess, scope='critic_net_target', state_size=args['state_size'], 
                           action_size=args['action_size'], lr=args['critic_lr'], n_params=n_params)
      
      # Restore network params
      saver = tf.train.Saver()
      saver.restore(sess, os.path.join(os.path.join(args['resources'], "network"), args['file']+'_model'))

      # Train DDPG on Torcs
      test(sess, env, actor, actor_target, critic, critic_target)
Example #13
def playGame(finetune=0):

    demo_dir = "/home/mathew/Documents/RL/human_0/"
    param_dir = "/home/mathew/Documents/RL/wgail_info_params_0/"
    pre_actions_path = "/home/mathew/Documents/RL/human_0/pre_actions.npz"
    feat_dim = [7, 13, 1024]
    img_dim = [50, 50, 3]
    aux_dim = 10
    #encode_dim = 2
    #encode_dim is 2 in the case of pass and turn, 4 in the case of pass and turn in single trajectory
    encode_dim = 4
    action_dim = 3

    np.random.seed(1024)

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    from keras import backend as K
    K.set_session(sess)

    # initialize the env
    env = TorcsEnv(throttle=True, gear_change=False)

    # define the model
    pre_actions = np.load(pre_actions_path)["actions"]
    agent = TRPOAgent(env, sess, feat_dim, aux_dim, encode_dim, action_dim,
                      img_dim, pre_actions)

    # Load expert (state, action) pairs
    demo = np.load(demo_dir + "demo.npz")

    # Now load the weight
    print("Now we load the weight")
    try:
        if finetune:
            agent.generator.load_weights(param_dir +
                                         "params_0/generator_model_37.h5")
            agent.discriminator.load_weights(
                param_dir + "params_0/discriminator_model_37.h5")
            agent.baseline.model.load_weights(param_dir +
                                              "params_0/baseline_model_37.h5")
            agent.posterior.load_weights(param_dir +
                                         "params_0/posterior_model_37.h5")
            agent.posterior_target.load_weights(
                param_dir + "params_0/posterior_target_model_37.h5")
        else:
            agent.generator.load_weights(
                param_dir + "params_bc/params_3/generator_bc_model.h5")
        print("Weight load successfully")
    except:
        print("Cannot find the weight")

    print("TORCS Experiment Start.")
    agent.learn(demo)

    print("Finish.")
Example #14
def playGame(train_indicator=0):    #1 means Train, 0 means simply Run
    BUFFER_SIZE = 100000
    BATCH_SIZE = 32
    GAMMA = 0.99
    TAU = 0.001     #Target Network HyperParameters
    LRA = 0.0001    #Learning rate for Actor
    LRC = 0.001     #Learning rate for Critic

    action_dim = 4  #Steering/Acceleration/Brake
    state_dim = 29  #of sensors input

    np.random.seed(1337)

    vision = False

    EXPLORE = 100000.
    episode_count = 2000
    max_steps = 100000
    reward = 0
    done = False
    step = 0
    epsilon = 1
    indicator = 0

    # Generate a Torcs environment
    env = TorcsEnv(vision=vision, throttle=False,gear_change=False)
    nb_actions = 3  # left, nothing, right

    model = Sequential()
    model.add(Flatten(input_shape=(window_length,29)))
    model.add(Dense(64))
    model.add(Activation('relu'))
    model.add(Dense(64))
    model.add(Activation('relu'))
    model.add(Dense(64))
    model.add(Activation('relu'))
    model.add(Dense(nb_actions, activation='linear'))
    print(model.summary())

    memory = SequentialMemory(limit=1000000, window_length=window_length)
    policy = BoltzmannQPolicy(tau=1.)
    processor=MyProcessor()

    dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=100,
                enable_dueling_network=True, dueling_type='avg',
                target_model_update=1e-2, policy=policy,
                processor=processor)
    dqn.compile(RMSprop(lr=1e-3), metrics=['mae'])
    dqn.load_weights('duel_dqn_{}_weights.h5f'.format(ENV_NAME))
    dqn.fit(env, nb_steps=500000, visualize=False, verbose=2)

    # After training is done, we save the final weights.
    dqn.save_weights('duel_dqn_{}_weights.h5f'.format(ENV_NAME), overwrite=True)

    # Finally, evaluate our algorithm for 5 episodes.
    dqn.test(env, nb_episodes=5, visualize=False)
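MyProcessor is referenced but not defined in this snippet. Because the environment is created with throttle=False, the TORCS action is steering only, so a keras-rl Processor that maps the three discrete DQN outputs onto steering values and flattens the observation could look roughly like the sketch below; the concrete steering mapping is our assumption, not the repository's.

import numpy as np
from rl.core import Processor

class MyProcessor(Processor):
    STEERING = [-1.0, 0.0, 1.0]  # assumed mapping: left, straight, right

    def process_observation(self, ob):
        # flatten the gym_torcs observation into the 29-dim sensor vector
        return np.hstack((ob.angle, ob.track, ob.trackPos, ob.speedX, ob.speedY,
                          ob.speedZ, ob.wheelSpinVel / 100.0, ob.rpm))

    def process_action(self, action):
        # DQN emits an index in {0, 1, 2}; TORCS (throttle=False) expects [steering]
        return [self.STEERING[action]]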
Example #15
def testDDPG(sess, args, actor, critic):

    # Generate a Torcs environment
    env = TorcsEnv(vision=False, throttle=True, gear_change=False)    


    episode_count = args['episode_count']
    max_steps = args['max_steps']


    for i in range(episode_count):

        if np.mod(i, 100) == 0:
            ob = env.reset(relaunch=True)   #relaunch TORCS every N episode because of the memory leak error
        else:
            ob = env.reset()


        s = np.hstack((ob.angle, ob.track, ob.trackPos, ob.speedX, ob.speedY,  ob.speedZ, ob.wheelSpinVel/100.0, ob.rpm))

        ep_reward = 0
        ep_ave_max_q = 0

        
        for j in range(max_steps):

            
            a = actor.predict(np.reshape(s, (1, actor.s_dim))) 
            # NOISE AT TEST TIME MAY BE REQUIRED TO STABILIZE ACTIONS
            a[0,:] += OU(x=a[0,:], mu=mu, sigma=sigma, theta=theta)

            ob, r, terminal, info = env.step(a[0])
            s2 = np.hstack((ob.angle, ob.track, ob.trackPos, ob.speedX, ob.speedY, ob.speedZ, ob.wheelSpinVel/100.0, ob.rpm))


            s = s2
            ep_reward += r

            if terminal:

                print('| Reward: {:d} | Episode: {:d} | Qmax: {:.4f}'.format(
                    int(ep_reward), i, ep_ave_max_q / float(max(j, 1))))
                break
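OU(...) above adds Ornstein-Uhlenbeck exploration noise (mu, sigma and theta are globals in that snippet). A standalone sketch matching the keyword signature used, with illustrative default parameters:

import numpy as np

def OU(x, mu=0.0, sigma=0.3, theta=0.15):
    # one Euler step of an Ornstein-Uhlenbeck process: pull x toward mu, plus Gaussian noise
    return theta * (mu - x) + sigma * np.random.randn(*np.shape(x))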
Example #16
def main(args):
    args.seed = 0
    random.seed(args.seed)
    np.random.seed(args.seed)

    if args.task == "DuskDrive":
        env = gym.make('flashgames.DuskDrive-v0')
        env.configure(remotes=1)

    elif args.task == "Torcs":
        from gym_torcs import TorcsEnv
        env = TorcsEnv(vision=True, throttle=False)

    elif args.task == "Torcs_novision":
        from gym_torcs import TorcsEnv
        env = TorcsEnv(vision=False, throttle=False)
        
    sess = get_session(str(args.gpu))
    env = setup(env, args)
    collect(env, sess, args)
Example #17
    def sample_one(self):
        """
        MODIFIED SAMPLING FOR TORCS!
        """
        print()
        print('START PLOTTING MODULE'.center(80, '='))
        roll_distance = []
        print()
        print("TORCS Experiment Start".center(80, '='))
        env = TorcsEnv(vision=self.config.vision,
                       throttle=self.config.throttle)
        try:
            ob = env.reset()
            sonar, grayscale = self.image_to_sonar(ob.img)
            sonar = np.reshape(sonar, [19])
            state = np.concatenate(
                [sonar, np.array([ob.speedX, ob.speedY, ob.speedZ])], axis=0)
            obs, states, actions, rewards, sonars, grayscales = [], [], [], [], [], []

            done = False  #has the episode ended?
            start_time = time.time()
            while not done and (time.time() - start_time < 300):
                states.append(state)
                obs.append(ob)
                sonars.append(sonar)
                grayscales.append(grayscale)
                state = np.concatenate(
                    [sonar, np.array([ob.speedX, ob.speedY, ob.speedZ])],
                    axis=0)

                action = self.sess.run(
                    self.sampled_action,
                    feed_dict={
                        self.observation_placeholder:
                        np.reshape(state, [1, self.observation_dim])
                    })[0]
                ob, reward, done, info = env.step(action)
                sonar, grayscale = self.image_to_sonar(ob.img)
                sonar = np.reshape(sonar, [19])

                #print('Action: ', action)
                actions.append(action)
                rewards.append(reward)
                roll_distance.append(env.distance_travelled)
                #print('Roll distance: ', roll_distance)
        except:
            raise

        finally:
            env.end()  # This is for shutting down TORCS
            print("Finished TORCS session".center(80, '='))
            print('Final distance: ', roll_distance[-1], ' [m]')
            print('END PLOTTING MODULE'.center(80, '='))
            #Plot some of the frames:
            self.grayscales = grayscales
            self.sonars = sonars
            self.obs = obs
            self.actions = actions
            self.roll_distance = roll_distance
            return
Example #18
def main():
    ppo = PPOAgent()
    env = TorcsEnv(text=True, vision=False, throttle=True, gear_change=False)
    saver = tf.train.Saver()

    with tf.Session() as sess:
        saver.restore(sess, './save_model13/models/model_59557.ckpt')

        for i in range(1000):
            obs = env.reset()
            state = convert_obs(obs)
            score, step = 0, 0

            while True:
                action = ppo.choose_action(state)
                next_obs, reward, done, _ = env.step(action)
                time.sleep(0.05)

                next_state = convert_obs(next_obs)

                score += reward
                step += 1

                if done:
                    print(step, score)
                    state = convert_obs(env.reset())
                    step, score = 0, 0
                else:
                    state = next_state
Example #19
    def retrain(self):
       for episode in range(n_episode):
           self.env = TorcsEnv(vision=True, throttle=False)
           ob = self.env.reset(relaunch=True)
           reward_sum = 0
           i = 0
           print("# Episode: %d start" % episode)
           for i in range(steps):
              act = self.model.predict(self.img_reshape(ob.img/255))
              ob, reward, done, _ = self.env.step(act)
              if done is True:
                 break
              else:
                 self.D.append([self.img_reshape(ob.img/255), act, np.array([reward])])
              reward_sum += reward

           print("# step: %d reward: %f " % (i, reward_sum))
           self.env.end()
           if i == (steps-1):
              break
           self.train()
           self.save()     
Example #20
def play_trajectory(filepath,counts):
    
    # Defining torcs environment
    env = TorcsEnv(vision=VISION,throttle=True,gear_change=False)


    states,actions = load_expert_trajectory(filepath)
    
    for i in range(counts):
        print("Playing count : {}".format(i))
        if np.mod(i, 3) == 0:
            ob = env.reset(relaunch=True)
        else:
            ob = env.reset()

        state = np.hstack((ob.angle, ob.track, ob.trackPos, ob.speedX, ob.speedY, ob.speedZ, ob.wheelSpinVel/100.0, ob.rpm))

        for action in actions:
        
            # take action and observe reward and next state     
            ob, reward, done, info = env.step(action)

            if done:
                break
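load_expert_trajectory is not shown; the loop only needs it to return parallel sequences of states and actions. A minimal sketch, assuming the trajectory file is an .npz archive with 'states' and 'actions' arrays (the on-disk format is purely an assumption):

import numpy as np

def load_expert_trajectory(filepath):
    # assumed layout: np.savez(filepath, states=..., actions=...)
    data = np.load(filepath)
    return data["states"], data["actions"]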
Example #21
def main():
    # Creating necessary directories
    track_no = 5
    experiment_name = "tensorboard-4"
    experiment_dir  = "experiment-%s/" % experiment_name
    datas_dir = experiment_dir + "datas-track-no-%d/" % track_no
    models_dir = datas_dir + "model/"

    if not os.path.exists(experiment_dir):
        print("%s doesn't exist" % experiment_dir)
        return

    if not os.path.exists(datas_dir):
        print("%s doesn't exist" % datas_dir)
        return

    if not os.path.exists(models_dir):
        print("%s doesn't exist" % models_dir)
        return

    state_dim = 4
    img_dim = [304, 412, 3]
    sess = tf.InteractiveSession()
    agent = Supervise(sess, state_dim, img_dim, models_dir)
    agent.load_network()

    MAX_STEP = 10000
    step = 0
    vision = True
    env = TorcsEnv(vision=vision, throttle=True, text_mode=False, track_no=track_no, random_track=False, track_range=(5, 8))
    for i in range(1):
        if np.mod(i, 3) == 0:
            ob = env.reset(relaunch=True)
        else:
            ob = env.reset()

        s_t = np.hstack((ob.speedX, ob.speedY, ob.speedZ, 0.0))
        i_t = ob.img
        # print(i_t)

        while step < MAX_STEP:
            action = agent.action(s_t, i_t)
            ob, reward, done, info = env.step([action, 0.16, 0])
            s_t = np.hstack((ob.speedX, ob.speedY, ob.speedZ, action))
            i_t = ob.img

            print("Step", step, "Action", action, "Reward", reward)
            if done == True:
                break

    env.end()
Example #22
    def collectData(self):
        # collect state-action pairs for imitation learning
        # self.images_all = np.zeros((0, img_dim[0], img_dim[1], img_dim[2]))
        # self.actions_all = np.zeros((0, n_action))
        # self.rewards_all = np.zeros((0,))

        # img_list = []
        # action_list = []
        # reward_list = []
      
        self.env = TorcsEnv(vision=True, throttle=False)
        ob = self.env.reset(relaunch=True)

        print('Collecting data from expert ... ')
        for i in range(steps):
            if i == 0:
                act = np.array([0.0])
            else:
                act = self.get_teacher_action(ob)
                print("act %f" % act)
            if i % 100 == 0:
                print("step:", i)
            ob, reward, done, _ = self.env.step(act)
          #  img_list.append(ob.img/255)  # normanize RGB value to [0,1]
          #  action_list.append(act)
          #  reward_list.append(np.array([reward]))
            if i % 10 == 0:
                self.save_img(ob.img, i)
            self.D.append([self.img_reshape(ob.img/255), act, np.array([reward])])

            if len(self.D) > memory:
                self.D.popleft()

        print("step: %d" % steps)
        self.env.end()
Example #23
def main():
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    from keras import backend as K
    K.set_session(sess)

    generator = Generator(sess, feat_dim, aux_dim, encode_dim, action_dim)
    base_model = ResNet50(weights='imagenet', include_top=False)
    feat_extractor = Model(
        input=base_model.input,
        output=base_model.get_layer('activation_40').output
    )

    try:
        generator.model.load_weights(param_path)
        print("Weight load successfully")
    except:
        print("cannot find weight")

    env = TorcsEnv(throttle=True, gear_change=False)

    print("Start driving ...")
    ob = env.reset(relaunch=True)
    feat, aux = get_state(ob, aux_dim, feat_extractor)

    encode = np.zeros((1, encode_dim), dtype=np.float32)
    encode[0, code] = 1
    print("Encode:", encode[0])

    pre_actions = np.load(pre_actions_path)["actions"]

    for i in range(MAX_STEP_LIMIT):
        if i < MIN_STEP_LIMIT:
            action = np.zeros(3, dtype=np.float32)
        elif i < MIN_STEP_LIMIT + PRE_STEP:
            action = pre_actions[i - MIN_STEP_LIMIT]
        else:
            action = generator.model.predict([feat, aux, encode])[0]

        ob, reward, done, _ = env.step(action)
        feat, aux = get_state(ob, aux_dim, feat_extractor)

        if i == MIN_STEP_LIMIT + PRE_STEP:
            print("Start deciding ...")

        print("Step:", i, "DistFromStart:", ob.distFromStart,
              "TrackPos:", ob.trackPos, "Damage:", ob.damage.item(),
              "Action: %.6f %.6f %.6f" % (action[0], action[1], action[2]),
              "Speed:", ob.speedX * 200)

        if done:
            break

    env.end()
    print("Finish.")
Example #24
def train(device):
    # hyper-parameters
    coeff_entropy = 0.00001
    lr = 5e-4
    mini_batch_size = 64
    horizon = 2048
    nupdates = 10
    nepoch = 5000
    clip_value = 0.2
    train = True
    render = False
    # initialize env
    env = TorcsEnv(port=3101, path="/usr/local/share/games/torcs/config/raceman/quickrace.xml")
    insize = env.observation_space.shape[0]
    outsize = env.action_space.shape[0]

    policy = MLPPolicy(insize, action_space = outsize)
    policy.to(device)
    if os.path.exists('policy.pth'):
        policy.load_state_dict(torch.load('policy.pth', map_location = device))
        print('Loading complete!')
    if train:
        optimizer = Adam(lr=lr, params=policy.parameters())
        mse = MSELoss()

        # start training
        for e in range(nepoch):
            # generate trajectories
            relaunch = e%100 == 0
            observations, actions, logprobs, returns, values, rewards = \
                generate_trajectory(env, policy, horizon, is_render=render,
                                    obs_fn=None, progress=True, device=device, is_relaunch = relaunch)
            print('Episode %s reward is %s' % (e, rewards.sum()))
            memory = (observations, actions, logprobs, returns[:-1], values)
            # update using ppo
            policy_loss, value_loss, dist_entropy =\
                ppo_update(
                    policy, optimizer, mini_batch_size, memory, nupdates,
                    coeff_entropy=coeff_entropy, clip_value=clip_value, device=device
                )
            print('\nEpisode: {}'.format(e))
            print('Total reward {}'.format(rewards.sum()))
            print('Entropy', dist_entropy)
            print('Policy loss', policy_loss)
            print('Value loss', value_loss)
            if np.mod(e+1, 10) == 0:
                print("saving model")
                torch.save(policy.state_dict(), 'policy.pth')
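ppo_update is imported from elsewhere in that repository; the heart of any such update is the clipped surrogate objective. A PyTorch sketch of that loss for a single mini-batch (the function name and argument names are ours, not necessarily those used by ppo_update):

import torch

def clipped_surrogate_loss(new_logprobs, old_logprobs, advantages, clip_value=0.2):
    # PPO policy loss: ratio = pi_new / pi_old, clipped to [1 - clip_value, 1 + clip_value]
    ratio = torch.exp(new_logprobs - old_logprobs)
    unclipped = ratio * advantages
    clipped = torch.clamp(ratio, 1.0 - clip_value, 1.0 + clip_value) * advantages
    return -torch.min(unclipped, clipped).mean()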
Example #25
    def run(self):
        ### create TORCS environment
        env = TorcsEnv(vision=False, throttle=True)   

        ### start run according to supplied arguments
        if self.algorithm == "dqn" and self.modus == "train":
            agent = DQNAgent(env, self.track, self.numOfEpisodes)
            agent.trainAgent()
        elif self.algorithm == "dqn" and self.modus == "test":
            agent = DQNAgent(env, self.track, self.numOfEpisodes)
            agent.testAgent()
        elif self.algorithm == "ddpg" and self.modus == "train":
            agent = DDPGAgent(env, self.track, self.numOfEpisodes)
            agent.trainAgent()
        elif self.algorithm == "ddpg" and self.modus == "test":
            agent = DDPGAgent(env, self.track, self.numOfEpisodes)
            agent.testAgent()
Example #26
    def __init__(self):
        self.critic = self.build_critic()
        if CONTINUOUS is False:
            self.actor = self.build_actor()
        else:
            self.actor = self.build_actor_continuous()

        self.env = env = TorcsEnv(vision=True,
                                  throttle=False,
                                  gear_change=False)
        print(self.env.action_space, 'action_space',
              self.env.observation_space, 'observation_space')
        self.episode = 0
        self.observation = self.env.reset()
        self.val = False
        self.reward = []
        self.reward_over_time = []
        self.name = self.get_name()
        self.writer = SummaryWriter(self.name)
        self.gradient_steps = 0
Example #27
def play_training(training=True, load_model=True):
    with tf.device("/cpu:0"):
        global_episodes = tf.Variable(0,
                                      dtype=tf.int32,
                                      name='global_episodes',
                                      trainable=False)
        # trainer = tf.train.RMSPropOptimizer(learning_rate=1e-4, decay=0.99, epsilon=1)
        trainer = tf.train.AdamOptimizer(learning_rate=1e-4)
        master_network = AC_Network(s_size, a_size, 'global', None, False)

        if training:
            #num_workers = multiprocessing.cpu_count()  # Set workers at number of available CPU threads
            num_workers = 4
        else:
            num_workers = 1

        workers = []
        for i in range(num_workers):
            workers.append(
                Worker(
                    TorcsEnv(vision=True,
                             throttle=False,
                             gear_change=False,
                             port=3101 + i), i, s_size, a_size, trainer,
                    model_path, global_episodes, False))
        saver = tf.train.Saver()

    with tf.Session() as sess:
        coord = tf.train.Coordinator()
        initialize_variables(saver, sess, load_model)
        # Asynchronous magic happens: start the "work" process for each worker in a separate thread.
        worker_threads = []
        for worker in workers:
            worker_work = lambda w=worker: w.work(max_episode_length, gamma,
                                                  sess, coord, saver, training)
            t = threading.Thread(target=worker_work)
            t.start()
            sleep(0.5)
            worker_threads.append(t)
        coord.join(
            worker_threads)  # waits until the specified threads have stopped.
Example #28
def test():
    env = TorcsEnv(vision=True, throttle=False)
    ob = env.reset(relaunch=True)
    reward_sum = 0.0
    done = False

    count = 0
    while not done:
        act = model.predict(img_reshape(ob.img).astype('float32') / 255)
        #print(act)
        count += 1
        ob, reward, done, _ = env.step(act)
        reward_sum += reward
    env.end()
    print("Steps before crash: ", count, reward_sum)
    return count, reward_sum
Example #29
class A3C_Agent(Process):
    def __init__(self, rank, global_net, counter, lock, args):
        super(A3C_Agent, self).__init__()
        self.network = A3C_Network(29, 3, lock)
        self.global_net = global_net
        self.counter = counter
        self.lock = lock
        self.args = args
        self.done = True
        self.name = ''
        self.env = TorcsEnv(
            port=3101 + rank,
            path='/usr/local/share/games/torcs/config/raceman/quickrace.xml')
        self.reset()

    def reset(self, relaunch=None):
        if relaunch is None:
            relaunch = self.done
        # Synchronizing
        self.time_step = self.counter.value()
        self.network.reset(self.global_net, self.done)
        if self.done:
            self.state = self.env.reset(relaunch=relaunch,
                                        sampletrack=True,
                                        render=False)
        self.values = []
        self.rewards = []
        self.log_probs = []
        self.entropies = []

    def normalize_state(self):
        def normalize_from(x, a, b):
            return (2 * ((x - a) / (b - a))) - 1

        self.state[1:20] = normalize_from(self.state[1:20], -0.01, 2)
        self.state[21:] = normalize_from(self.state[21:], 0, 1)

    def internal_log(self, *msg):
        with open(f'../../logs/{self.name}.txt', 'a+') as file:
            print(*msg, file=file)
Example #30
def programmatic_game(tree_program, track_name='practgt2.xml'):
    episode_count = 2
    max_steps = 100000
    window = 5

    # Generate a Torcs environment
    env = TorcsEnv(vision=False,
                   throttle=True,
                   gear_change=False,
                   track_name=track_name)

    logging.info("TORCS Experiment Start with Priors on " + track_name)
    for i_episode in range(episode_count):
        ob = env.reset(
            relaunch=True
        )  # relaunch TORCS every episode because of the memory leak error
        tempObs = [[ob.speedX], [ob.angle], [ob.trackPos], [ob.speedY],
                   [ob.speedZ], [ob.rpm],
                   list(ob.wheelSpinVel / 100.0),
                   list(ob.track), [0, 0, 0]]
        newobs = [item for sublist in tempObs[:-1] for item in sublist]

        for j in range(max_steps):
            act_tree = tree_program.predict([newobs])
            action_prior = [act_tree[0][0], act_tree[0][1], act_tree[0][2]]

            tempObs = [[ob.speedX], [ob.angle], [ob.trackPos], [ob.speedY],
                       [ob.speedZ], [ob.rpm],
                       list(ob.wheelSpinVel / 100.0),
                       list(ob.track), action_prior]
            newobs = [item for sublist in tempObs[:-1] for item in sublist]

            ob, r_t, done, info = env.step(action_prior)
            if np.mod(j, 1000) == 0:
                logging.info("Episode " + str(i_episode) + " Distance " +
                             str(ob.distRaced) + " Lap Times " +
                             str(ob.lastLapTime))

            if done:
                print('Done. Steps: ', j)
                break

        env.end()  # This is for shutting down TORCS
        logging.info("Finish.")
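programmatic_game only requires tree_program to expose predict([obs]) returning a (1, 3) action. One plausible way to obtain such a program, not necessarily the one used by this repository, is to fit a multi-output decision tree on logged observation/action pairs:

import numpy as np
from sklearn.tree import DecisionTreeRegressor

def fit_tree_program(observations, actions, max_depth=6):
    # observations: (N, 29) flattened sensor vectors as built above; actions: (N, 3) [steer, accel, brake]
    tree = DecisionTreeRegressor(max_depth=max_depth)
    tree.fit(np.asarray(observations), np.asarray(actions))
    return tree  # tree.predict([obs]) yields a (1, 3) action, matching the calls above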
Example #31
def playGame(train_indicator=1):    #1 means Train, 0 means simply Run
    BUFFER_SIZE = 100000
    BATCH_SIZE = 32
    GAMMA = 0.99
    TAU = 0.001     #Target Network HyperParameters
    LRA = 0.0001    #Learning rate for Actor
    LRC = 0.001     #Learning rate for Critic

    action_dim = 3  #Steering/Acceleration/Brake
    state_dim = 29  #of sensors input

    np.random.seed(1337)

    vision = False

    EXPLORE = 100000.
    episode_count = 2000
    max_steps = 100000
    reward = 0
    done = False
    step = 0
    epsilon = 1
    indicator = 0

    #Tensorflow GPU optimization
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    from keras import backend as K
    K.set_session(sess)

    actor = ActorNetwork(sess, state_dim, action_dim, BATCH_SIZE, TAU, LRA)
    critic = CriticNetwork(sess, state_dim, action_dim, BATCH_SIZE, TAU, LRC)
    buff = ReplayBuffer(BUFFER_SIZE)    #Create replay buffer

    # Generate a Torcs environment
    env = TorcsEnv(vision=vision, throttle=True,gear_change=False)

    #Now load the weight
    print("Now we load the weight")
    try:
        actor.model.load_weights("actormodel.h5")
        critic.model.load_weights("criticmodel.h5")
        actor.target_model.load_weights("actormodel.h5")
        critic.target_model.load_weights("criticmodel.h5")
        print("Weight load successfully")
    except:
        print("Cannot find the weight")

    print("TORCS Experiment Start.")
    for i in range(episode_count):

        print("Episode : " + str(i) + " Replay Buffer " + str(buff.count()))

        if np.mod(i, 3) == 0:
            ob = env.reset(relaunch=True)   #relaunch TORCS every 3 episode because of the memory leak error
        else:
            ob = env.reset()

        s_t = np.hstack((ob.angle, ob.track, ob.trackPos, ob.speedX, ob.speedY,  ob.speedZ, ob.wheelSpinVel/100.0, ob.rpm))
     
        total_reward = 0.
        for j in range(max_steps):
            loss = 0 
            epsilon -= 1.0 / EXPLORE
            a_t = np.zeros([1,action_dim])
            noise_t = np.zeros([1,action_dim])
            
            a_t_original = actor.model.predict(s_t.reshape(1, s_t.shape[0]))
            noise_t[0][0] = train_indicator * max(epsilon, 0) * OU.function(a_t_original[0][0],  0.0 , 0.60, 0.30)
            noise_t[0][1] = train_indicator * max(epsilon, 0) * OU.function(a_t_original[0][1],  0.5 , 1.00, 0.10)
            noise_t[0][2] = train_indicator * max(epsilon, 0) * OU.function(a_t_original[0][2], -0.1 , 1.00, 0.05)

            #The following code do the stochastic brake
            #if random.random() <= 0.1:
            #    print("********Now we apply the brake***********")
            #    noise_t[0][2] = train_indicator * max(epsilon, 0) * OU.function(a_t_original[0][2],  0.2 , 1.00, 0.10)

            a_t[0][0] = a_t_original[0][0] + noise_t[0][0]
            a_t[0][1] = a_t_original[0][1] + noise_t[0][1]
            a_t[0][2] = a_t_original[0][2] + noise_t[0][2]

            ob, r_t, done, info = env.step(a_t[0])

            s_t1 = np.hstack((ob.angle, ob.track, ob.trackPos, ob.speedX, ob.speedY, ob.speedZ, ob.wheelSpinVel/100.0, ob.rpm))
        
            buff.add(s_t, a_t[0], r_t, s_t1, done)      #Add replay buffer
            
            #Do the batch update
            batch = buff.getBatch(BATCH_SIZE)
            states = np.asarray([e[0] for e in batch])
            actions = np.asarray([e[1] for e in batch])
            rewards = np.asarray([e[2] for e in batch])
            new_states = np.asarray([e[3] for e in batch])
            dones = np.asarray([e[4] for e in batch])
            y_t = np.asarray([e[1] for e in batch])  # placeholder sized like the batch; overwritten element-wise below

            target_q_values = critic.target_model.predict([new_states, actor.target_model.predict(new_states)])  
           
            for k in range(len(batch)):
                if dones[k]:
                    y_t[k] = rewards[k]
                else:
                    y_t[k] = rewards[k] + GAMMA*target_q_values[k]
       
            if (train_indicator):
                loss += critic.model.train_on_batch([states,actions], y_t) 
                a_for_grad = actor.model.predict(states)
                grads = critic.gradients(states, a_for_grad)
                actor.train(states, grads)
                actor.target_train()
                critic.target_train()

            total_reward += r_t
            s_t = s_t1

            if np.mod(step, 30) == 0:
                print("Episode", i, "Step", step, "Action", a_t, "Reward", r_t, "Loss", loss)
        
            step += 1
            if done:
                break

        if np.mod(i, 3) == 0:
            if (train_indicator):
                print("Now we save model")
                actor.model.save_weights("actormodel.h5", overwrite=True)
                with open("actormodel.json", "w") as outfile:
                    json.dump(actor.model.to_json(), outfile)

                critic.model.save_weights("criticmodel.h5", overwrite=True)
                with open("criticmodel.json", "w") as outfile:
                    json.dump(critic.model.to_json(), outfile)

        print("TOTAL REWARD @ " + str(i) +"-th Episode  : Reward " + str(total_reward))
        print("Total Step: " + str(step))
        print("")

    env.end()  # This is for shutting down TORCS
    print("Finish.")
Example #32
from gym_torcs import TorcsEnv
from sample_agent import Agent
import numpy as np

vision = True
episode_count = 10
max_steps = 50
reward = 0
done = False
step = 0

# Generate a Torcs environment
env = TorcsEnv(vision=vision, throttle=False)

agent = Agent(1)  # steering only


print("TORCS Experiment Start.")
for i in range(episode_count):
    print("Episode : " + str(i))

    if np.mod(i, 3) == 0:
        # Sometimes you need to relaunch TORCS because of the memory leak error
        ob = env.reset(relaunch=True)
    else:
        ob = env.reset()

    total_reward = 0.
    for j in range(max_steps):
        action = agent.act(ob, reward, done, vision)
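Agent here is gym_torcs's bundled sample agent; any object with the same two-method interface will drive the loop. A random-policy stand-in (a sketch, not the packaged implementation):

import numpy as np

class Agent:
    def __init__(self, dim_action):
        self.dim_action = dim_action

    def act(self, ob, reward, done, vision_on):
        # ignore the observation and return a random action in (-1, 1) per dimension
        return np.tanh(np.random.randn(self.dim_action))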
Example #33
#     critic_model = load_model('critic_model_{}.h5'.format(ITERATIONS))
#     update_op, action_gradient_holder = get_actor_update_operation(actor_model)
#     gradient_op = get_gradient_operation(critic_model)
# else:

import os
import sys
sys.path.append(os.path.abspath('./gym_torcs'))
print(sys.path)
from gym_torcs import TorcsEnv

#### Generate a Torcs environment
# enable vision input, the action is steering only (1 dim continuous action)
#env = TorcsEnv(vision=False, throttle=False)

# without vision input, the action is steering and throttle (2 dim continuous action)
env = TorcsEnv(vision=False, throttle=True)

# ob = env.reset()  # without torcs relaunch

# Generate an agents
actor_model = basic_actor_model()
critic_model = basic_critic_model()
if TARGET_MODEL:
    target_actor_model = basic_actor_model()
    target_critic_model = basic_critic_model()
else:
    target_actor_model = None
    target_critic_model = None


update_op, action_gradient_holder = get_actor_update_operation(actor_model)