Exemplo n.º 1
0
def _observation_to_state(ob):
    """Flatten a TORCS observation into the 1-D state vector fed to the networks.

    Concatenates angle, track, trackPos, speedX, speedY, speedZ,
    wheelSpinVel / 100 and rpm, in that order.
    """
    return np.hstack((ob.angle, ob.track, ob.trackPos, ob.speedX, ob.speedY,
                      ob.speedZ, ob.wheelSpinVel / 100.0, ob.rpm))


def main():
    """Train (or evaluate) the actor-critic agent on TORCS and report the runtime.

    With ``train_indicator`` set to True (the hard-coded default) the function
    runs ``max_episode`` episodes of at most ``max_step`` steps each, adds
    exploration noise to the actor output, stores transitions in the replay
    buffer, and calls ``train`` / ``soft_update`` once the buffer holds more
    than ``train_start_size`` transitions.  The actor is saved to ``modelPATH``
    at the end and the total step count and wall-clock time are printed.

    With ``train_indicator`` set to False the saved actor is loaded, a single
    noise-free episode is run, its score is printed, and the function returns.

    The module-level ``iteration`` counter is incremented once per training
    step and is used as the x-axis for the noise scalars.

    Returns:
        0 in evaluation mode, None after training.
    """
    s_time = timeit.default_timer()
    global iteration
    env = TorcsEnv(vision=False, throttle=True, gear_change=False)
    memory = ReplayBuffer()
    train_indicator = True
    modelPATH = os.path.join('.', "models", 'E0011.pt')

    # Online networks and their target copies, initialised with equal weights.
    q, q_target = QNet(state_dim, action_dim), QNet(state_dim, action_dim)
    q_target.load_state_dict(q.state_dict())
    mu, mu_target = MuNet(state_dim), MuNet(state_dim)
    mu_target.load_state_dict(mu.state_dict())
    steer_noise = OUN(np.zeros(1), theta=0.6)
    accel_noise = OUN(np.zeros(1), theta=0.6)
    mu_optimizer = optim.Adam(mu.parameters(), lr=lr_mu)
    q_optimizer = optim.Adam(q.parameters(), lr=lr_q)

    # tensorboard writer
    current_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
    log_dir = os.path.join("logs", "ddpg_torch", current_time + 'E0011t')
    writer = SummaryWriter(log_dir)
    # Dummy inputs sized from the network dimensions, used only to trace the graph.
    samplestate = torch.rand(1, state_dim)
    sampleaction = torch.rand(1, action_dim)
    writer.add_graph(q, (samplestate, sampleaction))
    # The original had `writer.close` (no call), which did nothing.  Flush here
    # so the graph is written now; the writer is closed once logging is done.
    writer.flush()

    if not train_indicator:
        # NOTE(review): torch.load unpickles a whole module object; only load
        # checkpoints from a trusted source.
        mu = torch.load(modelPATH)
        mu.eval()
        ob = env.reset()
        score = 0
        for n_step in range(100000):
            s_t = _observation_to_state(ob)
            a_t = mu(torch.from_numpy(s_t.reshape(1, -1)).float()).detach().numpy()
            ob, r_t, done, _ = env.step(a_t[0])
            score += r_t
            if done:
                print("score:", score)
                break
        env.end()
        writer.close()
        return 0

    for n_epi in range(max_episode):
        print("Episode : " + str(n_epi) + " Replay Buffer " + str(memory.size()))
        # Relaunch TORCS every 3rd episode because of its memory leak error.
        if n_epi % 3 == 0:
            ob = env.reset(relaunch=True)
        else:
            ob = env.reset()
        a_t = np.zeros([1, action_dim])
        s_t = _observation_to_state(ob)
        score = 0
        q_value_writer(q, mu, s_t, writer, 'Episode Start Q value')
        q_value_writer(q_target, mu_target, s_t, writer, 'Episode Start target Q value')
        for n_step in range(max_step):
            a_origin = mu(torch.from_numpy(s_t.reshape(1, -1)).float())
            if train_indicator:
                # Add exploration noise while training.
                sn = steer_noise()
                an = accel_noise()
                raw_action = a_origin.detach().numpy()[0]
                a_t[0][0] = np.clip(raw_action[0] + sn, -1, 1)  # steer range
                a_t[0][1] = np.clip(raw_action[1] + an, 0, 1)   # accel range
                # Record the noise every 10th step.
                if iteration % 10 == 0:
                    writer.add_scalar('Steer noise', sn, iteration)
                    writer.add_scalar('Accel_noise', an, iteration)
            else:
                # Fixed typo: was `detatch()`, which raises AttributeError.
                a_t = a_origin.detach().numpy()
            ob, r_t, done, _ = env.step(a_t[0])
            score += r_t

            s_t1 = _observation_to_state(ob)
            # a_t is overwritten in place on every step, so store a copy;
            # otherwise every stored action would alias the same buffer.
            memory.put((s_t, a_t[0].copy(), r_t, s_t1, done))
            s_temp = s_t.copy()  # state before this step, for the end-of-episode Q log
            s_t = s_t1

            if train_indicator and memory.size() > train_start_size:
                train(mu, mu_target, q, q_target, memory, q_optimizer, mu_optimizer, writer)
                soft_update(mu, mu_target)
                soft_update(q, q_target)

            iteration += 1

            if done:
                q_value_writer(q, mu, s_temp, writer, 'Episode End Q value')
                q_value_writer(q_target, mu_target, s_temp, writer, 'Episode End target Q value')
                break

        print("TOTAL REWARD @ " + str(n_epi) + "-th Episode  : Reward " + str(score))
        print("Total Step: " + str(n_step))
        print("")

    # Make sure the checkpoint directory exists before saving the actor.
    os.makedirs(os.path.dirname(modelPATH), exist_ok=True)
    torch.save(mu, modelPATH)

    env.end()
    writer.close()

    e_time = timeit.default_timer()
    print("Total step {} and time spent {}".format(iteration, e_time - s_time))
Exemplo n.º 2
0
def _flatten_observation(obs):
    """Concatenate the observation fields into one flat state vector.

    wheelSpinVel is divided by 100 before concatenation.
    """
    fields = (
        obs.angle,
        obs.track,
        obs.trackPos,
        obs.speedX,
        obs.speedY,
        obs.speedZ,
        obs.wheelSpinVel / 100.0,
        obs.rpm,
    )
    return np.hstack(fields)


# Smoke test: step the environment with a constant action, push every
# non-terminal transition into the replay buffer, then draw a small sample.
max_step = 500
ob = env.reset()
print("ob: ", ob)
s_t = _flatten_observation(ob)
print("s_t: ", s_t)
print("s_t size: ", s_t.size)

# Constant action applied on every step (presumably [steer, accel] - confirm).
a = [[0, 1]]
for i in range(max_step):
    ob, r_t, done, info = env.step(a[0])
    if done:
        # The terminal transition is not stored.
        break
    s_t1 = _flatten_observation(ob)
    memory.put((s_t, a[0], r_t, s_t1, done))
    s_t = s_t1

# Last state that was reached before the loop ended.
s_done = s_t
print("done?: ", s_done)
env.end()

# Draw a batch of 3 transitions and show each returned component.
s, a, r, sp, d = memory.sample(3)
print("s: ", s)
print("a: ", a)
print("r: ", r)
print("sp: ", sp)
print("d: ", d)

# # -- Noise test. --
# noise = OrnsteinUhlenbeckNoise(mu = np.zeros(1),theta=0.1,dt=0.2,sigma = 0.1, x0 = np.array([0.5]))