Example #1

print("TORCS Experiment Start.")
for i in range(episode_count):
    print("Episode : " + str(i))

    if np.mod(i, 3) == 0:
        # Sometimes you need to relaunch TORCS because of the memory leak error
        ob = env.reset(relaunch=True)
    else:
        ob = env.reset()

    total_reward = 0.

    for j in range(max_steps):
        action = agent.act(ob, reward, done, step)  # Steering/Acceleration/Gear/Brake

        ob, reward, done, _ = env.step(action)
        #angle, curLapTime, distFromStart, focus, gear, lastLapTime, rpm, speed_x, speed_y, speed_z, track, trackPos, wheelSpinVel, x, y, z, roll, pitch, yaw, speedGlobalX, speedGlobalY = ob
        #angle, tgAngle, segAngle, curLapTime, distFromStart, lastLapTime, speed_x, speed_y, speed_z, trackPos, x, y, yaw = ob
        for s in state_cols:
            obs_dic[s] = np.append(obs_dic[s], ob[s])
        if ob['distFromStart'] > track_length and not save:    # arm the save flag only near the end of the lap
            print("Almost at the end of the lap")
            save = True
        if ob['distFromStart'] < 3:    # as soon as the car passes the finish line, check the lap time and save the new trajectory
            print('Lap Finished')
            if save and ob['lastLapTime'] > 0:    # guard so each lap is saved just once
                save = False
                df = pd.DataFrame(obs_dic)
                # if lastLapTime < bestTime:  # if it is the best lap time, save df as the reference trajectory
Example #2
import numpy as np

# Assumes env (a gym_torcs TorcsEnv), agent, episode_count, max_steps
# and collect_data_mode are defined earlier.
reward, done, step = 0., False, 0

for i in range(episode_count):
    print("Episode : " + str(i))

    if np.mod(i, 3) == 0:
        # Sometimes you need to relaunch TORCS because of the memory leak error
        ob = env.reset(relaunch=True)
    else:
        ob = env.reset()

    total_reward = 0.

    # If the previous episode used its whole step budget without terminating,
    # switch the agent to its next dataset while collecting data.
    if step == max_steps and not done and collect_data_mode:
        agent.next_dataset()

    for j in range(max_steps):
        action = agent.act(ob, reward, done, step)

        ob, reward, done, _ = env.step(action)

        total_reward += reward

        step += 1
        if done:
            break

    print("TOTAL REWARD @ " + str(i) + " -th Episode  :  " + str(total_reward))
    print("Total Step: " + str(step))
    print("")

agent.end(step == max_steps)  # tell the agent whether the run used the full step budget
env.end()  # This is for shutting down TORCS
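Across both examples the agent only needs to expose three methods: act, next_dataset, and end. A minimal stub satisfying that interface, loosely modelled on the random sample agent shipped with gym_torcs (the class name, dim_action, and the random policy are hypothetical):

import numpy as np

class RandomAgent(object):
    # Hypothetical stub matching the calls made in the loops above.
    def __init__(self, dim_action=3):
        self.dim_action = dim_action

    def act(self, ob, reward, done, step):
        # Return a random action vector (e.g. steering/acceleration/brake).
        return np.tanh(np.random.randn(self.dim_action))

    def next_dataset(self):
        # Called when an episode exhausts the step budget in data-collection mode.
        pass

    def end(self, reached_max_steps):
        # Called once at the end of the run, e.g. to flush logs or close files.
        pass

agent = RandomAgent(dim_action=3)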