# TORCS data-collection driver loop: run episodes, log the selected state
# channels into `obs_dic`, and snapshot a DataFrame each time a lap completes.
# NOTE(review): relies on `env`, `agent`, `obs_dic`, `state_cols`, `save`,
# `reward`, `done`, `step`, `track_length`, `episode_count`, `max_steps`
# being defined earlier in the file — confirm they are initialized before
# this loop runs (in particular `reward`/`done`/`step` on the first act()).
print("TORCS Experiment Start.")
for i in range(episode_count):
    print("Episode : " + str(i))
    # TORCS leaks memory across resets, so relaunch every third episode.
    if i % 3 == 0:
        ob = env.reset(relaunch=True)
    else:
        ob = env.reset()
    total_reward = 0.
    for j in range(max_steps):
        # Action layout: Steering / Acceleration / Gear / Brake.
        action = agent.act(ob, reward, done, step)
        ob, reward, done, _ = env.step(action)
        # Append each tracked state channel to its running history.
        for s in state_cols:
            obs_dic[s] = np.append(obs_dic[s], ob[s])
        if ob['distFromStart'] > track_length and not save:
            # Arm the save flag only once the car nears the end of the lap.
            print("Almost at the end of the lap")
            save = True
        if ob['distFromStart'] < 3:
            # The car has just crossed the start/finish line.
            print('Lap Finished')
            # `lastLapTime > 0` guarantees a full lap was actually timed;
            # disarming `save` ensures we snapshot only once per lap.
            if save and ob['lastLapTime'] > 0:
                save = False
                df = pd.DataFrame(obs_dic)
# Main TORCS run loop: drive `episode_count` episodes, stepping the agent
# until termination or the step budget, report per-episode stats, then
# shut the simulator down.
# NOTE(review): assumes `env`, `agent`, `step`, `reward`, `done`,
# `episode_count`, `max_steps`, and `collect_data_mode` are defined
# earlier in the file — confirm against the surrounding script.
for episode in range(episode_count):
    print("Episode : " + str(episode))
    # TORCS leaks memory, so relaunch the simulator every third episode.
    ob = env.reset(relaunch=True) if episode % 3 == 0 else env.reset()
    total_reward = 0.
    # Roll over to the next dataset when the previous episode consumed its
    # full step budget without terminating (data-collection mode only).
    if step == max_steps and not done and collect_data_mode:
        agent.next_dataset()
    for _t in range(max_steps):
        action = agent.act(ob, reward, done, step)
        ob, reward, done, _ = env.step(action)
        total_reward += reward
        step += 1
        if done:
            break
    print("TOTAL REWARD @ " + str(episode) + " -th Episode : " + str(total_reward))
    print("Total Step: " + str(step))
    print("")

agent.end(step == max_steps)
env.end()  # This is for shutting down TORCS