if not continue_execution:
    # Fresh run: create the output directory and the per-episode CSV log header
    os.makedirs(main_outdir)

    # Initiate learning information
    with open(main_outdir + 'episode_data.csv', 'w') as csvRWRD:
        csvRWRD_writer = csv.writer(csvRWRD, dialect='excel')
        csvRWRD_writer.writerow([
            'Episode', 'Goal', 'Steps', 'Reward', 'Total Goals', 'Average Steps'
        ])

    last_time_steps = numpy.ndarray(0)

    # Fresh tabular Q-learner: fully exploratory at the start (epsilon = 1)
    qlearn = qlearn.QLearn(actions=range(env.action_space.n),
                           alpha=0.1, gamma=0.9, epsilon=1, qdir=qtabledir)

    initial_epsilon = qlearn.epsilon
    epsilon_discount = 0.9986
    start_time = time.time()

    # Book-keeping for the training loop
    total_goals = 0
    total_succeed_steps = 0
    total_episodes = 1000
    highest_reward = -10000000
    fewest_steps = 10000000
    best_act = []
    # Tail of the render() helper: close the on-screen rendering once the
    # scheduled render_episodes have been shown
    elif ((x - render_episodes) % render_interval == 0) and (x != 0) and (x > render_skip) and (render_episodes < x):
        env.render(close=True)


if __name__ == '__main__':

    env = gym.make('GazeboProjectTurtlebot-v0')

    outdir = '/tmp/gazebo_gym_experiments'
    env = gym.wrappers.Monitor(env, outdir, force=True)
    plotter = liveplot.LivePlot(outdir)

    last_time_steps = numpy.ndarray(0)

    # Q-learner for this run: purely greedy policy (epsilon = 0.0)
    qlearn = qlearn.QLearn(actions=range(env.action_space.n),
                           alpha=0.2, gamma=0.8, epsilon=0.0)

    initial_epsilon = qlearn.epsilon
    epsilon_discount = 0.9986
    start_time = time.time()

    # Book-keeping for the evaluation loop
    total_goals = 0
    total_succeed_steps = 0
    total_episodes = 1001
    highest_reward = -10000000
    fewest_steps = 10000000
    best_act = []
    best_act_time = []
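
    # -------------------------------------------------------------------------
    # The qlearn module instantiated above is not part of this listing. The
    # class below is only a rough sketch, under the assumption that QLearn is a
    # tabular Q-learner with this constructor signature (actions, alpha, gamma,
    # epsilon), epsilon-greedy action choice, and the standard one-step
    # Q-learning update. It is not the project's actual implementation, and the
    # qdir persistence argument used in the training script is omitted.
    #
    # import random
    #
    # class QLearnSketch(object):            # hypothetical stand-in for qlearn.QLearn
    #     def __init__(self, actions, alpha, gamma, epsilon):
    #         self.q = {}                    # Q-table: (state, action) -> value
    #         self.actions = list(actions)
    #         self.alpha = alpha             # learning rate
    #         self.gamma = gamma             # discount factor
    #         self.epsilon = epsilon         # exploration probability
    #
    #     def getQ(self, state, action):
    #         return self.q.get((state, action), 0.0)
    #
    #     def chooseAction(self, state):
    #         # Epsilon-greedy: explore with probability epsilon, otherwise take
    #         # the action with the highest Q-value (ties broken at random)
    #         if random.random() < self.epsilon:
    #             return random.choice(self.actions)
    #         q_values = [self.getQ(state, a) for a in self.actions]
    #         max_q = max(q_values)
    #         best = [a for a, q in zip(self.actions, q_values) if q == max_q]
    #         return random.choice(best)
    #
    #     def learn(self, state, action, reward, next_state):
    #         # One-step Q-learning update:
    #         # Q(s,a) <- Q(s,a) + alpha * (r + gamma * max_a' Q(s',a') - Q(s,a))
    #         max_next = max(self.getQ(next_state, a) for a in self.actions)
    #         old_q = self.getQ(state, action)
    #         self.q[(state, action)] = old_q + self.alpha * (reward + self.gamma * max_next - old_q)
    #
    # In the training loop, epsilon would typically be decayed once per episode
    # (e.g. qlearn.epsilon *= epsilon_discount, down to some floor), which is
    # what the epsilon_discount = 0.9986 setting above is used for.
    # -------------------------------------------------------------------------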