from training import LinearFATraining
from learners import SARSALambda_LinFA_ReplacingTraces
# Assumed import paths: LinearFA_Agent and EpisodicExperiment come from PyBrain;
# the task class from this project's tasks module.
from pybrain.rl.agents.linearfa import LinearFA_Agent
from pybrain.rl.experiments import EpisodicExperiment
from tasks import LinearFATileCoding3456BalanceTaskRewardBipolar

task = LinearFATileCoding3456BalanceTaskRewardBipolar()
learner = SARSALambda_LinFA_ReplacingTraces(task.nactions, task.outdim)
learner._lambda = 0.95
task.discount = learner.rewardDiscount

agent = LinearFA_Agent(learner)
agent.epsilonGreedy = True
agent.init_exploration = 0.5
# The state has a huge number of dimensions, and the logging causes me to run
# out of memory. We needn't log, since learning is done online.
agent.logging = False

performance_agent = LinearFA_Agent(learner)
performance_agent.logging = False
performance_agent.greedy = True
performance_agent.learning = False

experiment = EpisodicExperiment(task, agent)

# TODO PyBrain says that the learning rate needs to decay, but I don't see that
# described in Randlov's paper.
# A higher number here means the learning rate decays more slowly.
learner.learningRateDecay = 2000
# NOTE Increasing this number from the default of 100 is what got the learning
# to actually happen, and fixed the issue where the performance agent's
# performance stopped improving.

tr = LinearFATraining(
    'balance_sarsalambda_linfa_replacetrace_anneal_RewardBipolar_take2',
    experiment, performance_agent,
from training import LinearFATraining
from learners import SARSALambda_LinFA_ReplacingTraces
# Assumed import paths: LinearFA_Agent and EpisodicExperiment come from PyBrain;
# the task class from this project's tasks module.
from pybrain.rl.agents.linearfa import LinearFA_Agent
from pybrain.rl.experiments import EpisodicExperiment
from tasks import LinearFATileCoding3456BalanceTaskRewardPower8

task = LinearFATileCoding3456BalanceTaskRewardPower8()
learner = SARSALambda_LinFA_ReplacingTraces(task.nactions, task.outdim)
learner._lambda = 0.95
task.discount = learner.rewardDiscount

agent = LinearFA_Agent(learner)
agent.epsilonGreedy = True
agent.init_exploration = 0.5
# The state has a huge number of dimensions, and the logging causes me to run
# out of memory. We needn't log, since learning is done online.
agent.logging = False

performance_agent = LinearFA_Agent(learner)
performance_agent.logging = False
performance_agent.greedy = True
performance_agent.learning = False

experiment = EpisodicExperiment(task, agent)

# TODO PyBrain says that the learning rate needs to decay, but I don't see that
# described in Randlov's paper.
# A higher number here means the learning rate decays more slowly.
learner.learningRateDecay = 2000
# NOTE Increasing this number from the default of 100 is what got the learning
# to actually happen, and fixed the issue where the performance agent's
# performance stopped improving.

tr = LinearFATraining(
    'balance_sarsalambda_linfa_replacetrace_anneal_RewardPower8_take1',
    experiment, performance_agent, verbose=True)
tr.train(7000, performance_interval=1, n_performance_episodes=1,
        delta_dist = dist_to_goal - dist_to_goal_last
        return -delta_tilt - delta_dist * 0.01

# matplotlib is needed for the interactive plotting below.
import matplotlib.pyplot as plt

task = LSPI_task()
learner = LSPI(9, 20)
task.rewardDiscount = 0.8
learner.rewardDiscount = 0.8

agent = LinearFA_Agent(learner)
agent.epsilonGreedy = True
exp = EpisodicExperiment(task, agent)
learner.learningRateDecay = 3000

# Greedy, non-learning agent for evaluating the current policy.
max_agent = LinearFA_Agent(learner)
max_agent.learning = False
max_agent.greedy = True

task.env.saveWheelContactTrajectories(True)

# Interactive plotting setup: two side-by-side axes; ax2 is used for the wheel
# contact-point trajectories.
plt.ion()
plt.figure(figsize=(8, 4))
ax1 = plt.subplot(1, 2, 1)
ax2 = plt.subplot(1, 2, 2)

def update_wheel_trajectories():
    # Plot the front (red) and back (blue) wheel contact-point histories.
    front_lines = ax2.plot(task.env.get_xfhist(), task.env.get_yfhist(), 'r')
    back_lines = ax2.plot(task.env.get_xbhist(), task.env.get_ybhist(), 'b')
    plt.axis('equal')
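# Hypothetical usage sketch: one way the setup above could be driven,
# alternating PyBrain learning episodes (EpisodicExperiment.doEpisodes) with
# plot refreshes. The episode count and pause length are arbitrary choices
# for illustration only.
for _ in range(2000):
    exp.doEpisodes(1)            # run one learning episode with the exploring agent
    update_wheel_trajectories()  # redraw the wheel contact paths on ax2
    plt.draw()
    plt.pause(0.001)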