from pybrain.rl.agents import LearningAgent
from pybrain.rl.learners.valuebased import NFQ
from pybrain.rl.experiments import EpisodicExperiment
# SettleEnv, SettleTask, RestrictedActionValueNetwork and EpsilonHackedExplorer
# are project-specific classes; import them from the project's own modules.

def mlDriver(cv, stateTransfer, actionTransfer):
    # parameter setup
    # dimensionality of state argument (could be less than stateTransfer)
    stateDim = 352
    # number of moves possible
    numMoves = 361

    env = SettleEnv(cv, stateTransfer, actionTransfer)
    task = SettleTask(env)
    controller = RestrictedActionValueNetwork(stateDim, numMoves, env)
    learner = NFQ()
    learner.explorer = EpsilonHackedExplorer(env)
    agent = LearningAgent(controller, learner)
    experiment = EpisodicExperiment(task, agent)

    while True:
        experiment.doEpisodes(10)
        print "Done with experiments"
        agent.learn()
        print "Learned"
        agent.reset()
        print "Cycled"
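# ---------------------------------------------------------------------------
# SettleEnv, SettleTask, RestrictedActionValueNetwork and EpsilonHackedExplorer
# above are not shown.  Since the explorer is handed the environment, it
# presumably restricts random exploration to legal moves.  A rough sketch of
# that idea on top of PyBrain's EpsilonGreedyExplorer follows; the class name
# and the env.getLegalMoves() hook are assumptions, not the project's real code.

from random import random, choice
from numpy import array
from pybrain.rl.explorers import EpsilonGreedyExplorer

class EpsilonHackedExplorerSketch(EpsilonGreedyExplorer):
    def __init__(self, env, epsilon=0.3, decay=1.0):
        EpsilonGreedyExplorer.__init__(self, epsilon, decay)
        self.env = env

    def _forwardImplementation(self, inbuf, outbuf):
        # with probability epsilon, replace the greedy action (inbuf) by a
        # random move drawn from the environment's legal moves (hypothetical hook)
        if random() < self.epsilon:
            outbuf[:] = array([choice(self.env.getLegalMoves())])
        else:
            outbuf[:] = inbuf
        self.epsilon *= self.decay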
from pybrain.rl.environments.cartpole import CartPoleEnvironment, DiscreteBalanceTask
from pybrain.rl.agents import LearningAgent
from pybrain.rl.learners.valuebased import NFQ, ActionValueNetwork
from pybrain.rl.experiments import EpisodicExperiment

from training import NFQTraining

task = DiscreteBalanceTask(CartPoleEnvironment(), 100)
action_value_function = ActionValueNetwork(4, 3, name='CartPoleNFQActionValueNetwork')

learner = NFQ()
#learner.gamma = 0.99
learner.explorer.epsilon = 0.4
task.discount = learner.gamma

agent = LearningAgent(action_value_function, learner)
performance_agent = LearningAgent(action_value_function, None)
experiment = EpisodicExperiment(task, agent)

tr = NFQTraining('cartpole_nfq', experiment, performance_agent)
tr.train(7000, performance_interval=1, n_performance_episodes=5)
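# ---------------------------------------------------------------------------
# NFQTraining comes from the project's local `training` module and is not
# shown.  As a rough idea of what tr.train(7000, performance_interval=1,
# n_performance_episodes=5) might be doing, here is a minimal sketch of such
# a loop; the class name, its internals and the printed summary are
# assumptions, not the module's actual implementation.

from numpy import mean

class NFQTrainingSketch(object):
    def __init__(self, name, experiment, performance_agent):
        self.name = name
        self.experiment = experiment
        self.learning_agent = experiment.agent
        self.performance_agent = performance_agent

    def train(self, n_iterations, performance_interval=1, n_performance_episodes=5):
        for it in range(n_iterations):
            # gather experience with the exploring agent, then fit the Q-network
            self.experiment.doEpisodes(1)
            self.learning_agent.learn()
            self.learning_agent.reset()

            if it % performance_interval == 0:
                # greedy evaluation: the performance agent shares the same
                # ActionValueNetwork but has no learner/explorer attached
                self.experiment.agent = self.performance_agent
                rewards = self.experiment.doEpisodes(n_performance_episodes)
                self.performance_agent.reset()
                self.experiment.agent = self.learning_agent
                print '%s %d: mean episode reward %.2f' % (
                    self.name, it, mean([sum(r) for r in rewards]))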
from pybrain.rl.agents import LearningAgent
from pybrain.rl.learners.valuebased import NFQ, ActionValueNetwork
from pybrain.rl.experiments import EpisodicExperiment

from environment import Environment
from tasks import BalanceTask
from training import NFQTraining

task = BalanceTask()
action_value_function = ActionValueNetwork(task.outdim, task.nactions,
                                           name='BalanceNFQActionValueNetwork')

learner = NFQ()
learner.gamma = 0.9999
learner.explorer.epsilon = 0.9
task.discount = learner.gamma

agent = LearningAgent(action_value_function, learner)
performance_agent = LearningAgent(action_value_function, None)
experiment = EpisodicExperiment(task, agent)

tr = NFQTraining('balance_nfq', experiment, performance_agent)
tr.train(7000, performance_interval=1, n_performance_episodes=1,
         plotsave_interval=10, plot_action_history=True)
    sys.exit()

if len( sys.argv ) < 3:
    print 'Must supply an output file!'
    sys.exit()

type = int( sys.argv[1] ) # 1 = Uncert&Salience, 2 = Salience, 3 = Uncert, 4 = Activation

env = DistractorRatio() # Create an instance of the D-R task

# Create an action/value network; input and output sizes depend on the model type
if type == 1:
    module = ActionValueNetwork( 99, 7 )
else:
    module = ActionValueNetwork( 51, 4 )

learner = NFQ()
learner.offPolicy = False # Disable off-policy learning
#learner.explorer = HumanExplorer()
learner.explorer.epsilon = 0.4
#learner.explorer.decay = 0.99

agent = HumanAgent( module, learner, type ) # Create an agent that learns with NFQ
testagent = HumanAgent( module, None, type ) # Create a testing agent
experiment = CustomEpisodicExperiment( env, agent ) # Put the agent in the environment

if len( sys.argv ) == 4:
    print 'Loading saved net...'
    module.network = NetworkReader.readFrom( sys.argv[3] )

def save( history, net ):
    """ This function gets called after each training/testing block or when the
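# ---------------------------------------------------------------------------
# The save() helper above is cut off mid-docstring.  A plausible minimal body
# is sketched here: it writes the learned network to the output file named on
# the command line and pickles the block history next to it.  The file layout
# and the pickling of `history` are assumptions; NetworkWriter.writeToFile is
# the real PyBrain counterpart of the NetworkReader call above.

import sys
import pickle
from pybrain.tools.customxml.networkwriter import NetworkWriter

def save_sketch( history, net ):
    """ Persist the block history and the current Q-network to disk. """
    with open( sys.argv[2] + '.history.pkl', 'wb' ) as f:
        pickle.dump( history, f )
    NetworkWriter.writeToFile( net, sys.argv[2] )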
from pybrain.rl.environments.cartpole import CartPoleEnvironment, DiscreteBalanceTask
from pybrain.rl.environments.cartpole.renderer import CartPoleRenderer  # import path may vary by PyBrain version
from pybrain.rl.agents import LearningAgent
from pybrain.rl.experiments import EpisodicExperiment
from pybrain.rl.learners.valuebased import NFQ, ActionValueNetwork
from pybrain.rl.explorers import BoltzmannExplorer

from numpy import array, arange, meshgrid, pi, zeros, mean
from matplotlib import pyplot as plt

plt.ion()

env = CartPoleEnvironment()
renderer = CartPoleRenderer()
env.setRenderer(renderer)
# renderer.start()

module = ActionValueNetwork(2, 3)
task = DiscreteBalanceTask(env, 50)
learner = NFQ()
learner.explorer = BoltzmannExplorer()

agent = LearningAgent(module, learner)
testagent = LearningAgent(module, None)
experiment = EpisodicExperiment(task, agent)

def plotStateValues(module, fig):
    plt.figure(fig.number)
    theta_ = arange(-pi/2, pi/2, 0.1)
    v_ = arange(-5, 5, 0.3)
    X, Y = meshgrid(theta_, v_)
    X = X.flatten()
    Y = Y.flatten()
    Q = zeros(len(theta_) * len(v_))
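# ---------------------------------------------------------------------------
# plotStateValues() above breaks off after allocating Q.  One way the plot
# could be finished, assuming the intent is to show max_a Q(theta, theta_dot)
# over the grid; module.getActionValues() is the real ActionValueNetwork API,
# while the colour-map layout below is an assumption:

def plotStateValuesSketch(module, fig):
    plt.figure(fig.number)
    theta_ = arange(-pi/2, pi/2, 0.1)
    v_ = arange(-5, 5, 0.3)
    X, Y = meshgrid(theta_, v_)
    X = X.flatten()
    Y = Y.flatten()
    Q = zeros(len(theta_) * len(v_))
    for i in range(len(Q)):
        # state layout (angle, angular velocity) matches ActionValueNetwork(2, 3)
        Q[i] = module.getActionValues(array([X[i], Y[i]])).max()
    plt.clf()
    plt.imshow(Q.reshape(len(v_), len(theta_)), origin='lower', aspect='auto',
               extent=(theta_[0], theta_[-1], v_[0], v_[-1]))
    plt.xlabel('pole angle')
    plt.ylabel('angular velocity')
    plt.draw()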
        EpisodicTask.reset(self)
        self.env.reset()

    @property
    def indim(self):
        return self.env.indim

    @property
    def outdim(self):
        return self.env.outdim


env = TetrisEnv(10, 20) #Tetris
task = TetrisTask(env)
QNet = ActionValueNetwork(10 * 20 + 11, 6)
learner = NFQ() #Q()?
learner.explorer = EpsilonGreedyExplorer(0.2, decay=0.99)
agent = LearningAgent(QNet, learner)
experiment = EpisodicExperiment(task, agent)

while True:
    experiment.doEpisodes(1)
    agent.learn()
    agent.reset() #or call more sporadically...?
    task.reset()
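# ---------------------------------------------------------------------------
# The head of the TetrisTask class is cut off above; only the tail of reset()
# and the indim/outdim properties survive.  For context, a PyBrain episodic
# task is normally built around the methods below.  The method names come from
# pybrain's EpisodicTask interface; the Tetris-specific bodies and attributes
# (lines_cleared, game_over) are placeholders, not the real project code.

from pybrain.rl.environments.episodic import EpisodicTask

class TetrisTaskSketch(EpisodicTask):
    def __init__(self, env):
        EpisodicTask.__init__(self, env)

    def getObservation(self):
        # flattened 10x20 board plus 11 extra piece features, matching the
        # ActionValueNetwork(10 * 20 + 11, 6) used above
        return self.env.getSensors()

    def getReward(self):
        # e.g. lines cleared on the last step (placeholder attribute)
        return self.env.lines_cleared

    def isFinished(self):
        # episode ends when the board tops out (placeholder attribute)
        return self.env.game_over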