Example #1
# PyBrain components used below; SettleEnv, SettleTask, RestrictedActionValueNetwork and
# EpsilonHackedExplorer are project-specific classes defined elsewhere in this codebase.
from pybrain.rl.agents import LearningAgent
from pybrain.rl.learners.valuebased import NFQ
from pybrain.rl.experiments import EpisodicExperiment


def mlDriver(cv, stateTransfer, actionTransfer):
    # parameter setup
    # dimensionality of the state argument (may be less than stateTransfer)
    stateDim = 352
    # number of possible moves
    numMoves = 361
    env = SettleEnv(cv, stateTransfer, actionTransfer)
    task = SettleTask(env)
    controller = RestrictedActionValueNetwork(stateDim, numMoves, env)
    learner = NFQ()
    learner.explorer = EpsilonHackedExplorer(env)
    agent = LearningAgent(controller, learner)
    experiment = EpisodicExperiment(task, agent)
    while True:
        experiment.doEpisodes(10)  # collect 10 episodes with the exploring agent
        print("Done with experiments")
        agent.learn()              # NFQ batch update on the collected transitions
        print("Learned")
        agent.reset()              # clear the agent's episode history before the next cycle
        print("Cycled")
Example #2
from pybrain.rl.environments.cartpole import CartPoleEnvironment, DiscreteBalanceTask
from pybrain.rl.agents import LearningAgent
from pybrain.rl.learners.valuebased import NFQ, ActionValueNetwork
from pybrain.rl.experiments import EpisodicExperiment

from training import NFQTraining

task = DiscreteBalanceTask(CartPoleEnvironment(), 100)
action_value_function = ActionValueNetwork(4, 3,
        name='CartPoleNFQActionValueNetwork')
learner = NFQ()
#learner.gamma = 0.99
learner.explorer.epsilon = 0.4
task.discount = learner.gamma
agent = LearningAgent(action_value_function, learner)
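# the second agent below shares the same network but has no learner: it acts greedily and is used only to measure performance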
performance_agent = LearningAgent(action_value_function, None)
experiment = EpisodicExperiment(task, agent)

tr = NFQTraining('cartpole_nfq', experiment, performance_agent)

tr.train(7000, performance_interval=1, n_performance_episodes=5)
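NFQTraining here comes from the project-local training module, not from PyBrain. As a rough, self-contained sketch of the same train/evaluate cycle built only from PyBrain's cart-pole pieces (the round count, episode counts and epsilon below are arbitrary choices, not taken from the example above), the loop could look like this:

from pybrain.rl.environments.cartpole import CartPoleEnvironment, DiscreteBalanceTask
from pybrain.rl.agents import LearningAgent
from pybrain.rl.learners.valuebased import NFQ, ActionValueNetwork
from pybrain.rl.experiments import EpisodicExperiment

env = CartPoleEnvironment()
task = DiscreteBalanceTask(env, 100)     # episodes capped at 100 steps
net = ActionValueNetwork(4, 3)           # 4 state variables, 3 discrete actions
learner = NFQ()
learner.explorer.epsilon = 0.4           # default explorer is epsilon-greedy

agent = LearningAgent(net, learner)      # exploring, learning agent
greedy_agent = LearningAgent(net, None)  # no learner: acts greedily, used only for evaluation
experiment = EpisodicExperiment(task, agent)

for round_no in range(50):               # number of training rounds is arbitrary
    experiment.doEpisodes(1)             # collect one episode with exploration
    agent.learn()                        # NFQ batch update on the stored transitions
    agent.reset()                        # drop the stored episode history

    experiment.agent = greedy_agent      # evaluate the current Q-network greedily
    rewards = experiment.doEpisodes(5)
    greedy_agent.reset()
    experiment.agent = agent
    print(round_no, sum(sum(ep) for ep in rewards))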

Example #3
from pybrain.rl.agents import LearningAgent
from pybrain.rl.learners.valuebased import NFQ, ActionValueNetwork
from pybrain.rl.experiments import EpisodicExperiment

from environment import Environment
from tasks import BalanceTask
from training import NFQTraining
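# Environment, BalanceTask and NFQTraining come from project-local modules, not from PyBrain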

task = BalanceTask()
action_value_function = ActionValueNetwork(task.outdim, task.nactions,
        name='BalanceNFQActionValueNetwork')
learner = NFQ()
learner.gamma = 0.9999
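# the default explorer is epsilon-greedy; epsilon 0.9 makes it pick a random action 90% of the time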
learner.explorer.epsilon = 0.9
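# keep the task's discount factor consistent with the learner's gamma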
task.discount = learner.gamma
agent = LearningAgent(action_value_function, learner)
performance_agent = LearningAgent(action_value_function, None)
experiment = EpisodicExperiment(task, agent)

tr = NFQTraining('balance_nfq', experiment, performance_agent)

tr.train(7000, performance_interval=1, n_performance_episodes=1, plotsave_interval=10, plot_action_history=True)

Example #4
    sys.exit()

if len( sys.argv ) < 3:
    print( 'Must supply an output file!' )
    sys.exit()



type = int( sys.argv[1] ) # 1 = Uncert&Salience, 2 = Salience, 3 = Uncert, 4 = Activation
env = DistractorRatio() # Create an instance of the D-R task
# Create an action/value network; input and action dimensions depend on the chosen condition
if type == 1:
    module = ActionValueNetwork( 99, 7 )
else:
    module = ActionValueNetwork( 51, 4 )
learner = NFQ()
learner.offPolicy = False # Disable off policy learning
#learner.explorer = HumanExplorer()
learner.explorer.epsilon = 0.4
#learner.explorer.decay = 0.99
agent = HumanAgent( module, learner, type ) # Create an agent that learns with NFQ
testagent = HumanAgent( module, None, type ) # Create a testing agent
experiment = CustomEpisodicExperiment( env, agent ) # Put the agent in the environment

if len( sys.argv ) == 4:
    print( 'Loading saved net...' )
    module.network = NetworkReader.readFrom( sys.argv[3] )

def save( history, net ):
    """
    This function gets called after each training/testing block or when the
Example #5
from pybrain.rl.environments.cartpole import CartPoleEnvironment, DiscreteBalanceTask, CartPoleRenderer
from pybrain.rl.agents import LearningAgent
from pybrain.rl.experiments import EpisodicExperiment
from pybrain.rl.learners.valuebased import NFQ, ActionValueNetwork
from pybrain.rl.explorers import BoltzmannExplorer

from numpy import array, arange, meshgrid, pi, zeros, mean
from matplotlib import pyplot as plt

plt.ion()

env = CartPoleEnvironment()
renderer = CartPoleRenderer()
env.setRenderer(renderer)
# renderer.start()

module = ActionValueNetwork(2, 3)
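# note: CartPoleEnvironment itself exposes 4 state variables (pole angle/velocity, cart position/velocity);
# a 2-input network assumes a reduced observation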
task = DiscreteBalanceTask(env, 50)
learner = NFQ()
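# BoltzmannExplorer replaces the default epsilon-greedy explorer: actions are sampled with
# probabilities given by a softmax over their estimated values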
learner.explorer = BoltzmannExplorer()
agent = LearningAgent(module, learner)
testagent = LearningAgent(module, None)
experiment = EpisodicExperiment(task, agent)

def plotStateValues(module, fig):
    plt.figure(fig.number)
    theta_ = arange(-pi/2, pi/2, 0.1)
    v_ = arange(-5, 5, 0.3)
    
    X,Y = meshgrid(theta_, v_)
    X = X.flatten()
    Y = Y.flatten()
    
    Q = zeros(len(theta_) * len(v_))
Example #6
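# Fragment: the tail of a TetrisTask class (an EpisodicTask subclass), followed by the training script.
# TetrisEnv and TetrisTask are project-specific; the PyBrain imports used below (NFQ, ActionValueNetwork,
# EpsilonGreedyExplorer, LearningAgent, EpisodicExperiment) sit above the cut.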
        EpisodicTask.reset(self)
        self.env.reset()

    @property
    def indim(self):
        return self.env.indim

    @property
    def outdim(self):
        return self.env.outdim


env = TetrisEnv(10, 20)  #Tetris
task = TetrisTask(env)

QNet = ActionValueNetwork(10 * 20 + 11, 6)
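# state input: the 10x20 board (200 values) plus 11 additional inputs; 6 discrete actions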

learner = NFQ()
#Q()?
learner.explorer = EpsilonGreedyExplorer(0.2, decay=0.99)

agent = LearningAgent(QNet, learner)

experiment = EpisodicExperiment(task, agent)

# training loop: collect one episode, run an NFQ update on it, then clear the agent's history
while True:
    experiment.doEpisodes(1)
    agent.learn()
    agent.reset()  #or call more sporadically...?
    task.reset()