class Team(object):
    def __init__(self, living, task, learner=ENAC()):
        self.living = living
        self.task = task
        self.last_reward = 0
        self.agent = LearningAgent(self.living.brain, learner)
        # Keep a copy of the current weights so Learn() can report how far they move.
        self.oldparams = self.living.brain.params.copy()
    def Interaction(self):
        # One observe-act-reward step; start a new episode when the task finishes.
        self.agent.integrateObservation(self.task.getObservation())
        self.task.performAction(self.agent.getAction())
        self.last_reward = self.task.getReward()
        self.agent.giveReward(self.last_reward)

        finished = self.task.isFinished()
        if finished:
            self.agent.newEpisode()
            self.task.reset()
        return self.last_reward, finished
    
    def Learn(self, episodes=1):
        self.agent.learn(episodes)
        self.agent.reset()

        # Squared distance between the old and new weight vectors, as a rough
        # measure of how much this learning step changed the network.
        newparams = self.living.brain.params.copy()
        dif = sum((old - new) ** 2 for old, new in zip(self.oldparams, newparams))
        self.oldparams = newparams
        return dif
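A minimal sketch of how this Team wrapper might be driven; `SomeCreature` and `SomeEpisodicTask` are hypothetical stand-ins for whatever environment the original code pairs with it (it only needs an object exposing a trainable `.brain` module and a PyBrain episodic task over it):

creature = SomeCreature()             # hypothetical: must expose a trainable module as .brain
task = SomeEpisodicTask(creature)     # hypothetical: a PyBrain EpisodicTask over that creature
team = Team(creature, task)

for step in xrange(10000):
    reward, finished = team.Interaction()
    if finished:
        # After each finished episode, train and report how far the weights moved.
        print "weight change:", team.Learn(episodes=1)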
Example #2
"""
Program: NFQ_EXAMPLE.PY
Date: Thursday, March  1 2012
Description: Test NFQ on my cartpole simulation.
"""

from pybrain.rl.agents import LearningAgent
from pybrain.rl.learners.valuebased import NFQ, ActionValueNetwork
from cartpole import CartPole
import numpy as np

module = ActionValueNetwork(4, 2)       # 4 state dimensions, 2 discrete actions
learner = NFQ()
learner.explorer.epsilon = 0.4          # epsilon-greedy exploration rate
agent = LearningAgent(module, learner)

env = CartPole()
cnt = 0
for i in range(1000):
    
    env.reset()
    print "Episode: %d, Count: %d" % (i,cnt)
    cnt = 0
    while not env.failure():
        agent.integrateObservation(env.observation())
        action = agent.getAction()
        pstate, paction, reward, state = env.move(action)
        cnt += 1
        agent.giveReward(reward)
    agent.learn(1)
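One way to sanity-check what NFQ has learned is to re-run the same loop with exploration switched off; this greedy-evaluation snippet is not part of the original script, but it only uses objects the script already defines:

# Greedy rollout with the trained policy (illustrative addition, not in the
# original script): with epsilon = 0 the agent always picks the highest-valued action.
learner.explorer.epsilon = 0.0
env.reset()
steps = 0
while not env.failure() and steps < 1000:
    agent.integrateObservation(env.observation())
    action = agent.getAction()
    pstate, paction, reward, state = env.move(action)
    agent.giveReward(reward)
    steps += 1
print "greedy episode lasted %d steps" % steps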

Example #3
import random

# Training loop. This snippet assumes an `agent` built earlier in the original
# file (a LearningAgent over 4 states and 2 actions); that setup is not shown here.
for x in xrange(1, 100):
    # Pick a random input pair, encode it as a single state index (0-3),
    # and compute the XOR target.
    listxor = random.choice([[0, 0], [0, 1], [1, 0], [1, 1]])
    qstate = listxor[0] + listxor[1]*2
    resultxor = listxor[0] ^ listxor[1]

    agent.integrateObservation([qstate])
    action = agent.getAction()


    if int(action) == resultxor:
        reward = 1
    else:
        reward = -1

    print "xor(",listxor,") = ", resultxor, " || action = " , action[0], "reward = ", reward    

    agent.giveReward(reward) # 1 for good answer, 0 for bad.
    agent.learn()

print "finished"


# Test: query the greedy action for each input pair, using the same
# state encoding (a + b*2) as in the training loop above.
print "test : "
print "[0, 0] ", agent.learner.module.getMaxAction(0 + 0*2)
print "[0, 1] ", agent.learner.module.getMaxAction(0 + 1*2)
print "[1, 0] ", agent.learner.module.getMaxAction(1 + 0*2)
print "[1, 1] ", agent.learner.module.getMaxAction(1 + 1*2)
Example #4
class SimulationMaster:

    def __init__(self, n_threads=4, initial_port=19997, q_table_version=0,
                 batch_size=None, learner=None, explorer=None):
        self.barrier = Barrier(n_threads + 1, timeout=720)
        self.n_threads = n_threads
        self.initial_port = initial_port
        self.batch_size = batch_size

        self.controller = MyActionValueTable(q_table_version)
        if learner is None:
            self.learner = Q(0.5, 0.9)  # default tabular Q-learning: alpha=0.5, gamma=0.9
        else:
            self.learner = learner

        if explorer is None:
            # epsilon-greedy exploration: epsilon=0.2, multiplicative decay=0.998
            self.explorer = self.learner.explorer = EpsilonGreedyExplorer(0.2, 0.998)
        else:
            self.explorer = self.learner.explorer = explorer
        self.agent = LearningAgent(self.controller, self.learner)
        # Logger initialization
        self.logger = logging.getLogger('master_logger')
        self.logger.setLevel(logging.DEBUG)
        self.logger.addHandler(logging.FileHandler(Utils.DATA_PATH + 'learning-tables/master.log'))
        self.failed_simulations = []
        self.n_episodes = 0
        self.simulations = []
        self.initialize_simulations()

    def initialize_simulations(self):
        self.simulations = []
        for i in range(self.n_threads):
            if self.batch_size is not None:
                self.simulations.append(Simulation(self, self.initial_port + i, self.batch_size))
            else:
                self.simulations.append(Simulation(self, self.initial_port + i))

    def get_action(self, observation):
        action = self.controller.activate(observation)
        action = self.explorer.activate(observation, action)
        return action

    def add_observation(self, obs):
        """
            Adds an observation to the agent's memory.
            :param obs: 3-element sequence containing [observation, action, reward]
        """
        self.agent.integrateObservation(obs[0])
        self.agent.lastaction = obs[1]
        self.agent.giveReward(obs[2])

    def update_q_table(self):
        """
            Updates the Q-table with the new observations from the simulators.
        """
        for sim in self.simulations:
            for trace in sim.traces:
                for obs in trace:
                    self.add_observation(obs)
                self.agent.learn()
                self.agent.reset()
                self.n_episodes += 1

            sim.traces.clear()
        if self.explorer.epsilon > 0.1:
            self.explorer.epsilon *= self.explorer.decay
        if self.learner.alpha > 0.1:
            self.learner.alpha *= 0.999
        self.logger.info('new epsilon: {}'.format(self.explorer.epsilon))
        self.logger.info('new alpha: {}'.format(self.learner.alpha))
        self.logger.info('n episodes: {}'.format(self.n_episodes))

    def save_t_table(self):
        """
            Saves the t-tables, one for each simulation thread.
        """
        for sim in self.simulations:
            sim.save_t_table()

    def run(self):

        self.controller.initialize(self.agent)
        for sim in self.simulations:
            sim.start()
        counter = 0
        while True:
            try:
                self.barrier.wait()  # wait until all simulations are done
                self.update_q_table()
                self.save_t_table()
                self.barrier.wait()  # Free simulations threads and start a new cycle
                # Save the Q-table only every few cycles rather than every iteration
                if counter == 5:
                    self.controller.save()
                    counter = 0
                else:
                    counter += 1
                while self.failed_simulations:
                    sim = self.failed_simulations.pop()
                    self.restart_simulation(sim)
            except BrokenBarrierError as e:
                self.logger.error('Broken Barrier Error Occurred')
                for sim in self.simulations:
                    sim.stop()
                for sim in self.simulations:
                    sim.join()
                del self.simulations
                self.initialize_simulations()
                self.barrier.reset()
                self.failed_simulations.clear()
                for sim in self.simulations:
                    sim.start()

    def restart_simulation(self, simulation):
        self.logger.info('Restarting simulation with port {}'.format(simulation.port))
        self.simulations.remove(simulation)
        new_simulation = Simulation(self, simulation.port)
        self.simulations.append(new_simulation)
        new_simulation.start()
        del simulation
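For completeness, a sketch of how this master might be started; the original module's entry point is not shown, and `Simulation`, `MyActionValueTable`, and `Utils` are project-specific classes assumed to be importable alongside this class:

# Illustrative entry point (not part of the original module); ports and
# thread count mirror the constructor defaults above.
if __name__ == '__main__':
    master = SimulationMaster(n_threads=4, initial_port=19997, q_table_version=0)
    # run() blocks indefinitely: it waits for all simulation threads at the barrier,
    # replays their traces into the Q-table, decays epsilon and alpha, saves the
    # table periodically, and restarts any simulations that reported failure.
    master.run()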