def train(self, transitionSamples):

        print "Entrenando..."

        k = 0
        trainer = RPropMinusTrainer(self.Q, batchlearning=True)
        #trainer = BackpropTrainer(self.Q, batchlearning=False)
        TS = SupervisedDataSet(4, 1)

        while (k < self._epochs):

            if k % 10 == 0:
                print "\t ", k

            # Build the training set from the transition samples
            # Input: 4-dimensional vector (angle, angular velocity, position, action)
            # Target: Q-value for that state-action pair

            TS.clear()

            for s, a, s_1, costo in transitionSamples:

                # Evaluate Q at s' for all possible actions
                # (one value of s' for each of the possible actions)
                # Q_s1 = [ self.Q.activate([s_1.angulo, s_1.velocidadAngular, s_1.posicion, b]) for b in range(Accion.maxValor + 1) ]
                valDerecha = self.Q.activate([
                    s_1.angulo, s_1.velocidadAngular, s_1.posicion,
                    Accion.DERECHA
                ])
                valIzquierda = self.Q.activate([
                    s_1.angulo, s_1.velocidadAngular, s_1.posicion,
                    Accion.IZQUIERDA
                ])

                if valDerecha >= 1 or valDerecha <= 0:
                    print "Q value out of range: ", valDerecha

                if valIzquierda >= 1 or valIzquierda <= 0:
                    print "Q value out of range: ", valIzquierda

                # Input and target for the neural network
                inputVal = (s.angulo, s.velocidadAngular, s.posicion, a)

                # When the immediate cost is zero the target is the cost itself
                # (no bootstrapping); otherwise use the Bellman backup: cost plus
                # the discounted minimum Q-value over the actions available in s'.
                if costo == 0:
                    targetVal = costo
                else:
                    targetVal = costo + self._gamma * min(
                        valDerecha, valIzquierda)

                if targetVal > 1 or targetVal < 0:
                    print "Target out of range: ", targetVal

                TS.addSample(inputVal, targetVal)

            # Train the neural network on this iteration's dataset
            trainer.setData(TS)
            trainer.train()  # 1 epoch
            #trainer.trainEpochs(self._epochsNN)

            k = k + 1
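
The range checks above expect both the Q estimates and the targets to stay inside [0, 1], which suggests a sigmoid output layer. Below is a minimal sketch, not taken from the original source, of how the network behind self.Q could be built with PyBrain's buildNetwork shortcut; the hidden-layer size is an arbitrary assumption for illustration.

# Hypothetical sketch: a Q-network whose output stays in [0, 1],
# matching the range checks in train().
from pybrain.tools.shortcuts import buildNetwork
from pybrain.structure import SigmoidLayer

# 4 inputs (angle, angular velocity, position, action), 10 hidden units
# (an arbitrary choice here), 1 output: the estimated Q-value.
Q = buildNetwork(4, 10, 1,
                 hiddenclass=SigmoidLayer,
                 outclass=SigmoidLayer,
                 bias=True)

In the agent this network would be stored as self.Q and refitted by the loop above on each iteration.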
Example No. 2
from utils import updateDataset, buildDataset, buildRecurrentNetwork, loadRecurrentNetwork
from pybrain.supervised.trainers.rprop import RPropMinusTrainer
from pybrain.tools.xml.networkwriter import NetworkWriter
from pybrain.tools.xml.networkreader import NetworkReader

# Load a previously trained recurrent network instead of building a fresh one
# nn = buildRecurrentNetwork()
nn = loadRecurrentNetwork('recurrentNetwork.xml')
dataset = buildDataset()

# Train with RProp- until the validation error stops improving
trainer = RPropMinusTrainer(nn)
trainer.setData(dataset)
print 'dataset set for trainer'
trainer.trainUntilConvergence()
print 'trained to convergence'

# Save the trained network back to XML for later reuse
NetworkWriter.writeToFile(nn, 'recurrentNetwork.xml')
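
The local utils module this example imports is not shown here. The following is a hypothetical sketch of what those helpers could look like on top of PyBrain's standard API; the layer sizes and the sample data are assumptions made only for illustration.

# Hypothetical stand-ins for the local utils helpers.
from pybrain.tools.shortcuts import buildNetwork
from pybrain.datasets import SupervisedDataSet
from pybrain.tools.xml.networkreader import NetworkReader

def buildRecurrentNetwork():
    # 2 inputs, 4 hidden units, 1 output, with recurrent connections enabled.
    return buildNetwork(2, 4, 1, recurrent=True)

def loadRecurrentNetwork(path):
    # Restore a network previously saved with NetworkWriter.writeToFile.
    return NetworkReader.readFrom(path)

def buildDataset():
    # Input/target dimensions must match the network built above.
    ds = SupervisedDataSet(2, 1)
    ds.addSample((0.0, 1.0), (1.0,))
    return ds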