Example #1
    def perform_gradient_descent(self, chromosome):
        from pybrain.optimization import ExactNES

        # Starting point: the current chromosome as a flat numpy array.
        inp = numpy.array([v for v in chromosome])

        # Score of the unchanged chromosome, returned if no evaluations
        # are allowed.
        score = self.func(inp)
        if self.num_eval != 0:
            # Minimize self.func with NES, constrained to the search bounds.
            l = ExactNES(self.func, inp[:],
                         rangemins=self.mins,
                         rangemaxs=self.maxs,
                         learningRate=0.01,
                         initCovariances=numpy.eye(len(self.bounds)) * 0.1)
            l.minimize = True
            l.maxEvaluations = self.num_eval
            # learn() returns the best parameters and their evaluation.
            new_K, best_eval = l.learn()
            # Write the optimized parameters back into the chromosome.
            for i in xrange(0, len(chromosome)):
                chromosome[i] = new_K[i]
            score = self.func(numpy.array(new_K))
        return score
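
For reference, the bounded-NES call pattern used above can be exercised on its own. The following is a minimal sketch, assuming a toy objective sphere and hand-picked bounds that are not part of the original class:

import numpy
from pybrain.optimization import ExactNES

def sphere(x):
    # Toy objective: sum of squares, minimized at the origin.
    return numpy.sum(x ** 2)

x0 = numpy.array([0.5, -0.3, 0.8])
opt = ExactNES(sphere, x0,
               rangemins=numpy.array([-1.0, -1.0, -1.0]),
               rangemaxs=numpy.array([1.0, 1.0, 1.0]),
               learningRate=0.01)
opt.minimize = True
opt.maxEvaluations = 2000
best_params, best_score = opt.learn()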
Example #2
    def fit(self, sast=None, r=None):
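        # The dataset is cached, so None may be passed on later iterations.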
        if sast is not None:
            next_states_idx = self.state_dim + self.action_dim
            self._sa = sast[:, :next_states_idx]
            self._snext = sast[:, next_states_idx:-1]
            self._absorbing = sast[:, -1]
        if r is not None:
            self._r = r

        old_theta = self._estimator._regressor.theta

        self._optimizer = ExactNES(self._fitness,
                                   self._get_rho(),
                                   minimize=True,
                                   batchSize=100)

        rho, score = self._optimizer.learn()
        self._estimator._regressor.theta = self._f(rho)

        self._iteration += 1

        # Return the new parameters and the squared change from the old ones.
        return (self._estimator._regressor.theta,
                np.sum((self._estimator._regressor.theta - old_theta) ** 2))
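
A typical driver for a fit method like this stops once the parameter change stabilizes. The sketch below is hypothetical usage; agent, max_iterations, and the convergence threshold are assumptions, not part of the original code:

theta_history = []
for it in range(max_iterations):
    # Pass the dataset only on the first iteration; None reuses the cache.
    theta, delta = agent.fit(sast if it == 0 else None,
                             r if it == 0 else None)
    theta_history.append(theta)
    if delta < 1e-6:  # squared parameter change below tolerance
        break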
Example #3
    def fit(self, sast=None, r=None):
        """
        Perform a run of PBO using the input data sast and r.
        Note that if the dataset does not change between iterations, you can
        provide None inputs after the first iteration.

        Args:
            sast (numpy.array, None): the input in the dataset
            r (numpy.array, None): the output in the dataset

        Returns:
            the history of the parameters used to update the q regressor
        """
        self.iteration_best_rho_value = np.inf

        # Cache the dataset so that None can be passed on later iterations.
        if sast is not None:
            next_states_idx = self.state_dim + self.action_dim
            self._sa = sast[:, :next_states_idx]
            self._snext = sast[:, next_states_idx:-1]
            self._absorbing = sast[:, -1]
        if r is not None:
            self._r = r

        optimizer = ExactNES(self._fitness,
                             self._get_rho(),
                             minimize=True,
                             batchSize=self._batch_size,
                             learningRate=self._learning_rate,
                             maxLearningSteps=self._learning_steps - 1,
                             importanceMixing=False,
                             maxEvaluations=None)
        optimizer.listener = self.my_listener
        optimizer.learn()
        self._q_weights_list.append(self._get_q_weights())

        return self._q_weights_list
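
The listener attached above receives progress callbacks: PyBrain's optimizers invoke it after each learning step with the best parameters and the best fitness found so far. The following is a sketch of what my_listener could look like, assuming it only tracks the best rho value of the current iteration (its actual implementation is not shown in this example):

    def my_listener(self, best_evaluable, best_evaluation):
        # Invoked by the optimizer after each learning step with the best
        # parameters and their fitness value seen so far.
        if best_evaluation < self.iteration_best_rho_value:
            self.iteration_best_rho_value = best_evaluation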
Example #4
from pybrain.tools.shortcuts import buildNetwork
from pybrain.rl.environments.cartpole import CartPoleEnvironment, BalanceTask
from pybrain.rl.agents import OptimizationAgent
from pybrain.optimization import ExactNES
from pybrain.rl.experiments import EpisodicExperiment
from pybrain.tools.example_tools import ExTools

batch = 2  # number of samples per learning step
prnts = 100  # number of learning steps between printed results
epis = 4000 / batch / prnts  # number of rollouts
numbExp = 10  # number of experiments
et = ExTools(batch, prnts)  # tool for printing and plotting

for runs in range(numbExp):
    # create environment
    env = CartPoleEnvironment()
    # create task
    task = BalanceTask(env, 200, desiredValue=None)
    # create controller network
    net = buildNetwork(4, 1, bias=False)
    # create agent with controller and learner (and its options)
    agent = OptimizationAgent(net, ExactNES(storeAllEvaluations=True))
    et.agent = agent
    # create the experiment
    experiment = EpisodicExperiment(task, agent)

    # do the experiment
    for updates in range(epis):
        for i in range(prnts):
            experiment.doEpisodes(batch)
        print "Epsilon   : ", agent.learner.sigma
        et.printResults((agent.learner._allEvaluations)[-50:-1], runs, updates)
    et.addExps()
et.showExps()
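
Because the learner is created with storeAllEvaluations=True, every fitness value is kept in agent.learner._allEvaluations, which is what et.printResults slices above. As a hypothetical follow-up (matplotlib is an assumption, not used by the original script), the full learning curve can be plotted directly:

import matplotlib.pyplot as plt

# Each entry is the return of one evaluated episode, in evaluation order.
plt.plot(agent.learner._allEvaluations)
plt.xlabel("evaluation")
plt.ylabel("episode return")
plt.title("ExactNES on the CartPole BalanceTask")
plt.show()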