def perform_gradient_descent(self, chromosome):
    """Locally refine *chromosome* with Exact Natural Evolution Strategies.

    Runs ExactNES for ``self.num_eval`` evaluations, writes the best
    parameter vector back into *chromosome* in place, and returns its
    fitness score.  When ``self.num_eval == 0`` no optimization is
    performed, *chromosome* is left untouched, and ``None`` is returned
    (the original code would have raised a NameError in that case).

    Args:
        chromosome: mutable sequence of floats; updated in place.

    Returns:
        The objective value of the refined parameters, or ``None`` when
        ``self.num_eval == 0``.
    """
    from pybrain.optimization import ExactNES, OriginalNES

    inp = numpy.array([v for v in chromosome])
    if self.num_eval != 0:
        # NOTE(review): `objF` is a free name not defined in this scope;
        # judging by the commented-out BoundsSafeFunction line it was
        # probably meant to be `self.func` — confirm before relying on it.
        #bf = BoundsSafeFunction(self.func,self.bounds)
        # Fixed: `len(bounds)` referenced an undefined global; the bounds
        # live on the instance (cf. self.mins / self.maxs / the comment
        # above), so use `self.bounds`.
        l = ExactNES(objF, inp[:],
                     rangemins=self.mins,
                     rangemaxs=self.maxs,
                     learningRate=0.01,
                     initCovariances=numpy.eye(len(self.bounds)) * 0.1)
        l.minimize = True
        l.maxEvaluations = self.num_eval
        # ExactNES.learn() returns (best parameters, best evaluation).
        (new_K, success) = l.learn()
        # Copy the optimized parameters back into the caller's chromosome.
        for i in xrange(0, len(chromosome)):
            chromosome[i] = new_K[i]
        score = objF(numpy.array(new_K))
        return score
def fit(self, sast=None, r=None):
    """Run one NES optimization step over the regressor's parameters.

    Args:
        sast (numpy.array, None): dataset rows laid out as
            [state, action, next_state, absorbing-flag]; when ``None`` the
            previously cached dataset is reused.
        r (numpy.array, None): rewards; when ``None`` the cached rewards
            are reused.

    Returns:
        tuple: the updated regressor parameters ``theta`` and the squared
        L2 norm of the parameter change from the previous iteration.
    """
    if sast is not None:
        # Column layout: [0, next_states_idx) -> state+action,
        # [next_states_idx, -1) -> next state, last column -> absorbing.
        next_states_idx = self.state_dim + self.action_dim
        self._sa = sast[:, :next_states_idx]
        self._snext = sast[:, next_states_idx:-1]
        self._absorbing = sast[:, -1]
    if r is not None:
        self._r = r

    old_theta = self._estimator._regressor.theta
    self._optimizer = ExactNES(self._fitness, self._get_rho(),
                               minimize=True, batchSize=100)
    rho, score = self._optimizer.learn()
    # Map the optimized meta-parameters rho to regressor weights.
    self._estimator._regressor.theta = self._f(rho)
    self._iteration += 1

    # Fixed: was `np.sum(theta - old_theta)**2`, i.e. the square of the
    # *sum* of signed differences, which can be zero even for a large
    # parameter change whose components cancel.  The squared L2 norm of
    # the change is `np.sum((theta - old_theta)**2)`.
    return (self._estimator._regressor.theta,
            np.sum((self._estimator._regressor.theta - old_theta) ** 2))
def fit(self, sast, r):
    """
    Perform a run of PBO using input data sast and r.

    Note that if the dataset does not change between iterations, you can
    provide None inputs after the first iteration.

    Args:
        sast (numpy.array, None): the input in the dataset
        r (numpy.array, None): the output in the dataset
        **kwargs: additional parameters to be provided to the fit function
            of the estimator

    Returns:
        the history of the parameters used to update the q regressor

    """
    self.iteration_best_rho_value = np.inf

    # Fixed: the docstring promises that None may be passed after the
    # first iteration, but the body sliced `sast` unconditionally and
    # would raise TypeError on None.  Guard both inputs, mirroring the
    # other fit() implementation in this file.
    if sast is not None:
        # Column layout: [0, next_states_idx) -> state+action,
        # [next_states_idx, -1) -> next state, last column -> absorbing.
        next_states_idx = self.state_dim + self.action_dim
        self._sa = sast[:, :next_states_idx]
        self._snext = sast[:, next_states_idx:-1]
        self._absorbing = sast[:, -1]
    if r is not None:
        self._r = r

    optimizer = ExactNES(self._fitness, self._get_rho(),
                         minimize=True,
                         batchSize=self._batch_size,
                         learningRate=self._learning_rate,
                         maxLearningSteps=self._learning_steps - 1,
                         importanceMixing=False,
                         maxEvaluations=None)
    optimizer.listener = self.my_listener
    optimizer.learn()

    # Record the regressor weights produced by this learning run.
    self._q_weights_list.append(self._get_q_weights())

    return self._q_weights_list
# Python 2 experiment script: train a CartPole balancing controller with
# Exact Natural Evolution Strategies (pybrain), repeated over several
# independent runs.  Helper names (ExTools, CartPoleEnvironment,
# BalanceTask, buildNetwork, OptimizationAgent) are presumably imported
# earlier in the full file — TODO confirm.
from pybrain.optimization import ExactNES
from pybrain.rl.experiments import EpisodicExperiment

batch = 2        #number of samples per learning step
prnts = 100      #number of learning steps after results are printed
# NOTE: Python 2 integer division — epis == 20 for the values above.
epis = 4000 / batch / prnts  #number of roleouts
numbExp = 10     #number of experiments
et = ExTools(batch, prnts)  #tool for printing and plotting

for runs in range(numbExp):
    # create environment
    env = CartPoleEnvironment()
    # create task: balance for up to 200 steps, no explicit target value
    task = BalanceTask(env, 200, desiredValue=None)
    # create controller network: 4 observations -> 1 action, no bias unit
    net = buildNetwork(4, 1, bias=False)
    # create agent with controller and learner (and its options);
    # storeAllEvaluations keeps the fitness history for printResults below
    agent = OptimizationAgent(net, ExactNES(storeAllEvaluations=True))
    et.agent = agent
    # create the experiment
    experiment = EpisodicExperiment(task, agent)

    #Do the experiment
    for updates in range(epis):
        for i in range(prnts):
            experiment.doEpisodes(batch)
        # NOTE(review): label says "Epsilon" but the value printed is the
        # learner's sigma (exploration step size) — confirm intent.
        print "Epsilon : ", agent.learner.sigma
        # report statistics over the last 49 stored evaluations
        et.printResults((agent.learner._allEvaluations)[-50:-1], runs, updates)
    et.addExps()
et.showExps()