def fit(self, sast, r):
    """Perform one run of PBO using input data ``sast`` and ``r``.

    If the dataset does not change between iterations, ``None`` may be
    passed for either argument after the first iteration and the
    previously stored data is reused.  (The original body sliced ``sast``
    unconditionally, so the documented ``None`` inputs raised
    ``TypeError``; both arguments are now guarded, matching the other
    ``fit`` variants in this file.)

    Args:
        sast (numpy.array, None): input samples laid out as
            ``[state | action | next_state | absorbing_flag]`` columns.
        r (numpy.array, None): the rewards for the samples.

    Returns:
        the history of the parameters used to update the q regressor.
    """
    # Reset the per-run best fitness tracker (read by the NES listener).
    self.iteration_best_rho_value = np.inf
    if sast is not None:
        # Split the sample matrix into (state, action), next-state and
        # absorbing-flag sections; the flag is the last column.
        next_states_idx = self.state_dim + self.action_dim
        self._sa = sast[:, :next_states_idx]
        self._snext = sast[:, next_states_idx:-1]
        self._absorbing = sast[:, -1]
    if r is not None:
        self._r = r
    # Exact Natural Evolution Strategies over the rho parameters; the
    # step budget is learning_steps - 1 because ExactNES counts the
    # initial evaluation as a step (presumably — TODO confirm against
    # pybrain's ExactNES semantics).
    optimizer = ExactNES(self._fitness, self._get_rho(),
                         minimize=True,
                         batchSize=self._batch_size,
                         learningRate=self._learning_rate,
                         maxLearningSteps=self._learning_steps - 1,
                         importanceMixing=False,
                         maxEvaluations=None)
    optimizer.listener = self.my_listener
    optimizer.learn()
    # Record the q-regressor weights produced by this run.
    self._q_weights_list.append(self._get_q_weights())
    return self._q_weights_list
def experiment3():
    """Minimize ``fitnessFunction`` over the network parameters with
    Exact NES, install the best parameters on ``myNetwork`` and log the
    resulting network."""
    optimizer = ExactNES(fitnessFunction, myNetwork.params)
    optimizer.minimize = True
    optimizer.verbose = True
    optimizer.maxLearningSteps = 1000
    best_params, best_fitness = optimizer.learn()
    myNetwork._setParameters(best_params)
    logNet()
def perform_gradient_descent(self, chromosome):
    """Refine ``chromosome`` with Exact NES and return its objective score.

    The chromosome is copied into a numpy vector, optimized against the
    global objective ``objF`` for ``self.num_eval`` evaluations (skipped
    when ``num_eval == 0``), written back into ``chromosome`` in place,
    and the score of the final parameter vector is returned.

    Bug fixed: the original passed ``numpy.eye(len(bounds))`` where
    ``bounds`` was an undefined local — the attribute is ``self.bounds``
    — so every call with ``num_eval != 0`` raised ``NameError``.

    Args:
        chromosome: a mutable sequence of floats; mutated in place.

    Returns:
        float: ``objF`` evaluated on the (possibly refined) parameters.
    """
    from pybrain.optimization import ExactNES, OriginalNES
    inp = numpy.array([v for v in chromosome])
    # Default to the unrefined parameters so the num_eval == 0 path
    # still returns a well-defined score instead of hitting an unbound
    # local.
    new_K = inp
    if self.num_eval != 0:
        l = ExactNES(objF, inp[:],
                     rangemins=self.mins,
                     rangemaxs=self.maxs,
                     learningRate=0.01,
                     # Small initial covariance around the start point;
                     # was len(bounds) (undefined) in the original.
                     initCovariances=numpy.eye(len(self.bounds)) * 0.1)
        l.minimize = True
        l.maxEvaluations = self.num_eval
        (new_K, success) = l.learn()
        # Propagate the refined parameters back into the chromosome.
        for i in xrange(0, len(chromosome)):
            chromosome[i] = new_K[i]
    score = objF(numpy.array(new_K))
    return score
def fit(self, sast=None, r=None):
    """Run one PBO iteration.

    When ``sast``/``r`` are ``None`` the data stored by a previous call
    is reused.  Optimizes the rho parameters with Exact NES, maps them
    through ``self._f`` to obtain the new q-regressor weights, and
    advances the iteration counter.

    Returns:
        tuple: the updated theta and the squared sum of the elementwise
        change ``(sum(theta_new - theta_old)) ** 2``.
    """
    if sast is not None:
        # Column layout: [state | action | next_state | absorbing].
        split = self.state_dim + self.action_dim
        self._sa = sast[:, :split]
        self._snext = sast[:, split:-1]
        self._absorbing = sast[:, -1]
    if r is not None:
        self._r = r

    previous_theta = self._estimator._regressor.theta
    self._optimizer = ExactNES(self._fitness, self._get_rho(),
                               minimize=True, batchSize=100)
    best_rho, _best_score = self._optimizer.learn()
    self._estimator._regressor.theta = self._f(best_rho)
    self._iteration += 1

    updated_theta = self._estimator._regressor.theta
    # NOTE(review): this sums the differences first and then squares the
    # scalar; a sum of squared differences may have been intended —
    # preserved as written.
    change = np.sum(updated_theta - previous_theta) ** 2
    return (updated_theta, change)
from pybrain.optimization import ExactNES from pybrain.rl.experiments import EpisodicExperiment batch = 2 #number of samples per learning step prnts = 100 #number of learning steps after results are printed epis = 4000 / batch / prnts #number of roleouts numbExp = 10 #number of experiments et = ExTools(batch, prnts) #tool for printing and plotting for runs in range(numbExp): # create environment env = CartPoleEnvironment() # create task task = BalanceTask(env, 200, desiredValue=None) # create controller network net = buildNetwork(4, 1, bias=False) # create agent with controller and learner (and its options) agent = OptimizationAgent(net, ExactNES(storeAllEvaluations=True)) et.agent = agent # create the experiment experiment = EpisodicExperiment(task, agent) #Do the experiment for updates in range(epis): for i in range(prnts): experiment.doEpisodes(batch) print "Epsilon : ", agent.learner.sigma et.printResults((agent.learner._allEvaluations)[-50:-1], runs, updates) et.addExps() et.showExps()
def learn(obj_fun, init_values):
    """Minimize ``obj_fun`` with Exact NES starting from ``init_values``
    and return the best parameter vector found."""
    optimizer = ExactNES(obj_fun, init_values, minimize=True, verbose=True)
    best_params, _best_fitness = optimizer.learn()
    return best_params
class PBO(Algorithm):
    """Projected Bellman Operator learning: a small keras network
    (``_regressor_rho``) maps the q-regressor's parameters ``theta`` to
    their next value; its own weights (``rho``) are optimized with
    Exact NES to minimize the Bellman residual."""

    def __init__(self, estimator, state_dim, action_dim, discrete_actions,
                 gamma, horizon, features=None, verbose=False):
        # 2 -> 30 (relu) -> 2 network, so theta is presumably a
        # 2-dimensional parameter vector — TODO confirm against the
        # estimator's regressor.
        self._regressor_rho = Sequential()
        self._regressor_rho.add(Dense(30, input_shape=(2, ), activation='relu'))
        self._regressor_rho.add(Dense(2, activation='linear'))
        # compile() is required by keras before predict(); the rmsprop/mse
        # settings are unused here since training goes through NES, not fit().
        self._regressor_rho.compile(optimizer='rmsprop', loss='mse')
        super(PBO, self).__init__(estimator, state_dim, action_dim,
                                  discrete_actions, gamma, horizon,
                                  features, verbose)

    def fit(self, sast=None, r=None):
        """Run one PBO iteration; ``None`` inputs reuse stored data.

        Returns a tuple of the updated theta and the squared sum of the
        elementwise theta change.
        """
        if sast is not None:
            # Column layout: [state | action | next_state | absorbing].
            next_states_idx = self.state_dim + self.action_dim
            self._sa = sast[:, :next_states_idx]
            self._snext = sast[:, next_states_idx:-1]
            self._absorbing = sast[:, -1]
        if r is not None:
            self._r = r
        old_theta = self._estimator._regressor.theta
        # Optimize rho (the keras network's flattened weights) by NES.
        self._optimizer = ExactNES(self._fitness, self._get_rho(),
                                   minimize=True, batchSize=100)
        rho, score = self._optimizer.learn()
        self._estimator._regressor.theta = self._f(rho)
        self._iteration += 1
        # NOTE(review): sums the differences then squares the scalar; a
        # sum of squared differences may have been intended.
        return (self._estimator._regressor.theta,
                np.sum(self._estimator._regressor.theta - old_theta)**2)

    def _fitness(self, rho):
        """Mean squared Bellman residual of the theta produced by ``rho``."""
        Q = self._estimator.predict(self._sa, f_rho=self._f(rho))
        maxQ, _ = self.maxQA(self._snext, self._absorbing)
        return np.mean((Q - self._r - self.gamma * maxQ)**2)

    def _f(self, rho):
        """Apply the rho-parameterized operator to the current theta:
        load ``rho`` into the keras network and map theta through it."""
        self._set_rho(rho)
        output = self._regressor_rho.predict(np.array(
            [self._estimator._regressor.theta]), batch_size=1).ravel()
        return output

    def _get_rho(self):
        """Return the keras network's weights flattened into one vector."""
        rho = self._regressor_rho.get_weights()
        r = list()
        for i in rho:
            r += i.ravel().tolist()
        return np.array(r)

    def _set_rho(self, rho):
        """Unflatten ``rho`` back into per-layer kernel/bias arrays and
        install them on the keras network (inverse of ``_get_rho``)."""
        weights = list()
        rho = rho.tolist()
        for l in self._regressor_rho.layers:
            w = l.get_weights()[0]
            b = l.get_weights()[1]
            # Consume the flat list in order: kernel first, then bias —
            # must match the get_weights() flattening order above.
            weights.append(np.array(rho[:w.size]).reshape(w.shape))
            del rho[:w.size]
            weights.append(np.array(rho[:b.size]))
            del rho[:b.size]
        self._regressor_rho.set_weights(weights)