class XNES(DistributionBasedOptimizer):
    """ NES with exponential parameter representation. """

    # parameters, which can be set but have a good (adapted) default value
    covLearningRate = None
    centerLearningRate = 1.0
    scaleLearningRate = None
    uniformBaseline = True
    batchSize = None
    shapingFunction = HansenRanking()
    importanceMixing = False
    forcedRefresh = 0.01

    # fixed settings
    mustMaximize = True
    storeAllEvaluations = True
    storeAllEvaluated = True
    storeAllDistributions = False

    def _additionalInit(self):
        # good heuristic default parameter settings
        dim = self.numParameters
        if self.covLearningRate is None:
            self.covLearningRate = 0.6 * (3 + log(dim)) / dim / sqrt(dim)
        if self.scaleLearningRate is None:
            self.scaleLearningRate = self.covLearningRate
        if self.batchSize is None:
            if self.importanceMixing:
                self.batchSize = 10 * dim
            else:
                self.batchSize = 4 + int(floor(3 * log(dim)))

        # some bookkeeping variables
        self._center = self._initEvaluable.copy()
        self._A = eye(self.numParameters)  # square root of covariance matrix
        self._invA = eye(self.numParameters)
        self._logDetA = 0.
        self._allPointers = []
        self._allGenSteps = [0]
        if self.storeAllDistributions:
            self._allDistributions = [(self._center.copy(), self._A.copy())]

    def _learnStep(self):
        """ Main part of the algorithm. """
        I = eye(self.numParameters)
        self._produceSamples()
        utilities = self.shapingFunction(self._currentEvaluations)
        utilities /= sum(utilities)  # make the utilities sum to 1
        if self.uniformBaseline:
            utilities -= 1. / self.batchSize
        samples = array([self._base2sample(x) for x in self._population])

        dCenter = dot(samples.T, utilities)
        covGradient = dot(array([outer(s, s) - I for s in samples]).T, utilities)
        covTrace = trace(covGradient)
        covGradient -= covTrace / self.numParameters * I
        dA = 0.5 * (self.scaleLearningRate * covTrace / self.numParameters * I
                    + self.covLearningRate * covGradient)

        self._lastLogDetA = self._logDetA
        self._lastInvA = self._invA

        self._center += self.centerLearningRate * dot(self._A, dCenter)
        self._A = dot(self._A, expm(dA))
        self._invA = dot(expm(-dA), self._invA)
        self._logDetA += 0.5 * self.scaleLearningRate * covTrace
        if self.storeAllDistributions:
            self._allDistributions.append((self._center.copy(), self._A.copy()))

    @property
    def _lastA(self):
        return self._allDistributions[-2][1]

    @property
    def _lastCenter(self):
        return self._allDistributions[-2][0]

    @property
    def _population(self):
        if self._wasUnwrapped:
            return [self._allEvaluated[i].params for i in self._pointers]
        else:
            return [self._allEvaluated[i] for i in self._pointers]

    @property
    def _currentEvaluations(self):
        fits = [self._allEvaluations[i] for i in self._pointers]
        if self._wasOpposed:
            fits = [-x for x in fits]
        return fits

    def _produceSample(self):
        return randn(self.numParameters)

    def _sample2base(self, sample):
        """ How does a sample look in the outside (base problem) coordinate system? """
        return dot(self._A, sample) + self._center

    def _base2oldsample(self, e):
        """ How would the point have looked in the previous reference coordinates? """
        return dot(self._lastInvA, (e - self._lastCenter))

    def _base2sample(self, e):
        """ How does the point look in the present reference coordinates? """
        return dot(self._invA, (e - self._center))

    def _oldpdf(self, s):
        s = self._base2oldsample(self._sample2base(s))
        return exp(-0.5 * dot(s, s) - self._lastLogDetA)

    def _newpdf(self, s):
        return exp(-0.5 * dot(s, s) - self._logDetA)

    def _produceSamples(self):
        """ Append batch size new samples and evaluate them. """
        reuseindices = []
        if self.numLearningSteps == 0 or not self.importanceMixing:
            for _ in range(self.batchSize):
                self._oneEvaluation(self._sample2base(self._produceSample()))
            self._pointers = list(range(len(self._allEvaluated) - self.batchSize,
                                        len(self._allEvaluated)))
        else:
            # reuse as many points of the previous population as possible;
            # the old points must be expressed in the current sample coordinates,
            # consistent with what _oldpdf and _newpdf expect
            reuseindices, newpoints = importanceMixing(
                [self._base2sample(x) for x in self._population],
                self._oldpdf, self._newpdf, self._produceSample, self.forcedRefresh)
            for s in newpoints:
                self._oneEvaluation(self._sample2base(s))
            self._pointers = ([self._pointers[i] for i in reuseindices]
                              + list(range(len(self._allEvaluated) - self.batchSize + len(reuseindices),
                                           len(self._allEvaluated))))
        self._allGenSteps.append(self._allGenSteps[-1] + self.batchSize - len(reuseindices))
        self._allPointers.append(self._pointers)
class SNES(DistributionBasedOptimizer):
    """ Separable NES (diagonal).
    [As described in Schaul, Glasmachers and Schmidhuber (GECCO'11)] """

    # parameters, which can be set but have a good (adapted) default value
    centerLearningRate = 1.0
    covLearningRate = None
    batchSize = None
    uniformBaseline = True
    shapingFunction = HansenRanking()
    initVariance = 1.

    # fixed settings
    mustMaximize = True
    storeAllEvaluations = True
    storeAllEvaluated = True

    # for very long runs, we don't want to run out of memory
    clearStorage = False

    # minimal variance below which the search is aborted
    varianceCutoff = 1e-20

    def _stoppingCriterion(self):
        if DistributionBasedOptimizer._stoppingCriterion(self):
            return True
        elif max(abs(self._sigmas)) < self.varianceCutoff:
            return True
        else:
            return False

    def _initLearningRate(self):
        """ Careful, robust default value. """
        return 0.6 * (3 + log(self.numParameters)) / 3 / sqrt(self.numParameters)

    def _initBatchSize(self):
        """ As in CMA-ES. """
        return 4 + int(floor(3 * log(self.numParameters)))

    def _additionalInit(self):
        if self.covLearningRate is None:
            self.covLearningRate = self._initLearningRate()
        if self.batchSize is None:
            self.batchSize = self._initBatchSize()
        self._center = self._initEvaluable.copy()
        self._sigmas = ones(self.numParameters) * self.initVariance

    @property
    def _population(self):
        return [self._allEvaluated[i] for i in self._pointers]

    @property
    def _currentEvaluations(self):
        fits = [self._allEvaluations[i] for i in self._pointers]
        if self._wasOpposed:
            fits = [-x for x in fits]
        return fits

    def _produceSample(self):
        return randn(self.numParameters)

    def _sample2base(self, sample):
        """ How does a sample look in the outside (base problem) coordinate system? """
        return self._sigmas * sample + self._center

    def _base2sample(self, e):
        """ How does the point look in the present reference coordinates? """
        return (e - self._center) / self._sigmas

    def _produceSamples(self):
        """ Append batch size new samples and evaluate them. """
        if self.clearStorage:
            self._allEvaluated = []
            self._allEvaluations = []
        tmp = [self._sample2base(self._produceSample()) for _ in range(self.batchSize)]
        for x in tmp:
            self._oneEvaluation(x)
        self._pointers = list(range(len(self._allEvaluated) - self.batchSize,
                                    len(self._allEvaluated)))

    def _learnStep(self):
        # produce samples
        self._produceSamples()
        samples = [self._base2sample(x) for x in self._population]

        # compute utilities
        utilities = self.shapingFunction(self._currentEvaluations)
        utilities /= sum(utilities)  # make the utilities sum to 1
        if self.uniformBaseline:
            utilities -= 1. / self.batchSize

        # update center
        dCenter = dot(utilities, samples)
        self._center += self.centerLearningRate * self._sigmas * dCenter

        # update variances
        covGradient = dot(utilities, [s ** 2 - 1 for s in samples])
        dA = 0.5 * self.covLearningRate * covGradient
        self._sigmas = self._sigmas * exp(dA)
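
# For reference, the default batch size above grows only logarithmically with the
# problem dimension and the learning rate shrinks slowly (O(log(d) / sqrt(d))),
# which is what makes SNES usable on high-dimensional separable problems.
# A quick worked example of the two heuristics (values rounded):
#
#     dim = 100
#     covLearningRate = 0.6 * (3 + log(100)) / 3 / sqrt(100)   # ~0.152
#     batchSize = 4 + int(floor(3 * log(100)))                 # = 17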
class Rank1NES(DistributionBasedOptimizer):
    """ Natural Evolution Strategies with rank-1 covariance matrices.
    See http://arxiv.org/abs/1106.1998 for a description. """

    # parameters, which can be set but have a good (adapted) default value
    centerLearningRate = 1.0
    scaleLearningRate = None
    covLearningRate = None
    batchSize = None
    uniformBaseline = True
    shapingFunction = HansenRanking()

    # fixed settings
    mustMaximize = True
    storeAllEvaluations = True
    storeAllEvaluated = True
    storeAllDistributions = True
    storeAllRates = True
    verboseGaps = 1
    initVariance = 1.
    varianceCutoff = 1e-20

    def _additionalInit(self):
        # heuristic settings
        if self.covLearningRate is None:
            self.covLearningRate = self._initLearningRate()
        if self.scaleLearningRate is None:
            self.scaleLearningRate = self.covLearningRate
        if self.batchSize is None:
            self.batchSize = self._initBatchSize()

        # other initializations
        self._center = self._initEvaluable.copy()
        self._logDetA = log(self.initVariance) / 2
        self._principalVector = randn(self.numParameters)
        self._principalVector /= sqrt(dot(self._principalVector, self._principalVector))
        self._allDistributions = [(self._center.copy(), self._principalVector.copy(),
                                   self._logDetA)]

        # overrides specific to the rank-1 case
        self.covLearningRate = 0.1
        self.batchSize = int(max(5, max(4 * log2(self.numParameters),
                                        0.2 * self.numParameters)))
        self.uniformBaseline = False
        self.scaleLearningRate = 0.1

    def _stoppingCriterion(self):
        if DistributionBasedOptimizer._stoppingCriterion(self):
            return True
        elif self._getMaxVariance < self.varianceCutoff:
            return True
        else:
            return False

    @property
    def _getMaxVariance(self):
        return exp(self._logDetA * 2 / self.numParameters)

    def _initLearningRate(self):
        return 0.6 * (3 + log(self.numParameters)) / self.numParameters / sqrt(self.numParameters)

    def _initBatchSize(self):
        return 4 + int(floor(3 * log(self.numParameters)))

    @property
    def _population(self):
        return [self._allEvaluated[i] for i in self._pointers]

    @property
    def _currentEvaluations(self):
        fits = [self._allEvaluations[i] for i in self._pointers]
        if self._wasOpposed:
            fits = [-x for x in fits]
        return fits

    def _produceSample(self):
        return randn(self.numParameters + 1)

    def _produceSamples(self):
        """ Append batch size new samples and evaluate them. """
        tmp = [self._sample2base(self._produceSample()) for _ in range(self.batchSize)]
        list(map(self._oneEvaluation, tmp))
        self._pointers = list(range(len(self._allEvaluated) - self.batchSize,
                                    len(self._allEvaluated)))

    def _notify(self):
        """ Provide some feedback during the run. """
        if self.verbose:
            if self.numEvaluations % self.verboseGaps == 0:
                print('Step:', self.numLearningSteps, 'best:', self.bestEvaluation, end=' ')
                print('logVar', round(self._logDetA, 3), end=' ')
                print('log|vector|',
                      round(log(dot(self._principalVector, self._principalVector)) / 2, 3))
        if self.listener is not None:
            self.listener(self.bestEvaluable, self.bestEvaluation)

    def _learnStep(self):
        # concatenations of y vector and z value
        samples = [self._produceSample() for _ in range(self.batchSize)]
        u = self._principalVector
        a = self._logDetA

        # unnamed in the paper (y + z*u), i.e. x / exp(lambda)
        W = [s[:-1] + u * s[-1] for s in samples]
        points = [self._center + exp(a) * w for w in W]
        list(map(self._oneEvaluation, points))
        self._pointers = list(range(len(self._allEvaluated) - self.batchSize,
                                    len(self._allEvaluated)))

        utilities = self.shapingFunction(self._currentEvaluations)
        utilities /= sum(utilities)  # make the utilities sum to 1
        if self.uniformBaseline:
            utilities -= 1. / self.batchSize

        # drop samples with zero utility
        W = [w for i, w in enumerate(W) if utilities[i] != 0]
        utilities = [uw for uw in utilities if uw != 0]

        dim = self.numParameters
        r = sqrt(dot(u, u))
        v = u / r
        c = log(r)

        # inner products, but not scaled with exp(lambda)
        wws = array([dot(w, w) for w in W])
        wvs = array([dot(v, w) for w in W])
        wv2s = array([wv ** 2 for wv in wvs])

        dCenter = exp(self._logDetA) * dot(utilities, W)
        self._center += self.centerLearningRate * dCenter

        kp = ((r ** 2 - dim + 2) * wv2s - (r ** 2 + 1) * wws) / (2 * r * (dim - 1.))
        # natural gradient on lambda, equation (5)
        da = 1. / (2 * (dim - 1)) * dot((wws - dim) - (wv2s - 1), utilities)
        # natural gradient on u, equation (6)
        du = dot(kp, utilities) * v + dot(multiply(wvs / r, utilities), W)
        # equation (7)
        dc = dot(du, v) / r
        # equation (8)
        dv = du / r - dc * v

        epsilon = min(self.covLearningRate, 2 * sqrt(r ** 2 / dot(du, du)))
        if dc > 0:
            # additive update
            self._principalVector += epsilon * du
        else:
            # multiplicative update: prevents instability
            c += epsilon * dc
            v += epsilon * dv
            v /= sqrt(dot(v, v))
            r = exp(c)
            self._principalVector = r * v

        self._lastLogDetA = self._logDetA
        self._logDetA += self.scaleLearningRate * da
        if self.storeAllDistributions:
            self._allDistributions.append((self._center.copy(), self._principalVector.copy(),
                                           self._logDetA))
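
# Sketch of the sampling step used in Rank1NES._learnStep above: each raw sample
# has numParameters + 1 entries, the last of which scales the principal vector,
# so a candidate point is center + exp(logDetA) * (y + z * u). The names below
# (dim, center, logDetA, u) simply mirror the instance attributes for illustration:
#
#     s = randn(dim + 1)
#     y, z = s[:-1], s[-1]
#     x = center + exp(logDetA) * (y + z * u)
#
# The resulting search distribution is Gaussian with covariance
# exp(2 * logDetA) * (I + outer(u, u)), i.e. isotropic plus a single dominant
# direction along u, which is what keeps the per-step cost linear in dim.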