Example #1

# Assumed imports (NumPy/SciPy and PyBrain; exact module paths may differ by version):
from numpy import array, dot, exp, eye, floor, log, outer, sqrt, trace
from numpy.random import randn
from scipy.linalg import expm
from pybrain.optimization.distributionbased.distributionbased import DistributionBasedOptimizer
from pybrain.tools.rankingfunctions import HansenRanking
from pybrain.auxiliary.importancemixing import importanceMixing

class XNES(DistributionBasedOptimizer):
    """ NES with exponential parameter representation. """

    # parameters, which can be set but have a good (adapted) default value
    covLearningRate = None
    centerLearningRate = 1.0
    scaleLearningRate = None
    uniformBaseline = True
    batchSize = None
    shapingFunction = HansenRanking()
    importanceMixing = False
    forcedRefresh = 0.01

    # fixed settings
    mustMaximize = True
    storeAllEvaluations = True
    storeAllEvaluated = True
    storeAllDistributions = False

    def _additionalInit(self):
        # good heuristic default parameter settings
        dim = self.numParameters
        if self.covLearningRate is None:
            self.covLearningRate = 0.6 * (3 + log(dim)) / dim / sqrt(dim)
        if self.scaleLearningRate is None:
            self.scaleLearningRate = self.covLearningRate
        if self.batchSize is None:
            if self.importanceMixing:
                self.batchSize = 10 * dim
            else:
                self.batchSize = 4 + int(floor(3 * log(dim)))

        # some bookkeeping variables
        self._center = self._initEvaluable.copy()
        self._A = eye(self.numParameters)  # square root of covariance matrix
        self._invA = eye(self.numParameters)
        self._logDetA = 0.
        self._allPointers = []
        self._allGenSteps = [0]
        if self.storeAllDistributions:
            self._allDistributions = [(self._center.copy(), self._A.copy())]

    def _learnStep(self):
        """ Main part of the algorithm. """
        I = eye(self.numParameters)
        self._produceSamples()
        utilities = self.shapingFunction(self._currentEvaluations)
        utilities /= sum(utilities)  # make the utilities sum to 1
        if self.uniformBaseline:
            utilities -= 1. / self.batchSize
        samples = array([self._base2sample(p) for p in self._population])

        dCenter = dot(samples.T, utilities)
        covGradient = dot(
            array([outer(s, s) - I for s in samples]).T, utilities)
        covTrace = trace(covGradient)
        covGradient -= covTrace / self.numParameters * I
        dA = 0.5 * (self.scaleLearningRate * covTrace / self.numParameters * I
                    + self.covLearningRate * covGradient)

        self._lastLogDetA = self._logDetA
        self._lastInvA = self._invA

        self._center += self.centerLearningRate * dot(self._A, dCenter)
        self._A = dot(self._A, expm(dA))
        self._invA = dot(expm(-dA), self._invA)
        self._logDetA += 0.5 * self.scaleLearningRate * covTrace
        if self.storeAllDistributions:
            self._allDistributions.append(
                (self._center.copy(), self._A.copy()))

    @property
    def _lastA(self):
        return self._allDistributions[-2][1]

    @property
    def _lastCenter(self):
        return self._allDistributions[-2][0]

    @property
    def _population(self):
        if self._wasUnwrapped:
            return [self._allEvaluated[i].params for i in self._pointers]
        else:
            return [self._allEvaluated[i] for i in self._pointers]

    @property
    def _currentEvaluations(self):
        fits = [self._allEvaluations[i] for i in self._pointers]
        if self._wasOpposed:
            fits = [-x for x in fits]
        return fits

    def _produceSample(self):
        return randn(self.numParameters)

    def _sample2base(self, sample):
        """ How does a sample look in the outside (base problem) coordinate system? """
        return dot(self._A, sample) + self._center

    def _base2oldsample(self, e):
        """ How would the point have looked in the previous reference coordinates? """
        return dot(self._lastInvA, (e - self._lastCenter))

    def _base2sample(self, e):
        """ How does the point look in the present one reference coordinates? """
        return dot(self._invA, (e - self._center))

    def _oldpdf(self, s):
        s = self._base2oldsample(self._sample2base(s))
        return exp(-0.5 * dot(s, s) - self._lastLogDetA)

    def _newpdf(self, s):
        return exp(-0.5 * dot(s, s) - self._logDetA)

    def _produceSamples(self):
        """ Append batch size new samples and evaluate them. """
        reuseindices = []
        if self.numLearningSteps == 0 or not self.importanceMixing:
            [
                self._oneEvaluation(self._sample2base(self._produceSample()))
                for _ in range(self.batchSize)
            ]
            self._pointers = list(
                range(
                    len(self._allEvaluated) - self.batchSize,
                    len(self._allEvaluated)))
        else:
            reuseindices, newpoints = importanceMixing(
                [self._base2sample(p) for p in self._population],
                self._oldpdf, self._newpdf, self._produceSample,
                self.forcedRefresh)
            [self._oneEvaluation(self._sample2base(s)) for s in newpoints]
            self._pointers = ([self._pointers[i] for i in reuseindices]
                              + list(range(
                                  len(self._allEvaluated) - self.batchSize +
                                  len(reuseindices), len(self._allEvaluated))))
        self._allGenSteps.append(self._allGenSteps[-1] + self.batchSize -
                                 len(reuseindices))
        self._allPointers.append(self._pointers)
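
A minimal usage sketch for XNES follows. It assumes the PyBrain black-box optimizer interface (a constructor taking the objective callable and an initial point, keyword settings such as maxEvaluations, and a learn() method returning the best parameters with their fitness); the neg_sphere objective and the settings are illustrative, not part of the class above.

from numpy import dot, ones

def neg_sphere(x):
    # XNES maximizes (mustMaximize = True), so negate the sphere function
    return -dot(x, x)

if __name__ == '__main__':
    opt = XNES(neg_sphere, ones(10), maxEvaluations=5000)
    best, fitness = opt.learn()
    print('best fitness found:', fitness)
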
Example #2

# Assumed imports (NumPy and PyBrain; exact module paths may differ by version):
from numpy import dot, exp, floor, log, ones, sqrt
from numpy.random import randn
from pybrain.optimization.distributionbased.distributionbased import DistributionBasedOptimizer
from pybrain.tools.rankingfunctions import HansenRanking

class SNES(DistributionBasedOptimizer):
    """ Separable NES (diagonal). 
    [As described in Schaul, Glasmachers and Schmidhuber (GECCO'11)]
    """
    
    # parameters, which can be set but have a good (adapted) default value
    centerLearningRate = 1.0
    covLearningRate = None     
    batchSize = None     
    uniformBaseline = True
    shapingFunction = HansenRanking()
    initVariance = 1.
    
    # fixed settings
    mustMaximize = True
    storeAllEvaluations = True    
    storeAllEvaluated = True
    
    # for very long runs, we don't want to run out of memory
    clearStorage = False    
            
    # minimal variance below which the search is aborted
    varianceCutoff = 1e-20
            
    def _stoppingCriterion(self):
        if DistributionBasedOptimizer._stoppingCriterion(self):
            return True
        elif max(abs(self._sigmas)) < self.varianceCutoff:   
            return True
        else:
            return False
            
    def _initLearningRate(self):
        """ Careful, robust default value. """
        return 0.6 * (3 + log(self.numParameters)) / 3 / sqrt(self.numParameters)
        
    def _initBatchSize(self):
        """ as in CMA-ES """
        return 4 + int(floor(3 * log(self.numParameters)))    
    
    def _additionalInit(self):
        if self.covLearningRate is None:
            self.covLearningRate = self._initLearningRate()        
        if self.batchSize is None:
            self.batchSize = self._initBatchSize()                           
            
        self._center = self._initEvaluable.copy()          
        self._sigmas = ones(self.numParameters) * self.initVariance
    
    @property
    def _population(self):
        return [self._allEvaluated[i] for i in self._pointers]
        
    @property
    def _currentEvaluations(self):        
        fits = [self._allEvaluations[i] for i in self._pointers]
        if self._wasOpposed:
            fits = [-x for x in fits]
        return fits
                        
    def _produceSample(self):
        return randn(self.numParameters)
            
    def _sample2base(self, sample):       
        """ How does a sample look in the outside (base problem) coordinate system? """ 
        return self._sigmas * sample + self._center
              
    def _base2sample(self, e):
        """ How does the point look in the present one reference coordinates? """
        return (e - self._center) / self._sigmas
    
    def _produceSamples(self):
        """ Append batch size new samples and evaluate them. """
        if self.clearStorage:
            self._allEvaluated = []
            self._allEvaluations = []
            
        tmp = [self._sample2base(self._produceSample()) for _ in range(self.batchSize)]
        list(map(self._oneEvaluation, tmp))
        self._pointers = list(range(len(self._allEvaluated) - self.batchSize, len(self._allEvaluated)))                    
            
    def _learnStep(self):
        # produce samples
        self._produceSamples()
        samples = [self._base2sample(p) for p in self._population]
        
        #compute utilities
        utilities = self.shapingFunction(self._currentEvaluations)
        utilities /= sum(utilities)  # make the utilities sum to 1
        if self.uniformBaseline:
            utilities -= 1. / self.batchSize                           
                    
        # update center
        dCenter = dot(utilities, samples)
        self._center += self.centerLearningRate * self._sigmas * dCenter
        
        # update variances
        covGradient = dot(utilities, [s ** 2 - 1 for s in samples])        
        dA = 0.5 * self.covLearningRate * covGradient                                
        self._sigmas = self._sigmas * exp(dA)            
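
To make the mathematics of _learnStep easier to follow, here is a self-contained sketch of one SNES generation in plain NumPy. The linear rank-based utility is a simplified stand-in for HansenRanking, and the names snes_step, lr_center and lr_sigma are invented for this illustration; the update formulas mirror the ones in the class above.

import numpy as np

def snes_step(f, center, sigmas, batch_size, lr_center=1.0, lr_sigma=0.1):
    """One generation of the separable NES update (illustrative sketch)."""
    dim = len(center)
    samples = np.random.randn(batch_size, dim)     # s ~ N(0, I)
    points = center + sigmas * samples             # map into problem coordinates
    fitnesses = np.array([f(p) for p in points])

    # linear rank-based utilities that sum to 1, minus the uniform baseline
    utilities = np.zeros(batch_size)
    utilities[np.argsort(fitnesses)] = np.arange(batch_size, dtype=float)
    utilities /= utilities.sum()
    utilities -= 1.0 / batch_size

    # natural-gradient updates on the center and the per-dimension scales
    new_center = center + lr_center * sigmas * utilities.dot(samples)
    new_sigmas = sigmas * np.exp(0.5 * lr_sigma * utilities.dot(samples ** 2 - 1))
    return new_center, new_sigmas

Iterating snes_step on a smooth objective concentrates the distribution: the center drifts toward better points while the per-dimension sigmas shrink, which is exactly the condition the varianceCutoff stopping criterion monitors.
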
Example #3

# Assumed imports (NumPy and PyBrain; exact module paths may differ by version):
from numpy import array, dot, exp, floor, log, log2, multiply, sqrt
from numpy.random import randn
from pybrain.optimization.distributionbased.distributionbased import DistributionBasedOptimizer
from pybrain.tools.rankingfunctions import HansenRanking

class Rank1NES(DistributionBasedOptimizer):
    """ Natural Evolution Strategies with rank-1 covariance matrices. 
    
    See http://arxiv.org/abs/1106.1998 for a description. """

    # parameters, which can be set but have a good (adapted) default value
    centerLearningRate = 1.0
    scaleLearningRate = None
    covLearningRate = None
    batchSize = None
    uniformBaseline = True
    shapingFunction = HansenRanking()

    # fixed settings
    mustMaximize = True
    storeAllEvaluations = True
    storeAllEvaluated = True
    storeAllDistributions = True
    storeAllRates = True
    verboseGaps = 1
    initVariance = 1.
    varianceCutoff = 1e-20

    def _additionalInit(self):
        # heuristic settings
        if self.covLearningRate is None:
            self.covLearningRate = self._initLearningRate()
        if self.scaleLearningRate is None:
            self.scaleLearningRate = self.covLearningRate
        if self.batchSize is None:
            self.batchSize = self._initBatchSize()

        # other initializations
        self._center = self._initEvaluable.copy()
        self._logDetA = log(self.initVariance) / 2
        self._principalVector = randn(self.numParameters)
        self._principalVector /= sqrt(
            dot(self._principalVector, self._principalVector))
        self._allDistributions = [
            (self._center.copy(), self._principalVector.copy(), self._logDetA)
        ]
        # NOTE: hard-coded overrides below supersede the heuristic defaults set above
        self.covLearningRate = 0.1
        self.batchSize = int(
            max(5, max(4 * log2(self.numParameters),
                       0.2 * self.numParameters)))
        self.uniformBaseline = False
        self.scaleLearningRate = 0.1

    def _stoppingCriterion(self):
        if DistributionBasedOptimizer._stoppingCriterion(self):
            return True
        elif self._getMaxVariance < self.varianceCutoff:
            return True
        else:
            return False

    @property
    def _getMaxVariance(self):
        return exp(self._logDetA * 2 / self.numParameters)

    def _initLearningRate(self):
        return 0.6 * (3 + log(self.numParameters)) / self.numParameters / sqrt(
            self.numParameters)

    def _initBatchSize(self):
        return 4 + int(floor(3 * log(self.numParameters)))

    @property
    def _population(self):
        return [self._allEvaluated[i] for i in self._pointers]

    @property
    def _currentEvaluations(self):
        fits = [self._allEvaluations[i] for i in self._pointers]
        if self._wasOpposed:
            fits = [-x for x in fits]
        return fits

    def _produceSample(self):
        return randn(self.numParameters + 1)

    def _produceSamples(self):
        """ Append batch size new samples and evaluate them. """
        tmp = [
            self._sample2base(self._produceSample())
            for _ in range(self.batchSize)
        ]
        list(map(self._oneEvaluation, tmp))
        self._pointers = list(
            range(
                len(self._allEvaluated) - self.batchSize,
                len(self._allEvaluated)))

    def _notify(self):
        """ Provide some feedback during the run. """
        if self.verbose:
            if self.numEvaluations % self.verboseGaps == 0:
                print('Step:',
                      self.numLearningSteps,
                      'best:',
                      self.bestEvaluation,
                      end=' ')
                print('logVar', round(self._logDetA, 3), end=' ')
                print(
                    'log|vector|',
                    round(
                        log(dot(self._principalVector, self._principalVector))
                        / 2, 3))

        if self.listener is not None:
            self.listener(self.bestEvaluable, self.bestEvaluation)

    def _learnStep(self):
        # concatenations of y vector and z value
        samples = [self._produceSample() for _ in range(self.batchSize)]

        u = self._principalVector
        a = self._logDetA

        # unnamed in paper (y+zu), or x/exp(lambda)
        W = [s[:-1] + u * s[-1] for s in samples]
        points = [self._center + exp(a) * w for w in W]

        list(map(self._oneEvaluation, points))

        self._pointers = list(
            range(
                len(self._allEvaluated) - self.batchSize,
                len(self._allEvaluated)))

        utilities = self.shapingFunction(self._currentEvaluations)
        utilities /= sum(utilities)  # make the utilities sum to 1
        if self.uniformBaseline:
            utilities -= 1. / self.batchSize

        W = [w for i, w in enumerate(W) if utilities[i] != 0]
        utilities = [uw for uw in utilities if uw != 0]

        dim = self.numParameters

        r = sqrt(dot(u, u))
        v = u / r
        c = log(r)

        # inner products, not yet scaled by exp(lambda)
        wws = array([dot(w, w) for w in W])
        wvs = array([dot(v, w) for w in W])
        wv2s = array([wv**2 for wv in wvs])

        dCenter = exp(self._logDetA) * dot(utilities, W)
        self._center += self.centerLearningRate * dCenter

        kp = ((r**2 - dim + 2) * wv2s - (r**2 + 1) * wws) / (2 * r *
                                                             (dim - 1.))

        # natural gradient on lambda, equation (5)
        da = 1. / (2 * (dim - 1)) * dot((wws - dim) - (wv2s - 1), utilities)

        # natural gradient on u, equation (6)
        du = dot(kp, utilities) * v + dot(multiply(wvs / r, utilities), W)

        # equation (7)
        dc = dot(du, v) / r

        # equation (8)
        dv = du / r - dc * v

        epsilon = min(self.covLearningRate, 2 * sqrt(r**2 / dot(du, du)))
        if dc > 0:
            # additive update
            self._principalVector += epsilon * du
        else:
            # multiplicative update
            # prevents instability
            c += epsilon * dc
            v += epsilon * dv
            v /= sqrt(dot(v, v))
            r = exp(c)
            self._principalVector = r * v

        self._lastLogDetA = self._logDetA
        self._logDetA += self.scaleLearningRate * da

        if self.storeAllDistributions:
            self._allDistributions.append(
                (self._center.copy(), self._principalVector.copy(),
                 self._logDetA))
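
The dim+1 numbers returned by _produceSample encode the rank-1 sampling trick used in _learnStep: a point is built as x = center + exp(logDetA) * (y + z*u), where y is a standard normal vector, z a standard normal scalar and u the principal vector, which amounts to drawing from a Gaussian with covariance exp(2*logDetA) * (I + u u^T). The short NumPy sketch below (names are illustrative) demonstrates the construction and checks the covariance empirically.

import numpy as np

def sample_rank1_gaussian(center, u, log_scale, n):
    """Draw n samples from N(center, exp(2*log_scale) * (I + u u^T))."""
    dim = len(center)
    y = np.random.randn(n, dim)      # isotropic component
    z = np.random.randn(n, 1)        # coefficient along the principal direction u
    return center + np.exp(log_scale) * (y + z * u)

# empirical check: with u = (2, 0, 0) the covariance should be close to
# I + u u^T = diag(5, 1, 1)
u = np.array([2.0, 0.0, 0.0])
xs = sample_rank1_gaussian(np.zeros(3), u, 0.0, 200000)
print(np.cov(xs.T).round(2))
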