示例#1
0
文件: score.py 项目: hypernicon/pyec
class BayesianDirichletScorer(object):
    """Score a network against data using log-gamma count terms.

    Calling an instance sums, for every variable of the network, log-gamma
    terms built from a prior pseudo-count and the number of matching data
    rows, subtracts a penalty proportional to ``network.edgeRatio``, and
    divides the result by the number of data rows.

    Per-variable scores are cached in ``self.varCache`` under a key made of
    the variable's index and parents only. The data is NOT part of the key,
    so a scorer instance should be used with a single data set.
    """

    def __init__(self):
        self.cache = {}
        # per-variable partial scores, see ``__call__``
        self.varCache = LRUCache()

    def matchesPrior(self, data, configuration):
        """Return the prior pseudo-count for ``configuration``.

        This implementation ignores both arguments and always returns 1.0.

        :param data: The data set being scored (unused here)
        :param configuration: The configuration in question (unused here)
        :returns: ``1.0``
        """
        return 1.0

    def matches(self, data, config):
        """Count the number of instances of a configuration in the data.

        :param data: An iterable of data rows
        :param config: A configuration; a row ``x`` matches when
                       ``config <= x`` is true
        :returns: The number of matching rows (0 for empty data)
        """
        return sum(1 for x in data if config <= x)

    def __call__(self, network, data):
        """Compute the score of ``network`` on ``data``.

        :param network: The network to score; must provide
                        ``computeEdgeStatistics()``, ``edgeRatio`` and
                        ``variables``
        :param data: The data rows; must support ``len`` and iteration and
                     must not be empty, since the total is divided by
                     ``len(data)``
        :returns: The penalized score divided by the number of data rows
        """
        network.computeEdgeStatistics()
        numRows = len(data)
        total = 0.0
        # penalty that grows with the edge ratio and the size of the data
        total -= network.edgeRatio * numRows * 10
        for variable in network.variables:
            varKey = str(variable.index) + str(variable.parents)
            if self.varCache.has_key(varKey):
                # reuse the contribution computed for this variable/parents
                total += self.varCache[varKey]
                continue
            start = total
            for configuration in variable.configurations():
                prior = self.matchesPrior(data, configuration)
                total += gammaln(prior)
                total -= gammaln(prior + self.matches(data, configuration))
                for val in variable.values():
                    # build the extended configuration once per value
                    extended = configuration + val
                    priorVal = self.matchesPrior(data, extended)
                    total -= gammaln(priorVal)
                    total += gammaln(priorVal + self.matches(data, extended))

            # cache only what this variable added to the running total
            self.varCache[varKey] = total - start
        return total / numRows
示例#2
0
文件: score.py 项目: hypernicon/pyec
class BayesianDirichletScorer(object):
    """Score a network against data using log-gamma count terms.

    Calling an instance sums, for every variable of the network, log-gamma
    terms built from a prior pseudo-count and the number of matching data
    rows, subtracts a penalty proportional to ``network.edgeRatio``, and
    divides the result by the number of data rows.

    Per-variable scores are cached in ``self.varCache`` under a key made of
    the variable's index and parents only. The data is NOT part of the key,
    so a scorer instance should be used with a single data set.
    """

    def __init__(self):
        self.cache = {}
        # per-variable partial scores, see ``__call__``
        self.varCache = LRUCache()

    def matchesPrior(self, data, configuration):
        """Return the prior pseudo-count for ``configuration``.

        This implementation ignores both arguments and always returns 1.0.

        :param data: The data set being scored (unused here)
        :param configuration: The configuration in question (unused here)
        :returns: ``1.0``
        """
        return 1.0

    def matches(self, data, config):
        """Count the number of instances of a configuration in the data.

        :param data: An iterable of data rows
        :param config: A configuration; a row ``x`` matches when
                       ``config <= x`` is true
        :returns: The number of matching rows (0 for empty data)
        """
        return sum(1 for x in data if config <= x)

    def __call__(self, network, data):
        """Compute the score of ``network`` on ``data``.

        :param network: The network to score; must provide
                        ``computeEdgeStatistics()``, ``edgeRatio`` and
                        ``variables``
        :param data: The data rows; must support ``len`` and iteration and
                     must not be empty, since the total is divided by
                     ``len(data)``
        :returns: The penalized score divided by the number of data rows
        """
        network.computeEdgeStatistics()
        numRows = len(data)
        total = 0.0
        # penalty that grows with the edge ratio and the size of the data
        total -= network.edgeRatio * numRows * 10
        for variable in network.variables:
            varKey = str(variable.index) + str(variable.parents)
            if self.varCache.has_key(varKey):
                # reuse the contribution computed for this variable/parents
                total += self.varCache[varKey]
                continue
            start = total
            for configuration in variable.configurations():
                prior = self.matchesPrior(data, configuration)
                total += gammaln(prior)
                total -= gammaln(prior + self.matches(data, configuration))
                for val in variable.values():
                    # build the extended configuration once per value
                    extended = configuration + val
                    priorVal = self.matchesPrior(data, extended)
                    total -= gammaln(priorVal)
                    total += gammaln(priorVal + self.matches(data, extended))

            # cache only what this variable added to the running total
            self.varCache[varKey] = total - start
        return total / numRows
示例#3
0
class History(object):
    """A History used to track the progress of an optimization algorithm.

    Different algorithms should extend this class in order to define
    the minimal amount of history that needs to be stored in order for the
    algorithm to operate.

    The base class tracks the number of evaluations and updates, the
    minimal and maximal scores seen so far together with their solutions,
    and a cache of fitness values keyed by ``space.hash(point)``.

    :param config: The configuration object; this class reads
                   ``printEvery``, ``minimize``, ``observer`` and
                   ``__properties__`` from it

    """
    useCache = True  # whether ``score`` reads from and writes to the cache
    attrs = set()  # names of the attributes saved by ``__getstate__``
    sorted = False
    root = True  # whether the current history is the top level root for
                 # all histories of this algorithm

    def __init__(self, config):
        super(History, self).__init__()
        self.config = config
        self.evals = 0
        self.minSolution = None
        self.minScore = np.inf
        self.maxSolution = None
        self.maxScore = -np.inf
        self._empty = True
        # a cache may already be provided, e.g. as a class attribute
        if not hasattr(self, 'cache'):
            self.cache = LRUCache()  # 10,000 items by default
        self.updates = 0
        # how often to print the generation report; when the config does
        # not set it, the interval is so large that nothing is printed
        self.printEvery = config.printEvery or 1000000000000
        self.attrs = set([
            "evals", "minSolution", "minScore", "maxSolution", "maxScore",
            "attrs", "_empty", "cache", "updates", "printEvery", "useCache"
        ])

    def __getstate__(self):
        """Used by :class:`CheckpointedHistory`
        and :class:`CheckpointedMultipleHistory` to checkpoint a history
        so it can be rolled back after updates.

        Should return all objects in the history in a dictionary, sensitive
        to the fact that object references may need to be copied. Lists are
        shallow-copied; every other value is stored by reference. The
        config is stored as its ``__properties__`` under the key ``cfg``.

        :returns: A dictionary with the state of the history

        """
        state = {}

        for attr in self.attrs:
            val = getattr(self, attr)
            if isinstance(val, list):
                # shallow copy so later in-place changes do not leak in
                val = list(val)
            state[attr] = val

        state['cfg'] = self.config.__properties__
        return state

    def __setstate__(self, state):
        """Restore the attributes saved by ``__getstate__``.

        The config is rebuilt from the properties stored under ``cfg``.

        :param state: A dictionary as returned by ``__getstate__``

        """
        # Use the attribute names recorded in the state when available: on
        # an instance created by unpickling, ``__init__`` has not run and
        # ``self.attrs`` is still the empty class-level set, so iterating
        # it would restore nothing.
        names = state.get('attrs') or self.attrs
        for attr in list(names):
            setattr(self, attr, state[attr])

        import pyec.config
        self.config = pyec.config.Config(**state['cfg'])

    def empty(self):
        """Whether the history has been used or not."""
        return self._empty

    def better(self, score1, score2):
        """Return whether one score is better than another.

        Uses ``config.minimize`` to decide whether lesser or greater
        numbers are better.

        :param score1: the score in question (floating point number)
        :type score1: ``float``
        :param score2: the score being compared
        :type score2: ``float``
        :returns: whether ``score1`` is better than ``score2``
        """
        if self.config.minimize:
            return score1 < score2
        return score1 > score2

    def best(self):
        """Get the best solution, whether minimizing or maximizing.

        Same as ``optimal``

        :returns: The result of ``minimal()`` when ``config.minimize`` is
                  set, otherwise the result of ``maximal()``

        """
        if self.config.minimize:
            return self.minimal()
        return self.maximal()

    optimal = best

    def minimal(self):
        """Get the minimal solution and its score.

        :returns: A tuple of two items with the solution object first
                  and the score of that item second

        """
        return self.minSolution, self.minScore

    def maximal(self):
        """Get the maximal solution and its score.

        :returns: A tuple of two items with the solution object first
                  and the score of that item second

        """
        return self.maxSolution, self.maxScore

    def num_evaluations(self):
        """Get the number of function evaluations performed in this
        history object.

        :returns: An integer representing the number of times the fitness
                  function or objective has been evaluated

        """
        return self.evals

    def update(self, population, fitness, space, opt):
        """Update the state of the :class:`History` with the latest
        population and its fitness scores. Subclasses should probably
        override ``internalUpdate`` rather than ``update``, unless they
        want to change how the min/max are tracked.

        If ``population`` is ``None``, then this method does nothing and
        returns ``None``; this is so that you can set up a loop to run an
        optimizer like::

            p = None
            f = lambda x: ...
            t = History(config)
            o = some_optimizer
            s = o.config.space
            for i in xrange(generations):
                p = some_optimizer[t.update(p, f, s, o), f]()
            t.update(p, f, s, o)

        :param population: The previous population.
        :type population: list of points in the search domain
        :param fitness: The fitness / cost / objective function
        :type fitness: Any callable object
        :param space: The search domain
        :type space: :class:`Space`
        :param opt: The optimizer reporting this population
        :type opt: :class:`PopulationDistribution`
        :returns: The history (``self``), for continuations; ``None`` when
                  ``population`` is ``None``

        """
        if population is None:
            return

        self._empty = False
        self.evals += len(population)
        self.updates += 1

        # score the sample
        scored = [(x, self.score(x, fitness, space)) for x in population]

        if self.root and self.config.observer is not None:
            self.config.observer.report(opt, scored)

        # NaN scores compare false both ways and so never become min or max
        for x, s in scored:
            if s > self.maxScore:
                self.maxScore = s
                self.maxSolution = x

            if s < self.minScore:
                self.minScore = s
                self.minSolution = x

        # generation report; skipped for an empty population, which has no
        # min / avg / max to show
        if scored and not (self.updates % self.printEvery):
            scores = [s for _, s in scored]
            print("%s : min %s  max %s"
                  "  this generation (min, avg, max):  %s %s %s"
                  % (self.updates, self.minScore, self.maxScore,
                     min(scores), np.average(scores), max(scores)))

        self.internalUpdate(scored)
        return self

    def internalUpdate(self, population):
        """Update the state of the :class:`History` with the latest
        population and its fitness scores. This is an internal call
        intended to be overridden by subclasses. One of the important
        functions is to delete points no longer needed by the history.

        The base implementation does nothing.

        :param population: The previous population with its fitness scores.
        :type population: list of (point, score) tuples

        """
        pass

    def score(self, point, fitness, space):
        """Get the fitness score, caching where possible.

        :param point: A valid point in the space
        :type point: Must match ``space.type``
        :param fitness: The fitness function
        :type fitness: Any callable
        :param space: The space to which the point belongs
        :type space: :class:`Space`
        :returns: The fitness value, cached if possible; ``None`` when
                  ``fitness`` is ``None``; NaN when the point is out of
                  bounds or the fitness raises ``ValueError``

        """
        if fitness is None:
            return None

        hashed = None
        haveHash = False
        if self.useCache:
            # best effort: points that cannot be hashed, or a cache that
            # fails on lookup, simply bypass the cache
            try:
                hashed = space.hash(point)
                haveHash = True
                if self.cache.has_key(hashed):
                    return self.cache[hashed]
            except Exception:
                pass

        if not space.in_bounds(point):
            # use NaN so that the result is neither less than nor greater
            # than any other score, and therefore NEVER optimal
            s = np.nan
        else:
            try:
                s = fitness(space.convert(point))
            except ValueError:
                s = np.nan

        if haveHash:
            # best effort as well: failing to store must not lose the score
            try:
                self.cache[hashed] = s
            except Exception:
                pass
        return s

    def setCache(self, cache):
        """Replace the cache used by ``score``.

        :param cache: The new cache object

        """
        self.cache = cache
示例#4
0
class History(object):
    """A History used to track the progress of an optimization algorithm.

    Different algorithms should extend this class in order to define
    the minimal amount of history that needs to be stored in order for the
    algorithm to operate.

    The base class tracks the number of evaluations and updates, the
    minimal and maximal scores seen so far together with their solutions,
    and a cache of fitness values keyed by ``space.hash(point)``.

    :param config: The configuration object; this class reads
                   ``printEvery``, ``minimize``, ``observer`` and
                   ``__properties__`` from it

    """
    useCache = True  # whether ``score`` reads from and writes to the cache
    attrs = set()  # names of the attributes saved by ``__getstate__``
    sorted = False
    root = True  # whether the current history is the top level root for
                 # all histories of this algorithm

    def __init__(self, config):
        super(History, self).__init__()
        self.config = config
        self.evals = 0
        self.minSolution = None
        self.minScore = np.inf
        self.maxSolution = None
        self.maxScore = -np.inf
        self._empty = True
        # a cache may already be provided, e.g. as a class attribute
        if not hasattr(self, 'cache'):
            self.cache = LRUCache()  # 10,000 items by default
        self.updates = 0
        # how often to print the generation report; when the config does
        # not set it, the interval is so large that nothing is printed
        self.printEvery = config.printEvery or 1000000000000
        self.attrs = set([
            "evals", "minSolution", "minScore", "maxSolution", "maxScore",
            "attrs", "_empty", "cache", "updates", "printEvery", "useCache"
        ])

    def __getstate__(self):
        """Used by :class:`CheckpointedHistory`
        and :class:`CheckpointedMultipleHistory` to checkpoint a history
        so it can be rolled back after updates.

        Should return all objects in the history in a dictionary, sensitive
        to the fact that object references may need to be copied. Lists are
        shallow-copied; every other value is stored by reference. The
        config is stored as its ``__properties__`` under the key ``cfg``.

        :returns: A dictionary with the state of the history

        """
        state = {}

        for attr in self.attrs:
            val = getattr(self, attr)
            if isinstance(val, list):
                # shallow copy so later in-place changes do not leak in
                val = list(val)
            state[attr] = val

        state['cfg'] = self.config.__properties__
        return state

    def __setstate__(self, state):
        """Restore the attributes saved by ``__getstate__``.

        The config is rebuilt from the properties stored under ``cfg``.

        :param state: A dictionary as returned by ``__getstate__``

        """
        # Use the attribute names recorded in the state when available: on
        # an instance created by unpickling, ``__init__`` has not run and
        # ``self.attrs`` is still the empty class-level set, so iterating
        # it would restore nothing.
        names = state.get('attrs') or self.attrs
        for attr in list(names):
            setattr(self, attr, state[attr])

        import pyec.config
        self.config = pyec.config.Config(**state['cfg'])

    def empty(self):
        """Whether the history has been used or not."""
        return self._empty

    def better(self, score1, score2):
        """Return whether one score is better than another.

        Uses ``config.minimize`` to decide whether lesser or greater
        numbers are better.

        :param score1: the score in question (floating point number)
        :type score1: ``float``
        :param score2: the score being compared
        :type score2: ``float``
        :returns: whether ``score1`` is better than ``score2``
        """
        if self.config.minimize:
            return score1 < score2
        return score1 > score2

    def best(self):
        """Get the best solution, whether minimizing or maximizing.

        Same as ``optimal``

        :returns: The result of ``minimal()`` when ``config.minimize`` is
                  set, otherwise the result of ``maximal()``

        """
        if self.config.minimize:
            return self.minimal()
        return self.maximal()

    optimal = best

    def minimal(self):
        """Get the minimal solution and its score.

        :returns: A tuple of two items with the solution object first
                  and the score of that item second

        """
        return self.minSolution, self.minScore

    def maximal(self):
        """Get the maximal solution and its score.

        :returns: A tuple of two items with the solution object first
                  and the score of that item second

        """
        return self.maxSolution, self.maxScore

    def num_evaluations(self):
        """Get the number of function evaluations performed in this
        history object.

        :returns: An integer representing the number of times the fitness
                  function or objective has been evaluated

        """
        return self.evals

    def update(self, population, fitness, space, opt):
        """Update the state of the :class:`History` with the latest
        population and its fitness scores. Subclasses should probably
        override ``internalUpdate`` rather than ``update``, unless they
        want to change how the min/max are tracked.

        If ``population`` is ``None``, then this method does nothing and
        returns ``None``; this is so that you can set up a loop to run an
        optimizer like::

            p = None
            f = lambda x: ...
            t = History(config)
            o = some_optimizer
            s = o.config.space
            for i in xrange(generations):
                p = some_optimizer[t.update(p, f, s, o), f]()
            t.update(p, f, s, o)

        :param population: The previous population.
        :type population: list of points in the search domain
        :param fitness: The fitness / cost / objective function
        :type fitness: Any callable object
        :param space: The search domain
        :type space: :class:`Space`
        :param opt: The optimizer reporting this population
        :type opt: :class:`PopulationDistribution`
        :returns: The history (``self``), for continuations; ``None`` when
                  ``population`` is ``None``

        """
        if population is None:
            return

        self._empty = False
        self.evals += len(population)
        self.updates += 1

        # score the sample
        scored = [(x, self.score(x, fitness, space)) for x in population]

        if self.root and self.config.observer is not None:
            self.config.observer.report(opt, scored)

        # NaN scores compare false both ways and so never become min or max
        for x, s in scored:
            if s > self.maxScore:
                self.maxScore = s
                self.maxSolution = x

            if s < self.minScore:
                self.minScore = s
                self.minSolution = x

        # generation report; skipped for an empty population, which has no
        # min / avg / max to show
        if scored and not (self.updates % self.printEvery):
            scores = [s for _, s in scored]
            print("%s : min %s  max %s"
                  "  this generation (min, avg, max):  %s %s %s"
                  % (self.updates, self.minScore, self.maxScore,
                     min(scores), np.average(scores), max(scores)))

        self.internalUpdate(scored)
        return self

    def internalUpdate(self, population):
        """Update the state of the :class:`History` with the latest
        population and its fitness scores. This is an internal call
        intended to be overridden by subclasses. One of the important
        functions is to delete points no longer needed by the history.

        The base implementation does nothing.

        :param population: The previous population with its fitness scores.
        :type population: list of (point, score) tuples

        """
        pass

    def score(self, point, fitness, space):
        """Get the fitness score, caching where possible.

        :param point: A valid point in the space
        :type point: Must match ``space.type``
        :param fitness: The fitness function
        :type fitness: Any callable
        :param space: The space to which the point belongs
        :type space: :class:`Space`
        :returns: The fitness value, cached if possible; ``None`` when
                  ``fitness`` is ``None``; NaN when the point is out of
                  bounds or the fitness raises ``ValueError``

        """
        if fitness is None:
            return None

        hashed = None
        haveHash = False
        if self.useCache:
            # best effort: points that cannot be hashed, or a cache that
            # fails on lookup, simply bypass the cache
            try:
                hashed = space.hash(point)
                haveHash = True
                if self.cache.has_key(hashed):
                    return self.cache[hashed]
            except Exception:
                pass

        if not space.in_bounds(point):
            # use NaN so that the result is neither less than nor greater
            # than any other score, and therefore NEVER optimal
            s = np.nan
        else:
            try:
                s = fitness(space.convert(point))
            except ValueError:
                s = np.nan

        if haveHash:
            # best effort as well: failing to store must not lose the score
            try:
                self.cache[hashed] = s
            except Exception:
                pass
        return s

    def setCache(self, cache):
        """Replace the cache used by ``score``.

        :param cache: The new cache object

        """
        self.cache = cache