# Imports assumed by the classes below: gammaln comes from scipy and np is
# numpy.  LRUCache is pyec's own bounded cache; its import is not shown in
# this excerpt.
import numpy as np
from scipy.special import gammaln


class BayesianDirichletScorer(object):
    """Score a Bayesian network structure against a data set using a
    Bayesian Dirichlet metric with a uniform prior.

    """
    def __init__(self):
        self.cache = {}
        self.varCache = LRUCache()

    def matchesPrior(self, data, configuration):
        """Prior count for a configuration; uniform prior of 1.0."""
        return 1.0

    def matches(self, data, config):
        """Count the number of instances of ``config`` in ``data``."""
        cnt = 0L
        for x in data:
            if config <= x:
                cnt += 1
        return cnt

    def __call__(self, network, data):
        network.computeEdgeStatistics()
        total = 0.0
        # structural penalty discouraging dense networks
        total -= network.edgeRatio * len(data) * 10
        for variable in network.variables:
            # cache each variable's contribution, keyed by its parent set
            varKey = str(variable.index) + str(variable.parents)
            if self.varCache.has_key(varKey):
                total += self.varCache[varKey]
                continue
            start = total
            for configuration in variable.configurations():
                prior = self.matchesPrior(data, configuration)
                total += gammaln(prior)
                total -= gammaln(prior + self.matches(data, configuration))
                for val in variable.values():
                    priorVal = self.matchesPrior(data, configuration + val)
                    total -= gammaln(priorVal)
                    total += gammaln(priorVal + self.matches(data, configuration + val))
            self.varCache[varKey] = total - start
        return total / len(data)
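
# Usage sketch (not part of the original module): how a structure search might
# rank candidate networks with this scorer.  ``candidates`` and ``data`` are
# hypothetical names; each candidate is assumed to expose the attributes that
# __call__ relies on (computeEdgeStatistics(), edgeRatio, variables).
def _example_pick_best_network(candidates, data):
    """Return the candidate network with the highest Dirichlet score."""
    scorer = BayesianDirichletScorer()
    # Scores are on a log scale and normalized by len(data); larger is better.
    return max(candidates, key=lambda net: scorer(net, data))
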
class History(object):
    """A History used to track the progress of an optimization algorithm.

    Different algorithms should extend this class in order to define the
    minimal amount of history that needs to be stored in order for the
    algorithm to operate.

    """
    useCache = True
    attrs = set()
    sorted = False
    root = True  # whether the current history is the top level root for
                 # all histories of this algorithm

    def __init__(self, config):
        super(History, self).__init__()
        self.config = config
        self.evals = 0
        self.minSolution = None
        self.minScore = np.inf
        self.maxSolution = None
        self.maxScore = -np.inf
        self._empty = True
        if not hasattr(self, 'cache'):
            self.cache = LRUCache()  # 10,000 items by default
        self.updates = 0
        # how often to print the generation report
        self.printEvery = config.printEvery or 1000000000000L
        self.attrs = set(["evals", "minSolution", "minScore", "maxScore",
                          "attrs", "maxSolution", "_empty", "cache",
                          "updates", "printEvery", "useCache"])

    def __getstate__(self):
        """Used by :class:`CheckpointedHistory` and
        :class:`CheckpointedMultipleHistory` to checkpoint a history so it
        can be rolled back after updates.

        Should return all objects in the history in a dictionary, sensitive
        to the fact that object references may need to be copied.

        :returns: A dictionary with the state of the history

        """
        state = {}
        for attr in self.attrs:
            val = getattr(self, attr)
            if isinstance(val, list):
                val = [x for x in val]
            state[attr] = val
        state['cfg'] = self.config.__properties__
        return state

    def __setstate__(self, state):
        for attr in self.attrs:
            val = state[attr]
            setattr(self, attr, val)
        import pyec.config
        self.config = pyec.config.Config(**state['cfg'])

    def empty(self):
        """Whether the history has been used or not."""
        return self._empty

    def better(self, score1, score2):
        """Return whether one score is better than another. Uses
        ``config.minimize`` to decide whether lesser or greater numbers
        are better.

        :param score1: the score in question (floating point number)
        :type score1: ``float``
        :param score2: the score being compared
        :type score2: ``float``
        :returns: whether ``score1`` is better than ``score2``

        """
        if self.config.minimize:
            return score1 < score2
        else:
            return score1 > score2

    def best(self):
        """Get the best solution, whether minimizing or maximizing.
        Same as ``optimal``.

        """
        if self.config.minimize:
            return self.minimal()
        else:
            return self.maximal()
    optimal = best

    def minimal(self):
        """Get the minimal solution and its score.

        :returns: A tuple of two items with the solution object first and
                  the score of that item second

        """
        return self.minSolution, self.minScore

    def maximal(self):
        """Get the maximal solution and its score.

        :returns: A tuple of two items with the solution object first and
                  the score of that item second

        """
        return self.maxSolution, self.maxScore

    def num_evaluations(self):
        """Get the number of function evaluations performed in this
        history object.

        :returns: An integer representing the number of times the fitness
                  function or objective has been evaluated

        """
        return self.evals

    def update(self, population, fitness, space, opt):
        """Update the state of the :class:`History` with the latest
        population and its fitness scores.

        Subclasses should probably override ``internalUpdate`` rather than
        ``update``, unless they want to change how the min/max are tracked.

        Returns the history for use in continuations.

        If ``population`` is ``None``, then this method does nothing; this
        is so that you can set up a loop to run an optimizer like::

            p = None
            f = lambda x:
            t = History()
            o = some_optimizer
            s = o.config.space
            for i in xrange(generations):
                p = some_optimizer[t.update(p,f,s,0), f]()
            t.update(p,f,s,o)

        :param population: The previous population.
        :type population: list of points in the search domain
        :param fitness: The fitness / cost / objective function
        :type fitness: Any callable object
        :param space: The search domain
        :type space: :class:`Space`
        :param opt: The optimizer reporting this population
        :type opt: :class:`PopulationDistribution`
        :returns: The history (``self``), for continuations

        """
        if population is None:
            return

        #self.config.stats.start(repr(self) + "history.update.all")
        self._empty = False
        self.evals += len(population)
        self.updates += 1

        #self.config.stats.start(repr(self) + "history.update.scoreall")
        # score the sample
        pop = population
        scored = [(x, self.score(x, fitness, space)) for x in pop]
        #self.config.stats.stop(repr(self) + "history.update.scoreall")

        #self.config.stats.start(repr(self) + "history.update.findbest")
        if self.root and self.config.observer is not None:
            self.config.observer.report(opt, scored)

        for x, s in scored:
            if s > self.maxScore:
                self.maxScore = s
                self.maxSolution = x
            if s < self.minScore:
                self.minScore = s
                self.minSolution = x
        #self.config.stats.stop(repr(self) + "history.update.findbest")

        if not (self.updates % self.printEvery):
            genmin = min([s for x, s in scored])
            genmax = max([s for x, s in scored])
            genavg = np.average([s for x, s in scored])
            print self.updates, ": min", self.minScore, " max", self.maxScore,
            print " this generation (min, avg, max): ", genmin, genavg, genmax

        #self.config.stats.start(repr(self) + "history.update.internal")
        self.internalUpdate(scored)
        #self.config.stats.stop(repr(self) + "history.update.internal")
        #self.config.stats.stop(repr(self) + "history.update.all")
        return self

    def internalUpdate(self, population):
        """Update the state of the :class:`History` with the latest
        population and its fitness scores. This is an internal call
        intended to be overridden by subclasses. One of its important
        functions is to delete points no longer needed by the history.

        :param population: The previous population with its fitness scores.
        :type population: list of (point, score) tuples

        """
        pass

    def score(self, point, fitness, space):
        """Get the fitness score, caching where possible.

        :param point: A valid point in the space
        :type point: Must match ``space.type``
        :param fitness: The fitness function
        :type fitness: Any callable
        :param space: The space to which the point belongs
        :type space: :class:`Space`
        :returns: The fitness value, cached if possible

        """
        if fitness is None:
            return None

        #self.config.stats.start("history.score")
        if self.useCache:
            try:
                hashed = space.hash(point)
                if self.cache.has_key(hashed):
                    ret = self.cache[hashed]
                    #self.config.stats.stop("history.score")
                    return ret
            except Exception:
                pass

        if not space.in_bounds(point):
            # use NaN so that the result is neither less than nor greater
            # than any other score, and therefore NEVER optimal
            s = np.inf - np.inf
        else:
            try:
                s = fitness(space.convert(point))
            except ValueError:
                s = np.inf - np.inf

        if self.useCache:
            try:
                hashed = space.hash(point)
                self.cache[hashed] = s
            except Exception:
                pass

        #self.config.stats.stop("history.score")
        return s

    def setCache(self, cache):
        self.cache = cache
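
# Usage sketch (illustrative, not part of the original module): driving a
# History by hand with a toy objective.  Here ``config`` is assumed to be a
# pyec Config with ``minimize``, ``observer``, and ``printEvery`` set,
# ``space`` a concrete Space, and ``optimizer`` a PopulationDistribution
# supporting the ``optimizer[history, fitness]()`` sampling idiom shown in
# the update() docstring; all of these are supplied by the caller.
def _example_history_loop(config, space, optimizer, generations=10):
    fitness = lambda x: sum(xi * xi for xi in x)  # toy sphere objective
    history = History(config)
    population = None
    for i in xrange(generations):
        # record the previous population (a no-op on the first pass, when
        # population is still None), then sample the next one
        history.update(population, fitness, space, optimizer)
        population = optimizer[history, fitness]()
    history.update(population, fitness, space, optimizer)
    # best() returns a (solution, score) pair, minimal or maximal depending
    # on config.minimize
    return history.best()
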