def __init__(self, config):
    """Set up an empty history that tracks evaluation counts and extrema.

    :param config: configuration object; only ``config.printEvery`` is read
        here. A falsy ``printEvery`` falls back to a very large interval.
    """
    super(History, self).__init__()
    self.config = config
    self.evals = 0

    # Best/worst solutions seen so far; the infinite scores act as
    # sentinels that any real score will replace.
    self.minSolution = None
    self.minScore = np.inf
    self.maxSolution = None
    self.maxScore = -np.inf
    self._empty = True

    # Keep a cache that already exists on the instance when __init__ runs.
    if not hasattr(self, 'cache'):
        self.cache = LRUCache()  # 10,000 items by default (original author's note)

    self.updates = 0

    # How often to print the generation report. The literal carries no
    # ``L`` suffix: Python 2 promotes to long automatically when needed, and
    # the suffix is a syntax error on Python 3.
    self.printEvery = config.printEvery or 1000000000000

    # Attribute names listed for this object. "useCache" is listed although
    # it is not assigned in this constructor.
    self.attrs = set([
        "evals",
        "minSolution",
        "minScore",
        "maxSolution",
        "maxScore",
        "attrs",
        "_empty",
        "cache",
        "updates",
        "printEvery",
        "useCache",
    ])
def __init__(self):
    """Create the two caches: a plain dict and an ``LRUCache`` for variables."""
    self.cache = dict()
    self.varCache = LRUCache()
class BayesNet(Distribution):
    """A Bayesian network over a fixed list of variables.

    Each variable keeps a ``parents`` mapping (parent index -> parent
    variable). Edge statistics (``edges``, ``edgeTuples``, ``edgeRatio``,
    ``acyclic``, ``cacheKeys``) are derived from those mappings by
    :meth:`computeEdgeStatistics` whenever ``dirty`` is set.

    Most methods are wrapped in ``checkDeferred`` / ``checkDeferredWeights``,
    which are defined outside this class.
    NOTE(review): presumably they call :meth:`initialize` / :meth:`estimate`
    when ``deferred`` / ``deferredWeights`` is set -- confirm against their
    definitions.
    """

    # Live-instance and total-construction counters.
    counter = 0
    created = 0

    # Default configuration; keyword arguments to __init__ are merged over it.
    config = Config(numVariables=None,
                    branchFactor=10,
                    variableGenerator=BayesVariable,
                    structureGenerator=GreedyStructureSearch(
                        10, BayesianInformationCriterion()),
                    sampler=DAGSampler(),
                    randomizer=lambda net: None)

    # Caches shared by every network (class attributes).
    densityCache = LRUCache()
    weightCache = LRUCache()
    likelihoodCache = LRUCache()

    def __init__(self, **kwargs):
        """Build a network with ``numVariables`` parentless variables.

        :param kwargs: overrides merged onto the class-level ``config``.
        """
        config = BayesNet.config.merge(Config(**kwargs))
        super(BayesNet, self).__init__(**config.__properties__)
        self.numVariables = self.config.numVariables
        self.variableGenerator = self.config.variableGenerator
        self.structureGenerator = self.config.structureGenerator
        self.randomizer = self.config.randomizer
        self.sampler = self.config.sampler
        self.variables = []
        for i in xrange(self.numVariables):
            self.variables.append(self.variableGenerator(i, self.config))
        self.decay = 1
        self.dirty = False
        self.acyclic = True
        self.edges = []
        self.edgeRatio = 0.0
        # Starts as an empty list (not None) so that str() / parse() /
        # initialize() round-trip a network that has no edges yet.
        self.edgeTuples = []
        self.cacheKeys = dict((v.index, v.cacheKey) for v in self.variables)
        self.edgeMap = {}
        self.binary = zeros(len(self.variables) ** 2)
        self.deferred = False
        self.deferredWeights = False
        self.edgeRep = None
        self.densityStored = None
        self.cacheHits = 0
        self.cacheTries = 0
        self.changed = {}
        self.last = {}
        self.__class__.counter += 1
        self.__class__.created += 1
        # Remember that this instance was counted so __del__ stays balanced.
        self._counted = True

    def __del__(self):
        """Decrement the live-instance counter if this instance was counted."""
        if getattr(self, '_counted', False):
            self.__class__.counter -= 1

    def __copy__(self):
        """Copy by serializing the structure and re-parsing it."""
        return self.__class__.parse(str(self), self.config)

    # ------------------------------------------------------------------
    # private helpers (not decorated; callers are already decorated)
    # ------------------------------------------------------------------

    def _variableByIndex(self, index):
        """Return the first variable whose ``index`` equals *index*, or None."""
        for var in self.variables:
            if var.index == index:
                return var
        return None

    def _cachedDensity(self, variable, x):
        """Return ``variable.density(x)``, memoized in ``densityCache``.

        The cache key is the variable's cache key followed by the string form
        of ``x[variable.index]``.
        """
        key = self.cacheKeys[variable.index] + str(x[variable.index])
        cache = BayesNet.densityCache
        # has_key is kept: LRUCache is a project type whose support for the
        # ``in`` operator is not visible from this file.
        if not cache.has_key(key):
            cache[key] = variable.density(x)
        return cache[key]

    # ------------------------------------------------------------------
    # structure
    # ------------------------------------------------------------------

    @checkDeferred
    def get(self, index):
        """Return the variable with the given index, or None if absent."""
        return self._variableByIndex(index)

    @checkDeferred
    def sort(self):
        """Reorder ``self.variables`` so that parents precede children.

        Variables are first ordered by parent count, then walked through a
        linked list; a variable with an unvisited parent is moved to the end
        of the list. Meeting a variable that was already moved in the current
        round stops the walk.

        :returns: True when the walk finished with no skipped variables
            (used by callers as the acyclicity flag), False otherwise.
        """
        self.variables = sorted(self.variables, key=lambda x: len(x.parents))
        visited = {}
        skipped = {}
        ordering = LinkedList()
        for var in self.variables:
            ordering.append(var)
        current = ordering.first
        while current is not None:
            v = current.value
            advance = True
            for idx, p in v.parents.iteritems():
                if idx not in visited:
                    # A parent has not been placed yet: push v to the back
                    # and continue from whatever followed it.
                    before = current.before
                    ordering.remove(current)
                    ordering.append(v)
                    if before is not None:
                        current = before.after
                    advance = False
                    skipped[v.index] = True
                    break
            if not advance and current.value.index in skipped:
                # Everything left has already been skipped this round.
                break
            if advance:
                current = current.after
                skipped = {}
                visited[v.index] = True
        current = ordering.first
        self.variables = []
        while current is not None:
            self.variables.append(current.value)
            current = current.after
        return len(skipped) == 0

    @checkDeferred
    def decompose(self):
        """Group variable indices into clusters linked by parent relations.

        A variable joins the first cluster containing one of its parents'
        indices; otherwise it starts a new cluster.

        :returns: list of lists of variable indices.
        """
        self.sort()
        # self.variables is now ordered in a way to allow sampling
        clusters = []
        for var in self.variables:
            # is the parent in one of the clusters?
            member = False
            for c in clusters:
                for v in c:
                    if var.parents.has_key(v):
                        member = True
                        break
                if member:
                    c.append(var.index)
                    break
            if not member:
                clusters.append([var.index])
        return clusters

    @checkDeferred
    def hasEdge(self, frm, t):
        """Whether the network has an edge from the parent with index 'frm'
        to the child with index 't'.

        Variable objects are also accepted; their ``index`` attribute is used.
        The check runs against ``edgeTuples`` (index pairs) because ``edges``
        holds variable objects, which never compare equal to index pairs.
        """
        frmIdx = getattr(frm, 'index', frm)
        toIdx = getattr(t, 'index', t)
        return (frmIdx, toIdx) in (self.edgeTuples or ())

    @checkDeferred
    def isAcyclic(self):
        """Is the network a DAG?"""
        if self.dirty:
            self.computeEdgeStatistics()
        return self.acyclic

    @checkDeferred
    def structureSearch(self, data):
        """Delegate structure search to the configured structure generator."""
        return self.structureGenerator.search(self, data)

    @checkDeferred
    def merge(self, other, data):
        """Delegate merging with *other* to the structure generator."""
        return self.structureGenerator.merge(self, other, data)

    @checkDeferred
    def cross(self, other, data):
        """Delegate crossing with *other* to the structure generator."""
        return self.structureGenerator.cross(self, other, data)

    @checkDeferred
    def computeEdgeStatistics(self):
        """Recompute derived edge data from the variables' parent maps.

        Does nothing unless ``dirty`` is set. Re-sorts the variables, rebuilds
        ``edges`` / ``edgeTuples`` / ``edgeRatio`` / ``cacheKeys``, and clears
        ``dirty`` and ``densityStored``.
        """
        if not self.dirty:
            return
        self.acyclic = self.sort()
        edges = []
        for variable in self.variables:
            for l, parent in variable.parents.iteritems():
                edges.append((parent, variable))
        self.edges = sorted(edges, key=lambda e: (e[0].index, e[1].index))
        # The small constant avoids division by zero for an empty network.
        self.edgeRatio = len(self.edges) / (1e-10 + (len(self.variables) ** 2))
        self.edgeTuples = [(frm.index, to.index) for frm, to in self.edges]
        self.cacheKeys = dict((v.index, v.cacheKey) for v in self.variables)
        self.dirty = False
        self.densityStored = None

    @checkDeferred
    def getChildren(self, variable):
        """Return the variables that list *variable* among their parents."""
        self.computeEdgeStatistics()
        return [candidate for candidate in self.variables
                if candidate.parents.has_key(variable.index)]

    # ------------------------------------------------------------------
    # densities and likelihoods
    # ------------------------------------------------------------------

    @checkDeferredWeights
    def distribution(self, index, x):
        """Return ``distribution(x)`` of the variable with the given index."""
        return self._variableByIndex(index).distribution(x)

    @checkDeferredWeights
    def randomize(self):
        """Apply the configured randomizer to this network."""
        return self.randomizer(self)

    @checkDeferredWeights
    def conditionalLikelihood(self, index, data):
        """Sum of log densities of one variable over all rows of *data*."""
        var = self._variableByIndex(index)
        total = 0.0
        for x in data:
            total += log(self._cachedDensity(var, x))
        return total

    @checkDeferredWeights
    def likelihood(self, data):
        """Sum of per-variable log densities over *data*.

        Each variable's contribution is recorded in ``self.last`` and the
        grand total in ``self.densityStored``.
        """
        prod = 0.0
        for v in self.variables:
            vprod = 0.0
            for x in data:
                vprod += log(self._cachedDensity(v, x))
            # Collapse an array-valued accumulator to its first element.
            if isinstance(vprod, np.ndarray):
                vprod = vprod[0]
            self.last[v.index] = vprod
            prod += vprod
        self.densityStored = prod
        return prod

    @checkDeferredWeights
    def likelihoodChanged(self, data, storeChange=False):
        """Log likelihood that recomputes only variables flagged in ``changed``.

        Unflagged variables reuse the value stored in ``self.last``.

        :param storeChange: when true, write recomputed values to ``self.last``.
        """
        total = 0.0
        for v in self.variables:
            if self.changed.get(v.index):
                inner = 0.0
                for x in data:
                    inner += log(self._cachedDensity(v, x))
                total += inner
                if storeChange:
                    self.last[v.index] = inner
            else:
                total += self.last[v.index]
        return total

    @checkDeferredWeights
    def marginal(self, cmpIdx, data):
        """Mean of ``marginalDensity`` of variable *cmpIdx* over *data*."""
        var = self._variableByIndex(cmpIdx)
        total = 0.0
        for t in data:
            total += var.marginalDensity(t, self)
        return total / len(data)

    @checkDeferredWeights
    def map(self, cmpIdx, data):
        """Fraction of rows where ``var.map(row)`` matches the row at *cmpIdx*."""
        var = self._variableByIndex(cmpIdx)
        total = 0.0
        for t in data:
            z = var.map(t)
            if z[cmpIdx] == t[cmpIdx]:
                total += 1.0
        return total / len(data)

    @checkDeferredWeights
    def density(self, x):
        """Product of the (cached) per-variable densities at *x*."""
        self.computeEdgeStatistics()
        prod = 1.0
        for variable in self.variables:
            prod *= self._cachedDensity(variable, x)
        return prod

    # ------------------------------------------------------------------
    # sampling
    # ------------------------------------------------------------------

    @checkDeferredWeights
    def __call__(self):
        """sample the network"""
        return self.sampler(self)

    @checkDeferredWeights
    def sample(self):
        """Alias for calling the network."""
        return self.__call__()

    @checkDeferredWeights
    def batch(self, num):
        """Return a list of *num* samples."""
        return [self.__call__() for i in xrange(num)]

    # ------------------------------------------------------------------
    # parameters
    # ------------------------------------------------------------------

    @checkDeferred
    def numFreeParameters(self):
        """Total of ``numFreeParameters()`` across all variables."""
        return sum(variable.numFreeParameters() for variable in self.variables)

    @checkDeferred
    def update(self, epoch, data):
        """Update every variable from *data*; *epoch* is not used here."""
        self.computeEdgeStatistics()
        for variable in self.variables:
            self.updateVar(variable, data)

    @checkDeferred
    def updateVar(self, variable, data):
        """Update one variable, reusing state cached under its cache key."""
        self.computeEdgeStatistics()
        key = self.cacheKeys[variable.index]
        if not BayesNet.weightCache.has_key(key):
            variable.update(data)
            BayesNet.weightCache[key] = variable.getComputedState()
        else:
            variable.restoreComputedState(BayesNet.weightCache[key])

    @checkDeferred
    def updateVariables(self, data):
        """Update every variable directly, bypassing the weight cache."""
        for variable in self.variables:
            variable.update(data)

    # ------------------------------------------------------------------
    # computed-state snapshot
    # ------------------------------------------------------------------

    @checkDeferredWeights
    def getComputedState(self):
        """Snapshot the derived edge data and per-variable computed state.

        ``varorder`` maps list position -> variable index; ``varstate`` maps
        variable index -> that variable's computed state.
        """
        state = {}
        self.computeEdgeStatistics()
        state['acyclic'] = self.acyclic
        state['edges'] = self.edges
        state['edgeRatio'] = self.edgeRatio
        state['edgeTuples'] = self.edgeTuples
        state['cacheKeys'] = self.cacheKeys
        varstate = {}
        varorder = {}
        for i, v in enumerate(self.variables):
            varstate[v.index] = v.getComputedState()
            varorder[i] = v.index
        state['varstate'] = varstate
        state['varorder'] = varorder
        return state

    def restoreComputedState(self, state):
        """Restore a snapshot produced by :meth:`getComputedState`."""
        self.dirty = False
        self.acyclic = state['acyclic']
        self.edges = state['edges']
        self.edgeRatio = state['edgeRatio']
        self.edgeTuples = state['edgeTuples']
        self.cacheKeys = state['cacheKeys']
        varorder = state['varorder']
        varstate = state['varstate']
        # varorder is position -> index, so invert it to obtain each
        # variable's saved position; indexing varorder by v.index directly
        # would apply the permutation the wrong way round.
        position = dict((idx, pos) for pos, idx in varorder.items())
        self.variables = sorted(self.variables,
                                key=lambda v: position[v.index])
        for v in self.variables:
            v.restoreComputedState(varstate[v.index])

    # ------------------------------------------------------------------
    # sequence protocol over the flattened adjacency matrix
    # ------------------------------------------------------------------

    @checkDeferred
    def __getitem__(self, index):
        """Whether flat edge slot *index* is set (``to * numVariables + frm``).

        Results are memoized in ``edgeMap``.
        NOTE(review): the child is looked up by list position, not by
        variable index, and ``edgeMap`` is not cleared in this class when the
        structure changes -- confirm both are intended.
        """
        if index in self.edgeMap:
            return self.edgeMap[index]
        frmidx = index % self.numVariables
        # Floor division keeps this an integer index on any Python version.
        toidx = index // self.numVariables
        for l, v in self.variables[toidx].parents.iteritems():
            if v.index == frmidx:
                self.edgeMap[index] = True
                return True
        self.edgeMap[index] = False
        return False

    def __len__(self):
        """Number of slots in the flattened adjacency matrix."""
        return self.numVariables ** 2

    # ------------------------------------------------------------------
    # pickling / serialization
    # ------------------------------------------------------------------

    def __getstate__(self):
        """Pickle only the variables and the pluggable strategy objects."""
        return {
            'v': self.variables,
            'r': self.randomizer,
            's': self.sampler,
            'sg': self.structureGenerator,
        }

    def __setstate__(self, state):
        """Rebuild a network from :meth:`__getstate__` output.

        Parent links are re-created from each variable's ``parentIndices``
        and the derived edge data is recomputed.
        """
        self.dirty = True
        self.variables = state['v']
        self.numVariables = len(self.variables)
        self.randomizer = state['r']
        self.structureGenerator = state['sg']
        self.sampler = state['s']
        indexMap = dict((variable.index, variable)
                        for variable in self.variables)
        for variable in self.variables:
            variable.parents = {}
            for i in variable.parentIndices:
                variable.addParent(indexMap[i])
        self.changed = {}
        self.last = {}
        self.deferred = False
        self.deferredWeights = True
        self.edgeRep = None
        self.edgeMap = {}
        self.edgeTuples = None
        self.edges = None
        self.binary = None
        self.densityStored = None
        # __init__ is bypassed when unpickling, so count the instance here;
        # otherwise __del__ would push the live counter below its true value.
        self.__class__.counter += 1
        self._counted = True
        self.computeEdgeStatistics()

    @checkDeferred
    def __str__(self):
        """pickle the object"""
        self.computeEdgeStatistics()
        edgeTuples = self.edgeTuples if self.edgeTuples is not None else []
        return cPickle.dumps(len(self.variables)) + cPickle.dumps(edgeTuples)

    def initialize(net):
        """Materialize the deferred edge list ``edgeRep`` into parent links.

        Edge endpoints are treated as positions in ``net.variables``.
        """
        # A representation written by an edgeless network may be None.
        for frm, to in (net.edgeRep or ()):
            net.variables[to].addParent(net.variables[frm])
        net.dirty = True
        net.deferred = False
        net.edgeRep = None
        net.computeEdgeStatistics()

    @checkDeferred
    def estimate(net):
        """Fit every variable on ``net.config.data`` and clear the deferral."""
        for variable in net.variables:
            net.updateVar(variable, net.config.data)
        net.deferredWeights = False

    @classmethod
    def parse(cls, rep, cfg):
        """Build a deferred network from the output of :meth:`__str__`.

        :param rep: two concatenated pickles: variable count, then edge list.
        :param cfg: configuration to build with; None selects the class default.

        SECURITY NOTE: *rep* is unpickled; never pass untrusted data here.
        """
        stream = StringIO(rep)
        # The variable count must be read to advance the stream to the edge
        # list; the network size itself comes from cfg.
        cPickle.load(stream)
        if cfg is None:
            cfg = BayesNet.config
        net = cls(**cfg.__properties__)
        net.edgeRep = cPickle.load(stream)
        net.deferred = True
        net.deferredWeights = True
        return net