def brute_force(self, **kwargs):
    """
    Random search: draw random bayesian networks and keep the best one

    Args
        names (list of string): the names of the nodes
        data (np array): (nsamples, nfeatures)
        nsamples (int): number of random networks drawn after the
            initial one (default 1000)

    Returns
        g: best graph found
        s: score of best graph
    """
    names = kwargs.get("names")
    data = kwargs.get("data")
    nsamples = kwargs.get("nsamples", 1000)

    def draw():
        # build one randomly initialised network and score it on data
        candidate = BayesNet(names)
        candidate.random_init()
        return candidate, candidate.score(data)

    # the first draw is the incumbent every later draw has to beat
    best_graph, best_score = draw()

    for step in range(nsamples):
        # carriage return keeps the progress counter on a single line
        sys.stdout.write("\rIter {}".format(step))
        sys.stdout.flush()

        candidate, candidate_score = draw()
        if candidate_score > best_score:
            print("\nFound new best score at {}".format(candidate_score))
            best_graph, best_score = candidate, candidate_score

    return best_graph, best_score
def genetic(self, **kwargs):
    """
    Implements genetic reproduction

    A population of randomly initialised networks is evolved with
    self.evolve for max_iter generations, while the best (graph, score)
    pair seen so far is tracked. If local_search is set to True, every
    individual is replaced by the result of self.best_neighbour
    (memetic variant).

    Args
        names (list of string): the names of the nodes
        data: the data every network is scored on
        max_iter (int): number of generations (default 30)
        nb_start (int): size of the initial population (default 10)
        max_pop (int): max size of an evolved population
            (default nb_start)
        max_parents: forwarded to random_init, best_neighbour and
            evolve (default None)
        mut_rate (float): forwarded to evolve (default 0.01)
        local_search (bool): use best_neighbour instead of a plain
            score (default False)

    Returns
        g_max: best graph found (None if nothing was ever scored)
        s_max: score of best graph (None if nothing was ever scored)
    """
    names = kwargs.get("names")
    data = kwargs.get("data")
    max_iter = kwargs.get("max_iter", 30)
    nb_start = kwargs.get("nb_start", 10)
    max_pop = kwargs.get("max_pop", nb_start)
    max_parents = kwargs.get("max_parents", None)
    mut_rate = kwargs.get("mut_rate", 0.01)
    local_search = kwargs.get("local_search", False)

    # initialize the population
    s_max = None
    g_max = None
    population = []
    for _ in range(nb_start):
        g = BayesNet(names)
        g.random_init(max_parents)
        if local_search:
            g, s, _found = self.best_neighbour(names, data, g, max_parents)
        else:
            s = g.score(data)
        population.append((g, s))
        # test for None first: ordering a score against None only works
        # by accident on Python 2 and raises TypeError on Python 3
        if s_max is None or s > s_max:
            s_max = s
            g_max = g

    # let evolution do its work; there is no early-stop criterion, the
    # loop always runs max_iter generations
    niter = 0
    while niter < max_iter:
        print("Iter {}, Population {}".format(niter, len(population)))
        population = self.evolve(names, data, population, max_parents,
                                 mut_rate, max_pop, local_search)

        # keep the best individual seen over all generations
        for (g, s) in population:
            if s_max is None or s > s_max:
                s_max = s
                g_max = g

        if self.plotting:
            # plotting is best effort: a failing plot manager must not
            # abort the search
            try:
                self.plt_mgr.add(name="Genetic Score Max", y=s_max)
                self.plt_mgr.update()
            except Exception:
                pass
        niter += 1

    return g_max, s_max
def evolve(self, names, data, population, max_parents, mut_rate,
           max_pop, local_search):
    """
    Given a population, creates a new population with random pairing
    and mixing

    Individuals are paired at random. Each pair (g1, g2) produces
    int(n * (s1 + s2) / s_tot) + 1 children through BayesNet.merge,
    until max_pop children exist. If local_search is true, each child
    is replaced by the result of self.best_neighbour on the merge.

    With an odd population size the last individual of the random
    ordering is not paired. A population of fewer than two individuals
    yields an empty result.

    Args
        names (list of string): the names of the nodes
        data: the data every child is scored on
        population (list): (graph, score) pairs
        max_parents: forwarded to merge and best_neighbour
        mut_rate: forwarded to merge
        max_pop (int): max size of the new population
        local_search (bool): use best_neighbour instead of a plain score

    Returns
        new_population (list): (graph, score) pairs, at most max_pop
    """
    new_population = []
    n = len(population)
    # NOTE(review): a non-empty population whose scores sum to zero
    # makes the divisions below fail - confirm scores cannot cancel out
    s_tot = sum(s for (_, s) in population)

    # shuffle indices rather than the (graph, score) tuples themselves,
    # so numpy never has to coerce BayesNet objects into an array
    order = np.random.permutation(n)

    for p in range(n // 2):
        (g1, s1) = population[order[2 * p]]
        (g2, s2) = population[order[2 * p + 1]]
        nchildren = int(n * (s1 + s2) / s_tot) + 1
        for _ in range(nchildren):
            if len(new_population) >= max_pop:
                # population is full, nothing left to do
                return new_population

            g = BayesNet(names)
            g.merge(g1, g2, s1 / s_tot, s2 / s_tot, max_parents, mut_rate)
            if local_search:
                g, s, _found = self.best_neighbour(names, data, g,
                                                   max_parents)
            else:
                s = g.score(data)
            new_population.append((g, s))

            if self.plotting:
                # plotting is best effort: a failing plot manager must
                # not abort the search
                try:
                    self.plt_mgr.add(name="Genetic Score", y=s)
                    self.plt_mgr.update()
                except Exception:
                    pass

    return new_population
def k2(self, **kwargs):
    """
    Implements k2 algorithm

    Nodes are visited in a random order. For each node,
    self.best_parent is called repeatedly until it reports that no
    new parent was found.

    Args
        names (list of string): the names of the nodes
        data: the data the network is scored on
        max_parents: forwarded to best_parent (default None)

    Returns
        g: best graph found
        s: score of best graph
    """
    names = kwargs.get("names")
    data = kwargs.get("data")
    max_parents = kwargs.get("max_parents", None)

    # random node ordering
    ordering = np.random.permutation(len(names))

    g = BayesNet(names)
    s = g.score(data)

    for i in ordering:
        found_new = True
        while found_new:
            print("Node {}, score is {}".format(i, s))
            g, s, found_new = self.best_parent(g, s, i, data, max_parents)
            if self.plotting:
                # plotting is best effort: a failing plot manager must
                # not abort the search
                try:
                    self.plt_mgr.add(
                        name="score k2 {}".format(self.start_no), y=s)
                    self.plt_mgr.update()
                except Exception:
                    pass

    return g, s
def hill_climbing(self, **kwargs):
    """
    Implements Hill Climbing Algorithm

    Starting from g0, repeatedly moves to the graph returned by
    self.best_neighbour until it reports no improvement or max_iter
    moves have been made.

    Args
        names (list of string): the name of the nodes
        data (np array): (nsamples, nfeatures)
        max_iter (int): max number of iteration (default 20)
        max_parents: forwarded to random_init and best_neighbour
            (default None)
        g0 (BayesNet): the start point; when omitted a randomly
            initialised network is used

    Returns
        g: best graph found
        s: score of best graph
    """
    # get args
    names = kwargs.get("names")
    data = kwargs.get("data")
    max_iter = kwargs.get("max_iter", 20)
    max_parents = kwargs.get("max_parents", None)

    # initialize: honour the documented g0 start point, fall back to a
    # random network otherwise
    g = kwargs.get("g0")
    if g is None:
        g = BayesNet(names)
        g.random_init(max_parents=max_parents)
    s = g.score(data)

    found_new = True
    niter = 0

    # explore
    while found_new and niter < max_iter:
        print("Iter {}".format(niter))
        niter += 1
        g, s, found_new = self.best_neighbour(names, data, g, max_parents)
        if self.plotting:
            # plotting is best effort: a failing plot manager must not
            # abort the search
            try:
                self.plt_mgr.add(
                    name="score hill climbing {}".format(self.start_no),
                    y=s)
                self.plt_mgr.update()
            except Exception:
                pass

    return g, s