Example #1
    def evolve(self, names, data, population, max_parents, mut_rate, max_pop,
               local_search):
        """
		Given a population, creates a new population with random pairing and mixing

		If local seach is true, children is the best neigbour of the random merge
		"""
        new_population = []
        s_tot = sum([s for (_, s) in population])
        n = len(population)
        population = np.random.permutation(population)
        for p in xrange(n / 2):
            (g1, s1) = population[2 * p]
            (g2, s2) = population[2 * p + 1]
            nchildren = int(n * (s1 + s2) / s_tot) + 1
            for i in xrange(nchildren):
                if len(new_population) < max_pop:
                    g = BayesNet(names)
                    g.merge(g1, g2, s1 / s_tot, s2 / s_tot, max_parents,
                            mut_rate)
                    if local_search:
                        g, s, _ = self.best_neighbour(names, data, g,
                                                      max_parents)
                    else:
                        s = g.score(data)
                    new_population += [(g, s)]
                    if self.plotting:
                        try:
                            self.plt_mgr.add(name="Genetic Score", y=s)
                            self.plt_mgr.update()
                        except Exception:
                            pass
        return new_population
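
A quick numeric check of the offspring allocation above (the numbers are made up): fitter pairs are allocated proportionally more children, and the trailing + 1 guarantees every pair at least one child.

n, s_tot = 10, 10.0        # population size, total score mass
s1, s2 = 1.5, 1.5          # this pair holds 30% of the score mass
nchildren = int(n * (s1 + s2) / s_tot) + 1   # int(3.0) + 1 = 4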
Example #2
    def genetic(self, **kwargs):
        """
		Implements genetic reproduction

		If local search is set to True, implements mimetic
		"""
        names = kwargs.get("names")
        data = kwargs.get("data")
        max_iter = kwargs.get("max_iter", 30)
        nb_start = kwargs.get("nb_start", 10)
        max_pop = kwargs.get("max_pop", nb_start)
        max_parents = kwargs.get("max_parents", None)
        mut_rate = kwargs.get("mut_rate", 0.01)
        local_search = kwargs.get("local_search", False)

        # initialize the population
        s_max = None
        g_max = None
        population = []
        for i in xrange(nb_start):
            g = BayesNet(names)
            g.random_init(max_parents)
            if local_search:
                g, s, _ = self.best_neighbour(names, data, g, max_parents)
            else:
                s = g.score(data)

            population += [(g, s)]
            if s_max is None or s > s_max:
                s_max = s
                g_max = g

        # let evolution do its work
        criteria = True
        niter = 0

        def update_criteria_from(population):
            s = None
            g = None
            for (_g, _s) in population:
                if s is None or _s > s:
                    s = _s
                    g = _g
            if s > s_max:
                return g, s, True
            else:
                return g_max, s_max, False

        while criteria and niter < max_iter:
            print "Iter {}, Population {}".format(niter, len(population))
            population = self.evolve(names, data, population, max_parents,
                                     mut_rate, max_pop, local_search)
            g_max, s_max, criteria = update_criteria_from(population)
            if self.plotting:
                try:
                    self.plt_mgr.add(name="Genetic Score Max", y=s_max)
                    self.plt_mgr.update()
                except Exception:
                    pass
            niter += 1

        return g_max, s_max
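
A hypothetical usage sketch; the Solver class name and the data array are assumptions, since the snippet shows only the method itself.

# Hypothetical usage; `Solver` is an assumed name for the enclosing class.
# solver = Solver()
# g_best, s_best = solver.genetic(names=names, data=data,
#                                 nb_start=20, max_iter=50,
#                                 local_search=True)  # memetic variant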
Example #3
    def brute_force(self, **kwargs):
        """
		Sample random bayesian network and keep the best

		Args
			names (list of string): the names of the nodes
			data (np array): (nsamples, nfeatures)
		"""
        # get args
        names = kwargs.get("names")
        data = kwargs.get("data")
        nsamples = kwargs.get("nsamples", 1000)

        # initialize
        g = BayesNet(names)
        g.random_init()
        s = g.score(data)

        # explore
        for i in xrange(nsamples):
            sys.stdout.write("\rIter {}".format(i))
            sys.stdout.flush()
            g_new = BayesNet(names)
            g_new.random_init()
            s_new = g_new.score(data)
            if s_new > s:
                print "\nFound new best score at {}".format(s_new)
                g = g_new
                s = s_new
        return g, s
Example #4
    def k2(self, **kwargs):
        """
		Implements k2 algorithm
		"""
        names = kwargs.get("names")
        data = kwargs.get("data")
        max_iter = kwargs.get("max_iter", 30)
        nb_start = kwargs.get("nb_start", 3)
        max_parents = kwargs.get("max_parents", None)

        ordering = np.random.permutation(range(len(names)))
        g = BayesNet(names)
        s = g.score(data)

        for i in ordering:
            found_new = True
            while found_new:
                print "Node {}, score is {}".format(i, s)
                g, s, found_new = self.best_parent(g, s, i, data, max_parents)
                if self.plotting:
                    try:
                        self.plt_mgr.add(name="score k2 {}".format(self.start_no), y=s)
                        self.plt_mgr.update()
                    except Exception:
                        pass

        return g, s
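
For reference, classic K2 (Cooper & Herskovits, 1992) restricts the candidate parents of each node to its predecessors in the ordering, which guarantees acyclicity by construction; here the ordering only fixes the processing order and acyclicity is presumably left to add_edge. A hedged sketch of the classic restriction, where best_parent_from is a hypothetical variant of best_parent that takes a candidate set:

# Hypothetical sketch of the classic K2 parent restriction:
# for pos, i in enumerate(ordering):
#     candidates = ordering[:pos]   # only earlier nodes may become parents
#     g, s, found_new = self.best_parent_from(g, s, i, candidates, data)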
Example #5
def precision_recall():
#   from sklearn.metrics import roc_auc_score
#   from sklearn.metrics import roc_curve
  from sklearn.metrics import precision_recall_curve
  from sklearn.metrics import auc
  from sklearn.metrics import classification_report
  from mpltools import style
  style.use('ggplot')

  makes = ['bmw', 'ford']
  types = ['sedan', 'SUV']
  args = makes + types
  config = get_config(args)
  (dataset, config) = fgu.get_all_metadata(config)

  for ii, attrib_name in enumerate(args):
  #   attrib_name = 'bmw'

    attrib_clf = AttributeClassifier.load('../../../attribute_classifiers/{}.dat'.format(attrib_name))
    bnet = BayesNet(config, dataset['train_annos'],
                    dataset['class_meta'], [attrib_clf], desc=str(args))

    res = bnet.create_attrib_res_on_images()

    attrib_selector = AttributeSelector(config, dataset['class_meta'])
  #   attrib_meta = attrib_selector.create_attrib_meta([attrib_clf.name])
    pos_classes = attrib_selector.class_ids_for_attribute(attrib_name)
    true_labels = np.array(res.class_index.isin(pos_classes))

    print "--------------{}-------------".format(attrib_name)
    print res[str.lower(attrib_name)].describe()

    print classification_report(true_labels, np.array(res[str.lower(attrib_name)]) > 0.65,
                                target_names=['not-{}'.format(attrib_name),
                                              attrib_name])

    precision, recall, thresholds = precision_recall_curve(true_labels, np.array(res[str.lower(attrib_name)]))
    score = auc(recall, precision)
    print("Area Under Curve: %0.2f" % score)
#     score = roc_auc_score(true_labels, np.array(res[str.lower(attrib_name)]))
#     fpr, tpr, thresholds = roc_curve(true_labels, np.array(res[str.lower(attrib_name)]))
    plt.subplot(2,2,ii+1)
#     plt.plot(fpr, tpr)
    plt.plot(recall, precision, label='Precision-Recall curve')
    plt.title('Precision-Recall: {}'.format(attrib_name))
#     plt.xlabel('False Positive Rate')
#     plt.ylabel('True Positive Rate')
    plt.xlabel('Recall')
    plt.ylabel('Precision')
    plt.legend(['area = {}'.format(score)])

  plt.draw()
  plt.show()
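
A minimal, self-contained sketch of the precision-recall calls used above, on synthetic labels and scores (real sklearn API; the numbers are made up):

import numpy as np
from sklearn.metrics import auc, precision_recall_curve

y_true = np.array([0, 0, 1, 1, 1, 0, 1, 0])
y_score = np.array([0.1, 0.4, 0.35, 0.8, 0.7, 0.2, 0.9, 0.5])
precision, recall, thresholds = precision_recall_curve(y_true, y_score)
print("Area Under Curve: %0.2f" % auc(recall, precision))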
Example #6
def cross_entropy(bn1: BayesNet, bn2: BayesNet, nsamples: int = None) -> float:
    cross_ent = 0.0
    if nsamples is None:
        # exact cross-entropy: -sum_x p(x) * log q(x) over all joint assignments
        bn1_vars = bn1.nodes.keys()
        for sample in all_dicts(bn1_vars):
            cross_ent -= np.exp(bn1.sample_log_prob(sample)) * bn2.sample_log_prob(sample)
    else:
        # Monte Carlo estimate: average -log q(x) over samples x ~ p
        for _ in range(nsamples):
            cross_ent -= bn2.sample_log_prob(bn1.sample())
        cross_ent /= nsamples
    return cross_ent
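
all_dicts is not defined in this snippet; a plausible sketch (an assumption, not necessarily the project's actual helper) enumerates every joint assignment of the variables as a dict:

from itertools import product

def all_dicts(variables, nvalues=2):
    """Yield every joint assignment of `variables` as a dict (binary by default)."""
    variables = list(variables)
    for values in product(range(nvalues), repeat=len(variables)):
        yield dict(zip(variables, values))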
Example #7
def main():
	args = get_args()
	table_bn = BayesNet(bn_file=args.file_name)
	mle_bn = MLEBayesNet(bn_file=args.file_name)
	parametric_bn = ParametricBayesNet(bn_file=args.file_name)

	print("Initial params MLE bn:")
	print(mle_bn.pretty_print())

	print("Initial params parametric bn:")
	print(parametric_bn.pretty_print())

	print("========== Frequentist MLE ==========")
	samples = read_samples(args.samples_file_name)
	mle_bn.learn_cpds(samples)

	print("Reference BN")
	print(table_bn.pretty_print())

	print("MLE BayesNet after learning CPDs")
	print(mle_bn.pretty_print())

	print("========== Parametric MLE ==========")

	# ref_cent = cross_entropy(table_bn, table_bn)
	# cent = cross_entropy(table_bn, parametric_bn, nsamples=100)
	# print("Step %6d | CE: %6.3f / %6.3f" % (0, cent, ref_cent))

	for step in range(1, 1000):
		sample = table_bn.sample()
		parametric_bn.learn(sample, learning_rate=args.lr)

		if step % 500 == 0:
			print("step: ", step)
			# cent = cross_entropy(table_bn, parametric_bn, nsamples=200)
			# print("Step %6d | CE: %6.3f / %6.3f" % (step, cent, ref_cent))
			# print(f"Step {step:6d} | CE: {cent:6.3f} / {ref_cent:6.3f}")

	print("Reference BN")
	print(table_bn.pretty_print())

	print("Parametric BayesNet after learning CPDs")
	print(parametric_bn.pretty_print())
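
The commented-out monitoring lines compare the learned network against the reference with the cross-entropy of Example #6. Since

    H(p, q) = -\sum_x p(x) \log q(x) \ge H(p, p) = H(p),

with equality exactly when q = p, a cent value decreasing toward ref_cent indicates that the parametric network is converging to the reference distribution.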
Example #8
    def hill_climbing(self, **kwargs):
        """
		Implements Hill Climbing Algorithm

		Args
			names (list of string): the name of the nodes
			data (np array): (nsamples, nfeatures)
			max_iter (int): max number of iteration
			g0 (BayesNet): the start point

		Returns
			g: best graph found
			s: score of best graph

		"""
        # get args
        names = kwargs.get("names")
        data = kwargs.get("data")
        max_iter = kwargs.get("max_iter", 20)
        max_parents = kwargs.get("max_parents", None)

        # initialize (use the g0 kwarg as the starting point when provided)
        g0 = kwargs.get("g0")
        if g0 is None:
            g0 = BayesNet(names)
            g0.random_init(max_parents=max_parents)
        g = g0
        s = g0.score(data)
        found_new = True
        niter = 0

        # explore
        while found_new and niter < max_iter:
            print "Iter {}".format(niter)
            niter += 1
            g, s, found_new = self.best_neighbour(names, data, g, max_parents)
            if self.plotting:
                try:
                    self.plt_mgr.add(name="score hill climbing {}".format(self.start_no), y=s)
                    self.plt_mgr.update()
                except Exception:
                    pass

        return g, s
Example #9
    def best_parent(self, g, s, i, data, max_parents):
        """
		Returns g by adding to node i the best parent that maximizes the score
		"""
        found_new = False
        r = g.compute_r(data)
        s_i = g.score_node(i, data, r)
        s_max = s
        g_max = g

        g_work = BayesNet(bn=g)
        for j in range(g.n):
            if j not in g_work.parents[i]:
                success = g_work.add_edge(j, i, max_parents)
                if success:
                    s_new = s - s_i + g_work.score_node(i, data, r)
                    if s_new > s_max:
                        found_new = True
                        s_max = s_new
                        g_max = BayesNet(bn=g_work)
                    g_work.remove_edge(j, i)

        return g_max, s_max, found_new
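
The delta update s_new = s - s_i + g_work.score_node(i, data, r) relies on the decomposability of standard Bayesian network scores (e.g., BIC, BDeu): the graph score is a sum of per-node family scores, so changing the parent set of node i changes only that node's term:

    s(G) = \sum_{i=1}^{n} s_i(X_i \mid \mathrm{Pa}_G(X_i)), \qquad s(G') = s(G) - s_i(G) + s_i(G')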
Example #10
from bayes_net import BayesNet


def main():
    print(
        "Joint probability:",
        bn.jointProb([('ST', True), ('UPAL', True), ('CEA', False),
                      ('CP', True), ('PA', True), ('FEUR', False)]))
    print("Individual probability:", bn.indProb(("CEA", False)))


if __name__ == '__main__':
    bn = BayesNet()

    bn.add("ST", [], 0.60)
    bn.add("UPAL", [], 0.05)

    bn.add("CP", [("ST", True), ("PA", False)], 0.01)
    bn.add("CP", [("ST", True), ("PA", True)], 0.02)
    bn.add("CP", [("ST", False), ("PA", False)], 0.001)
    bn.add("CP", [("ST", False), ("PA", True)], 0.011)

    bn.add("CEA", [("ST", True)], 0.90)
    bn.add("CEA", [("ST", False)], 0.001)

    bn.add("PA", [("UPAL", True)], 0.25)
    bn.add("PA", [("UPAL", False)], 0.04)

    bn.add("FEUR", [("UPAL", False), ("PA", True)], 0.10)
    bn.add("FEUR", [("UPAL", False), ("PA", False)], 0.01)
    bn.add("FEUR", [("UPAL", True), ("PA", True)], 0.90)
Example #11
def classify_using_attributes():
  from sklearn.ensemble import RandomForestClassifier
  from sklearn import svm
  from sklearn.metrics import classification_report
  from sklearn.metrics import accuracy_score
  from sklearn.dummy import DummyClassifier

  makes = ['bmw', 'ford']
  types = ['sedan', 'suv']
  args = makes + types + ['germany', 'usa']
  
#   args = get_args_from_file('sorted_attrib_list.txt')
  config = get_config()
  (dataset, config) = fgu.get_all_metadata(config)
  config.attribute.names = args

  attrib_names = [str.lower(a) for a in args]
  attrib_classifiers = []
  for attrib_name in args:
    attrib_classifiers.append(AttributeClassifier.load('../../../attribute_classifiers/{}.dat'.format(attrib_name)))

  classes = dataset['class_meta']
  train_annos = dataset['train_annos']
  test_annos = dataset['test_annos']
  attrib_meta = dataset['attrib_meta']
  
  classes = select_small_set_for_bayes_net(dataset, makes, types)
  
  attrib_meta = attrib_meta.loc[classes.index]
  train_annos = train_annos[np.array(
                             train_annos.class_index.isin(classes.class_index))]
  test_annos = test_annos[np.array(
                              test_annos.class_index.isin(classes.class_index))]

  ftr = Bow.load_bow(train_annos, config)
  fte = Bow.load_bow(test_annos, config)
  
  bnet = BayesNet(config, train_annos,
                  classes, attrib_classifiers, attrib_meta, desc=str(args))

  attrib_res_train, l = bnet.create_attrib_res_on_images(train_annos, ftr)
  attrib_res_test, l = bnet.create_attrib_res_on_images(test_annos, fte)
  
  
#   features_train = Bow.load_bow(train_annos, config)
#   features_test = Bow.load_bow(test_annos, config)
  
  # combine attribs and features
  features_train = np.concatenate([ftr, attrib_res_train[attrib_names]], axis=1)
  features_test = np.concatenate([fte, attrib_res_test[attrib_names]], axis=1)


  # define a classifier that uses the attribute scores
#   clf = RandomForestClassifier(n_estimators=50, n_jobs=-2)
#   clf = svm.SVC(kernel='rbf')
  clf = svm.LinearSVC()

  labels_train = np.array(attrib_res_train.class_index)
#   features_train = np.array(attrib_res_train[attrib_names])
  clf.fit(features_train, labels_train)


#   features_test = np.array(attrib_res_test[attrib_names])
  y_pred = clf.predict(features_test)
  labels_test = np.array(attrib_res_test.class_index)

  print(classification_report(labels_test, y_pred,
                              labels=classes.index,
                              target_names=[c for c in classes.class_name]))
  
  print("Accuracy: {}".format(accuracy_score(labels_test, y_pred)))
  print("Mean Accuracy: {}".format(clf.score(features_test, labels_test)))
  
  
  print ''
  print 'Accuracy at N:'
  scorer = AccuracyAtN(clf.decision_function(features_test), 
                       labels_test, class_names=np.unique(labels_train))
  for ii in range(1, 11):
    print 'Accuracy at {}: {}'.format(ii, scorer.get_accuracy_at(ii))

  dummy_1 = DummyClassifier(strategy='most_frequent').fit(features_train, labels_train)
  dummy_2 = DummyClassifier(strategy='stratified').fit(features_train, labels_train)
  dummy_3 = DummyClassifier(strategy='uniform').fit(features_train, labels_train)
  
  print ''
  print 'Dummy Classifiers:'
  print '-----------------'
  print("Accuracy - most_frequent: {}".format(accuracy_score(labels_test, dummy_1.predict(features_test))))
  print("Accuracy - stratified: {}".format(accuracy_score(labels_test, dummy_2.predict(features_test))))
  print("Accuracy - uniform: {}".format(accuracy_score(labels_test, dummy_2.predict(features_test))))
  
  print("Mean Accuracy - most_frequent: {}".format(dummy_1.score(features_test, labels_test)))
  print("Mean Accuracy - stratified: {}".format(dummy_2.score(features_test, labels_test)))
  print("Mean Accuracy - uniform: {}".format(dummy_3.score(features_test, labels_test)))
Example #12
    def best_neighbour(self, names, data, g0, max_parents):
        """
		Find best neighboor of a BN

		Args
			names (list of string): the name of the nodes
			data (np array): (nsamples, nfeatures)
			g0 (BayesNet): the reference

		Returns
			g: best neighbour
			s: score of best neighbour

		"""
        print "Searching for best neighbour"
        # reference variables
        n = g0.n
        r = g0.compute_r(data)
        s0 = g0.score(data)

        # best candidate so far
        g = BayesNet(bn=g0)
        s = s0
        s_eps = s0
        found_new = False

        # working graph
        g_work = BayesNet(bn=g0)
        if max_parents is None:
            max_parents = n - 1

        def update_best(mode="add"):
            """
			When called, evaluate the working graph and update best candidate
			The s update must take place out of the function scope for python limitations
			"""
            # if mode == "rem" or not g_work.is_cyclic():
            s_new = s0 - s_i + g_work.score_node(i, data, r)
            # we give a random advantage to the candidate based on previous updates
            s_eps_new = s_new + self.epsilon * np.random.rand()
            if s_eps_new > s_eps:
                print "Found new candidate ({}) at {}".format(mode, s_new)
                g.copy(g_work)
                return s_new, s_eps_new, True

            return s, s_eps, found_new

        # iterate over node center of the modification
        for i in xrange(n):
            parents = g0.parents[i]
            s_i = g0.score_node(i, data, r)
            # 1. remove edge
            for j in parents:
                g_work.remove_edge(j, i)
                s, s_eps, found_new = update_best("rem")
                g_work.add_edge(j, i)

            # 2. add edge
            if len(parents) < max_parents:
                for j_prime in xrange(n):
                    if j_prime not in parents:
                        if g_work.add_edge(j_prime, i):
                            s, s_eps, found_new = update_best("add")
                            g_work.remove_edge(j_prime, i)

            # 3. reverse direction
            for j in parents:
                if len(g0.parents[j]) < max_parents:
                    g_work.remove_edge(j, i)
                    if g_work.add_edge(i, j):
                        s, s_eps, found_new = update_best("rev")
                        g_work.remove_edge(i, j)
                    g_work.add_edge(j, i)

        self.update_epsilon(s0, s)
        return g, s, found_new
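
The s_eps bookkeeping gives each candidate a random bonus of up to self.epsilon: a move is accepted when

    s_{new} + \epsilon u > s_{eps}, \qquad u \sim U(0, 1),

so with epsilon > 0 slightly worse moves can occasionally be accepted, a plateau-escape heuristic in the spirit of simulated annealing. update_epsilon presumably adapts epsilon from the progress s - s0; its body is not shown, so this is an assumption.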
Example #13
def enumerate_all(variables, evidence):
    """
    Figure 14.9 The enumeration algorithm for answering queries on Bayesian
    networks.

    Note: The implementation has been extended to handle queries with
    multiple variables.
    """
    if len(variables) == 0:
        return 1.0
    y = variables[0]
    p_vals = [evidence[parent] for parent in y.potential.othernodes]
    if y in evidence:
        return get_cpt_entry(y, evidence[y], p_vals) * enumerate_all(variables[1:], evidence)
    val = 0.0
    for i in range(len(y.states)):
        evidence_copy = evidence.copy()
        evidence_copy[y] = i
        val += get_cpt_entry(y, i, p_vals) * enumerate_all(variables[1:], evidence_copy)
    return val

if __name__ == "__main__":
    nodes, potentials = netlog('./asia.net')
    bn = BayesNet(nodes, potentials)

    n_asia = findnode('asia', nodes) 
    n_either = findnode('either', nodes)
    n_xray = findnode('xray', nodes)
    ev = {n_xray : 1, n_either : 1}

    ordered_vars = [potentials[i].node for i in t_ordered_vars(potentials)]
    print(enumeration_ask(bn, n_asia, ev))
Example #14
        'A0': [0.5, 0.5],
        'A1': [1 - 1e-6, 1e-6]
    },
    E: {
        'B0C0': [0.1, 0.9],
        'B0C1': [1e-7, 1 - 1e-7],
        'B1C0': [1 - 1e-10, 1e-10],
        'B1C1': [1e-5, 1 - 1e-5]
    },
    D: {
        'A0': [0.4, 0.6],
        'A1': [0.1, 0.9]
    }
}

net = BayesNet(graph=GRAPH, cpt=CPT)
samples = net.msample()

print(mean([s[B] for s in samples]))


def spread(w):
    """
    The closer to zero, the better.
    """
    result = max(w) / sum(w)
    return result


def f(x):
    return 1