def one_predictive_var():
    Y = RandomVar('Y', 2)
    X1 = RandomVar('X1', 2)
    X2 = RandomVar('X2', 2)
    X3 = RandomVar('X3', 2)

    f_X1_Y = CPD([X1, Y], [1.0, 0.0, 0.0, 1.0])
    f_X2_Y = CPD([X2, Y], [0.5, 0.5, 0.5, 0.5])
    f_X3_Y = CPD([X3, Y], [0.5, 0.5, 0.5, 0.5])
    f_Y = CPD([Y], [0.5, 0.5])

    bn = BayesianNetwork([f_Y, f_X1_Y, f_X2_Y, f_X3_Y])

    # Training the model
    fs = ForwardSampler(bn)
    fs.sample(1000)
    scope, X = fs.samples_to_matrix()

    y = X[:, -1]
    X = X[:, 0:-1]

    nb = NaiveBayes()
    nb.fit(X, y)

    # Evaluating the model
    fs = ForwardSampler(bn)
    fs.sample(10)
    _, X = fs.samples_to_matrix()

    print(nb.score(X[:, 0:-1], X[:, -1]))
    print(nb.predict_proba(X[:, 0:-1]))
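
# Note on the setup above: f_X1_Y makes X1 a deterministic copy of Y, while
# X2 and X3 are pure noise, so the fitted classifier should score close to
# 1.0 and its predicted probabilities should be driven almost entirely by
# the X1 column.
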
import numpy as np


def build_genetic_network(parents, allele_freqs, prob_trait_genotype):
    prob_trait_genotype = np.array(prob_trait_genotype)

    variables = {}
    for person in parents.keys():
        v1 = RandomVar(person + '_allele_1', len(allele_freqs))
        v2 = RandomVar(person + '_allele_2', len(allele_freqs))
        v3 = RandomVar(person + '_trait', 2)
        variables[person] = [v1, v2, v3]

    factors = []
    for person in parents.keys():
        v1, v2, v3 = variables[person]

        if parents[person]:
            p1_vars = variables[parents[person][0]]
            p2_vars = variables[parents[person][1]]
            f_allele1 = allele_given_parent_alleles(v1, p1_vars)
            f_allele2 = allele_given_parent_alleles(v2, p2_vars)
        else:
            f_allele1 = CPD([v1], allele_freqs)
            f_allele2 = CPD([v2], allele_freqs)

        f_phenotype = phenotype_given_genotype(variables[person],
                                               prob_trait_genotype)

        factors += [f_allele1, f_allele2, f_phenotype]

    return BayesianNetwork(factors)
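
# The two helpers called above are not defined in this snippet. The sketches
# below are one plausible implementation, not the library's own code: they
# assume a RandomVar exposes its cardinality as `.k` (the attribute name is
# a guess) and use the CPD value layout seen in the other examples, where
# the child variable varies slowest.


def allele_given_parent_alleles(child, parent_vars):
    # P(child allele | parent's two alleles): each of the parent's alleles
    # is transmitted with probability 1/2.
    p1, p2, _ = parent_vars
    k = child.k  # assumed cardinality attribute
    values = []
    for c in range(k):
        for a1 in range(k):
            for a2 in range(k):
                values.append(0.5 * (c == a1) + 0.5 * (c == a2))
    return CPD([child, p1, p2], values)


def phenotype_given_genotype(person_vars, prob_trait_genotype):
    # P(trait | allele pair), read off the prob_trait_genotype matrix.
    a1, a2, trait = person_vars
    k = len(prob_trait_genotype)
    values = []
    for t in range(2):
        for i in range(k):
            for j in range(k):
                p = prob_trait_genotype[i][j]
                values.append(p if t == 1 else 1.0 - p)
    return CPD([trait, a1, a2], values)
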
def main():
    parents = {'alice': [], 'bob': [], 'eve': ['bob', 'alice']}
    allele_freqs = [0.9, 0.1]
    prob_trait_genotype = [[0.0, 0.0], [0.0, 1.0]]

    bn = build_genetic_network(parents, allele_freqs, prob_trait_genotype)

    # Examples
    alice_trait = RandomVar('alice_trait', 2)
    bob_trait = RandomVar('bob_trait', 2)
    eve_trait = RandomVar('eve_trait', 2)

    # alice_allele_1 = RandomVar('alice_allele_1', 2)
    # alice_allele_2 = RandomVar('alice_allele_2', 2)
    #
    # bob_allele_1 = RandomVar('bob_allele_1', 2)
    # bob_allele_2 = RandomVar('bob_allele_2', 2)
    #
    # eve_allele_1 = RandomVar('eve_allele_1', 2)
    # eve_allele_2 = RandomVar('eve_allele_2', 2)

    jm = JointMarginalization(bn)
    print(jm.posterior([eve_trait]))
    print(jm.posterior([eve_trait], [(bob_trait, 1)]))
    print(jm.posterior([alice_trait, bob_trait], [(eve_trait, 1)]))

    fs = ForwardSampler(bn)
    fs.sample(1000)
    print(fs.posterior([(alice_trait, 0), (bob_trait, 0), (eve_trait, 1)]) /
          fs.posterior([(eve_trait, 1)]))

    print(bn.graph())
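
# Sanity check for the numbers above: prob_trait_genotype = [[0, 0], [0, 1]]
# makes the trait recessive (expressed only by genotype (1, 1)), so with
# allele frequencies [0.9, 0.1] a founder has P(trait) = 0.1 * 0.1 = 0.01.
# Eve draws each allele independently from a parent whose alleles are at the
# population frequency, so her prior probability of the trait is 0.01 as well.
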
def fit(self, X, y):
    """Fit a Multinomial Naive Bayes model to the data.

    Parameters
    ----------
    X : two-dimensional np.array or python matrix of integers
        Matrix representing the observations. It is assumed that
        `X[:, i]` is a sample from a discrete random variable $X_i$
        that takes values between `0` and `X[:, i].max()`.
    y : one-dimensional np.array or python list of integers
        Array representing the classes assigned to each observation.
    """
    X = np.asarray(X, dtype=int)
    if X.min() < 0:
        raise ValueError('Invalid samples: X must contain only '
                         'non-negative integers')

    self.classes_, y = np.unique(y, return_inverse=True)
    C = RandomVar('Y', len(self.classes_))

    scope = []
    for i in range(X.shape[1]):
        scope.append(RandomVar('X{0}'.format(i), X[:, i].max() + 1))

    graph = {v: set() for v in scope}
    graph[C] = set(scope)

    scope.append(C)
    Xy = np.concatenate([X, y.reshape(-1, 1)], axis=1)

    self.bn_ = UniformDirichlet(scope).fit_predict(Xy, graph)
    self.scope_ = scope

    return self
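
# A minimal usage sketch for fit (hypothetical toy data; predict_proba and
# score are used exactly as in the example scripts):
#
#     X = np.array([[0, 1], [1, 0], [0, 0], [1, 1]])
#     y = [0, 1, 0, 1]
#     nb = NaiveBayes().fit(X, y)
#     print(nb.predict_proba(X))
#     print(nb.score(X, y))
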
import itertools


def main():
    x1 = RandomVar('X1', 2)
    x2 = RandomVar('X2', 2)
    x3 = RandomVar('X3', 2)

    fx1 = CPD([x1], [0.11, 0.89])
    fx2_x1 = CPD([x2, x1], [0.59, 0.22, 0.41, 0.78])
    fx3_x2 = CPD([x3, x2], [0.39, 0.06, 0.61, 0.94])

    bn = BayesianNetwork([fx1, fx2_x1, fx3_x2])
    # mn = MarkovNetwork([fx1, fx2_x1, fx3_x2])

    ve = VariableElimination(bn)
    jm = JointMarginalization(bn)

    print(ve.posterior([x1, x2], [(x3, 0)]))
    print(jm.posterior([x1, x2], [(x3, 0)]))

    print(ve.posterior([x1, x2, x3]))
    print(jm.posterior([x1, x2, x3]))

    print(ve.maximum_a_posteriori(evidence=[(x3, 0)]))
    print(jm.maximum_a_posteriori([x1, x2], [(x3, 0)]))

    fs = ForwardSampler(bn)
    fs.sample(10000)
    for c in itertools.product(range(2), repeat=3):
        print('{0}: {1}'.format(c, fs.posterior(zip([x1, x2, x3], c))))

    px3_0 = fs.posterior([(x3, 0)])
    for c in itertools.product(range(2), repeat=2):
        assg = list(zip([x1, x2], c)) + [(x3, 0)]
        print('{0}: {1}'.format(c, fs.posterior(assg) / px3_0))

    gs = GibbsSampler(bn)
    gs.sample(burn_in=1000, n=2000)
    for c in itertools.product(range(2), repeat=3):
        print('{0}: {1}'.format(c, gs.posterior(zip([x1, x2, x3], c))))

    gs.reset()
    gs.sample(burn_in=1000, n=1000, evidence=[(x3, 0)])
    for c in itertools.product(range(2), repeat=2):
        print('{0}: {1}'.format(c, gs.posterior(zip([x1, x2], c))))
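
# How to read the CPD tables in these examples (layout inferred from the
# tables themselves, whose conditioning columns each sum to one): the first
# variable in the scope is the child and varies slowest; the remaining
# variables are its parents, with the last varying fastest. So for
# fx2_x1 = CPD([x2, x1], [0.59, 0.22, 0.41, 0.78]):
#
#     P(X2=0 | X1=0) = 0.59    P(X2=0 | X1=1) = 0.22
#     P(X2=1 | X1=0) = 0.41    P(X2=1 | X1=1) = 0.78
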
import numpy as np


def simple_sampling():
    V1 = RandomVar('V1', 3)
    V2 = RandomVar('V2', 3)
    V3 = RandomVar('V3', 5)

    scope = [V1, V2, V3]
    graph = {V1: {V3}, V2: {V3}, V3: set()}

    X = np.zeros((1000, 3), dtype=int)
    X[:, 0:2] = np.random.choice(range(3), size=(X.shape[0], 2),
                                 p=[0.2, 0.5, 0.3])
    X[:, 2] = X[:, 0] + X[:, 1]

    mle = MaximumLikelihood(scope)
    print(mle.fit_predict(X, graph))

    ud = UniformDirichlet(scope, alpha=1.0)
    print(ud.fit_predict(X, graph))
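
# Because V3 = V1 + V2 deterministically, the fitted CPD for V3 should be
# close to an indicator table. For reference, the implied marginal of V3 is
#     P(0) = 0.2^2               = 0.04
#     P(1) = 2(0.2)(0.5)         = 0.20
#     P(2) = 0.5^2 + 2(0.2)(0.3) = 0.37
#     P(3) = 2(0.5)(0.3)         = 0.30
#     P(4) = 0.3^2               = 0.09
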
def three_variables():
    M = RandomVar('Market', 3)
    F = RandomVar('Found', 2)

    uMF = Factor([M, F], [0, -7, 0, 5, 0, 20])
    cM = CPD([M], [0.5, 0.3, 0.2])

    # Alternative decision rules for F
    dF_1 = CPD([F], [1.0, 0])
    dF_2 = CPD([F], [0, 1.0])  # Optimal

    diagram = InfluenceDiagram([cM], [uMF])
    eu = ExpectedUtility(diagram)

    print(eu.expected_utility([dF_1]))
    print(eu.expected_utility([dF_2]))
    print(eu.optimal_decision_rule([F]))
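
# By hand: founding the company (F=1) has expected utility
#     0.5 * (-7) + 0.3 * 5 + 0.2 * 20 = 2.0
# versus 0.0 for not founding, so dF_2 is indeed the optimal rule and
# expected_utility([dF_2]) should report 2.0.
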
def six_variables():
    M = RandomVar('Market', 3)
    S = RandomVar('Survey', 4)  # S = 3 means no survey
    T = RandomVar('Test', 2)
    F = RandomVar('Found', 2)

    uMF = Factor([M, F], [0, -7, 0, 5, 0, 20])
    uT = Factor([T], [0, -1])

    cM = CPD([M], [0.5, 0.3, 0.2])
    cST = CPD([S, M, T], [
        0.0, 0.6, 0.0, 0.3, 0.0, 0.1,
        0.0, 0.3, 0.0, 0.4, 0.0, 0.4,
        0.0, 0.1, 0.0, 0.3, 0.0, 0.5,
        1.0, 0.0, 1.0, 0.0, 1.0, 0.0
    ])

    # Alternative decision rules for F given S
    dFS_1 = CPD([F, S], [0, 0, 0, 1, 1, 1, 1, 0])
    dFS_2 = CPD([F, S], [1, 0, 0, 0, 0, 1, 1, 1])  # Optimal

    # Alternative decision rules for T
    dT_1 = CPD([T], [1.0, 0.0])
    dT_2 = CPD([T], [0.0, 1.0])  # Optimal

    diagram = InfluenceDiagram([cM, cST], [uMF, uT])
    eu = ExpectedUtility(diagram)

    print(eu.expected_utility([dFS_1, dT_1]))
    print(eu.expected_utility([dFS_1, dT_2]))
    print(eu.expected_utility([dFS_2, dT_1]))
    print(eu.expected_utility([dFS_2, dT_2]))

    # New influence diagram with a single decision rule
    dT = dT_2
    diagram2 = InfluenceDiagram([cM, cST, dT], [uMF, uT])
    eu2 = ExpectedUtility(diagram2)

    dFS_optimal = eu2.optimal_decision_rule([F, S])
    print(eu.expected_utility([dFS_optimal, dT]))
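
# Decoding cST: if the test is not run (T=0) the survey always reports the
# dummy value S=3; if it is run (T=1) the survey outcome depends on the
# market state:
#     M=0: P(S=0), P(S=1), P(S=2) = 0.6, 0.3, 0.1
#     M=1: P(S=0), P(S=1), P(S=2) = 0.3, 0.4, 0.3
#     M=2: P(S=0), P(S=1), P(S=2) = 0.1, 0.4, 0.5
# The optimal rule dFS_2 founds the company unless the survey comes back
# worst-case (S=0).
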
def earthquake():
    B = RandomVar('B', 2)
    E = RandomVar('E', 2)
    A = RandomVar('A', 2)
    R = RandomVar('R', 2)

    a_be = CPD([A, B, E],
               [0.999, 0.01, 0.01, 0.0001, 0.001, 0.99, 0.99, 0.9999])
    r_e = CPD([R, E], [1.0, 0.0, 0.0, 1.0])
    b = CPD([B], [0.99, 0.01])
    e = CPD([E], [0.999, 0.001])

    bn = BayesianNetwork([a_be, r_e, b, e])

    fs = ForwardSampler(bn)
    fs.sample(1000)
    scope, X = fs.samples_to_matrix()

    graph = bn.graph()
    # graph = {B: set(), E: set(), A: set(), R: set()}

    score_l = LikelihoodScore(scope).fit(X, graph).score
    print(score_l)

    score_bic = BICScore(scope).fit(X, graph).score
    print(score_bic)

    score_b = BayesianScore(scope).fit(X, graph).score
    print(score_b)

    # scorer = LikelihoodScore(scope)
    # scorer = BICScore(scope)
    scorer = BayesianScore(scope)

    best_graph, best_score = restarting_local_search(X, scope, scorer,
                                                     restarts=1,
                                                     iterations=100,
                                                     epsilon=0.2, verbose=1)
    print('Best:')
    print(best_score)
    print(best_graph)
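
# Note on the three scores: the likelihood score never penalizes extra
# edges, so structure search guided by it gravitates toward fully connected
# graphs; BIC and the Bayesian score trade fit against model complexity,
# which is why they are the sensible scorers to hand to
# restarting_local_search here and in the later structure-learning examples.
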
def main():
    B = RandomVar('B', 2)
    E = RandomVar('E', 2)
    A = RandomVar('A', 2)
    R = RandomVar('R', 2)

    a_be = CPD([A, B, E],
               [0.999, 0.01, 0.01, 0.0001, 0.001, 0.99, 0.99, 0.9999])
    r_e = CPD([R, E], [1.0, 0.0, 0.0, 1.0])
    b = CPD([B], [0.99, 0.01])
    e = CPD([E], [0.999, 0.001])

    bn = BayesianNetwork([a_be, r_e, b, e])

    ve = VariableElimination(bn)
    jm = JointMarginalization(bn)
    print(ve.posterior([B, E, A, R]) == jm.posterior([B, E, A, R]))

    fs = ForwardSampler(bn)
    fs.sample(1000)
import numpy as np


def occurrence_counter():
    x1 = RandomVar('X1', 2)
    x2 = RandomVar('X2', 2)
    x3 = RandomVar('X3', 2)

    graph = {x1: {x2}, x2: {x3}, x3: set()}
    scope = [x1, x2, x3]

    X = np.array([[0, 1, 1],
                  [0, 1, 0],
                  [1, 0, 0]])

    oc = OccurrenceCounter(scope, maxlen=4)
    oc.fit(X, graph)

    oc.refit(graph)
    print(oc.stats)
    print(oc.last_scopes)

    graph = {x1: set(), x2: set(), x3: set()}
    oc.refit(graph)
    print(oc.stats)
    print(oc.last_scopes)
def simple_chain():
    x1 = RandomVar('X1', 2)
    x2 = RandomVar('X2', 2)
    x3 = RandomVar('X3', 2)

    fx1 = CPD([x1], [0.11, 0.89])
    fx2_x1 = CPD([x2, x1], [0.59, 0.22, 0.41, 0.78])
    fx3_x2 = CPD([x3, x2], [0.39, 0.06, 0.61, 0.94])

    bn = BayesianNetwork([fx1, fx2_x1, fx3_x2])
    graph = bn.graph()
    print(bn)

    fs = ForwardSampler(bn)
    fs.sample(1000)
    scope, X = fs.samples_to_matrix()

    mle = MaximumLikelihood(scope)
    print(mle.fit_predict(X, graph))

    ud = UniformDirichlet(scope, alpha=1.0)
    print(ud.fit_predict(X, graph))
def simple_chain():
    x1 = RandomVar('X1', 2)
    x2 = RandomVar('X2', 2)
    x3 = RandomVar('X3', 2)

    fx1 = CPD([x1], [0.11, 0.89])
    fx2_x1 = CPD([x2, x1], [0.59, 0.22, 0.41, 0.78])
    fx3_x2 = CPD([x3, x2], [0.39, 0.06, 0.61, 0.94])

    bn = BayesianNetwork([fx1, fx2_x1, fx3_x2])

    fs = ForwardSampler(bn)
    fs.sample(2000)
    scope, X = fs.samples_to_matrix()

    graph = bn.graph()
    # graph = {x1: set(), x2: set(), x3: set()}

    score_l = LikelihoodScore(scope).fit(X, graph).score
    print(score_l)

    score_bic = BICScore(scope).fit(X, graph).score
    print(score_bic)

    score_b = BayesianScore(scope).fit(X, graph).score
    print(score_b)

    # scorer = LikelihoodScore(scope)
    scorer = BICScore(scope)
    # scorer = BayesianScore(scope)

    best_graph, best_score = restarting_local_search(X, scope, scorer,
                                                     restarts=5,
                                                     iterations=50,
                                                     epsilon=0.2, verbose=1)
    print('Best:')
    print(best_score)
    print(best_graph)
def traffic():
    A = RandomVar('A', 2)
    T = RandomVar('T', 2)
    P = RandomVar('P', 2)

    fP = CPD([P], [0.99, 0.01])
    fA = CPD([A], [0.9, 0.1])
    fT_AP = CPD([T, P, A], [0.9, 0.5, 0.4, 0.1, 0.1, 0.5, 0.6, 0.9])

    bn = BayesianNetwork([fP, fA, fT_AP])
    print(bn)

    fs = ForwardSampler(bn)
    fs.sample(1000)
    scope, X = fs.samples_to_matrix()

    mle = MaximumLikelihood(scope)
    print(mle.fit_predict(X, bn.graph()))

    ud = UniformDirichlet(scope, alpha=1.0)
    print(ud.fit_predict(X, bn.graph()))
def traffic():
    A = RandomVar('A', 2)
    T = RandomVar('T', 2)
    P = RandomVar('P', 2)

    fP = CPD([P], [0.99, 0.01])
    fA = CPD([A], [0.9, 0.1])
    fT_AP = CPD([T, P, A], [0.9, 0.5, 0.4, 0.1, 0.1, 0.5, 0.6, 0.9])

    bn = BayesianNetwork([fP, fA, fT_AP])
    # print(bn)

    fs = ForwardSampler(bn)
    fs.sample(2000)
    scope, X = fs.samples_to_matrix()

    graph = bn.graph()

    score_l = LikelihoodScore(scope).fit(X, graph).score
    print(score_l)

    score_bic = BICScore(scope).fit(X, graph).score
    print(score_bic)

    score_b = BayesianScore(scope).fit(X, graph).score
    print(score_b)

    # scorer = LikelihoodScore(scope)
    scorer = BICScore(scope)
    # scorer = BayesianScore(scope)

    best_graph, best_score = restarting_local_search(X, scope, scorer,
                                                     restarts=5,
                                                     iterations=50,
                                                     epsilon=0.2, verbose=1)
    print('Best:')
    print(best_score)
    print(best_graph)
def main():
    A = RandomVar('A', 2)
    T = RandomVar('T', 2)
    P = RandomVar('P', 2)

    fP = CPD([P], [0.99, 0.01])
    fA = CPD([A], [0.9, 0.1])
    fT_AP = CPD([T, P, A], [0.9, 0.5, 0.4, 0.1, 0.1, 0.5, 0.6, 0.9])

    bn = BayesianNetwork([fP, fA, fT_AP])

    ve = VariableElimination(bn)
    jm = JointMarginalization(bn)

    print(jm.maximum_a_posteriori([A], [(T, 1)]))

    print(ve.posterior([A], [(T, 1)]))
    print(jm.posterior([A], [(T, 1)]))

    print(ve.posterior([A, T, P]))
    print(jm.posterior([A, T, P]))
import numpy as np


def simple_sampling():
    V1 = RandomVar('V1', 3)
    V2 = RandomVar('V2', 3)
    V3 = RandomVar('V3', 5)

    scope = [V1, V2, V3]
    graph = {V1: {V3}, V2: {V3}, V3: set()}

    X = np.zeros((1000, 3), dtype=int)
    X[:, 0:2] = np.random.choice(range(3), size=(X.shape[0], 2),
                                 p=[0.2, 0.5, 0.3])
    X[:, 2] = X[:, 0] + X[:, 1]

    score_l = LikelihoodScore(scope).fit(X, graph).score
    print(score_l)

    score_bic = BICScore(scope).fit(X, graph).score
    print(score_bic)

    score_b = BayesianScore(scope).fit(X, graph).score
    print(score_b)

    # scorer = LikelihoodScore(scope)
    scorer = BICScore(scope)
    # scorer = BayesianScore(scope)

    best_graph, best_score = restarting_local_search(X, scope, scorer,
                                                     restarts=5,
                                                     iterations=50,
                                                     epsilon=0.2, verbose=1)
    print('Best:')
    print(best_score)
    print(best_graph)

    print(BayesianScore(scope).fit(X, best_graph).score)
def earthquake():
    B = RandomVar('B', 2)
    E = RandomVar('E', 2)
    A = RandomVar('A', 2)
    R = RandomVar('R', 2)

    a_be = CPD([A, B, E],
               [0.999, 0.01, 0.01, 0.0001, 0.001, 0.99, 0.99, 0.9999])
    r_e = CPD([R, E], [1.0, 0.0, 0.0, 1.0])
    b = CPD([B], [0.99, 0.01])
    e = CPD([E], [0.999, 0.001])

    bn = BayesianNetwork([a_be, r_e, b, e])
    print(bn)

    fs = ForwardSampler(bn)
    fs.sample(1000)
    scope, X = fs.samples_to_matrix()

    mle = MaximumLikelihood(scope)
    print(mle.fit_predict(X, bn.graph()))

    ud = UniformDirichlet(scope, alpha=1.0)
    print(ud.fit_predict(X, bn.graph()))