def specificquery(self, query, evidence):
    '''
    Shortcut to the *specificquery* method in :doc:`tablecpdfactorization`.

    Eliminate every variable except the ones named in *query*, adjust all
    distributions to reflect *evidence*, and return the probability of the
    exact event described by *query*.

    Arguments:
        1. *query* -- dict of (variable: list-of-values) pairs describing
           the outcome whose probability is wanted.  Each value is a list
           of one or more outcomes the variable may take.
        2. *evidence* -- dict of (variable: value) pairs of known evidence.

    Returns the probability of the queried event as a float in [0, 1].

    Queries of the form P((x=A or x=B) and (y=C or y=D)) are supported by
    formatting *query* like so::

        { "x": ["A", "B"], "y": ["C", "D"] }
    '''
    # validate: the network must be fully loaded before it can be queried
    for required in ("V", "E", "Vdata"):
        if not hasattr(self, required):
            raise notloadedError("Bayesian network is missing essential attributes")
    assert isinstance(query, dict) and isinstance(evidence, dict), "query and evidence must be dicts"
    for value in query.values():
        assert isinstance(value, list), "the values of your query must be lists, even if singletons"

    # delegate the actual variable elimination to a fresh factorization
    factorization = TableCPDFactorization(self)
    return factorization.specificquery(query, evidence)
def recur(sc, temp, number, bn, val, jp):
    # Depth-first enumeration of every complete assignment of the network's
    # variables (assumed binary, taking the two values in `val`).  For each
    # complete assignment, the joint probability is computed as the product
    # of per-node conditional probabilities, and the assignment plus its
    # probability is appended to `jp` as one row [v1, v2, ..., p].
    #
    # sc     -- dict holding the current (partial) assignment; mutated in place
    # temp   -- scratch list; NOTE(review): the passed-in argument is never
    #           read -- it is rebound locally in the base case below
    # number -- count of variables still to assign (0 = assignment complete)
    # bn     -- the DiscreteBayesianNetwork being enumerated
    # val    -- the two candidate values, e.g. ['0', '1']
    # jp     -- output list of assignment+probability rows
    if number != 0:
        # assign the next unassigned variable each of its two values, recurse
        for i in range(2):
            sc[bn.V[len(bn.V) - number]] = val[i]
            recur(sc, temp, number - 1, bn, val, jp)
    else:
        result = []
        p = 1
        temp = []
        for j in range(len(bn.V)):
            pa = bn.Vdata[bn.V[j]]['parents']
            if pa:
                # conditional node: query P(node = assigned value | parents)
                fn = TableCPDFactorization(bn)
                evidence = {}
                for k in range(len(pa)):
                    evidence[pa[k]] = sc[pa[k]]
                # list() of the single-character value yields a singleton list,
                # matching specificquery's list-valued query contract
                query = {bn.V[j]: list(sc[bn.V[j]])}
                result.append(fn.specificquery(query, evidence))
            else:
                # root node: read its prior directly ('0' -> index 0, else 1)
                if sc[bn.V[j]] == '0':
                    result.append(bn.Vdata[bn.V[j]]['cprob'][0])
                else:
                    result.append(bn.Vdata[bn.V[j]]['cprob'][1])
            temp.append(sc[bn.V[j]])
            p = p * result[j]
        temp.append(p)
        jp.append(temp)
def compute_vertex_marginal(self, v, evidence):
    """
    Compute the marginal distribution of vertex *v* given *evidence*.

    :return: a dictionary with: state name -> marginal values
             ex. {"state1": 0.5, "state2": 0.5}
    """
    marginals = {}
    states = self.get_states(v)
    if v in evidence:
        # Evidence node: all probability mass sits on the observed state.
        observed = evidence[v]
        for state in states:
            marginals[state] = 1.0 if state == observed else 0.0
    else:
        # Query node: run variable elimination on a fresh clone so the
        # factorization does not consume this network's state.
        factorization = TableCPDFactorization(self.clone())
        answer = factorization.condprobve({v: ''}, evidence)
        # Associate each marginal value with its state name, in order.
        for index, state in enumerate(states):
            marginals[state] = answer.vals[index]
    return marginals
def setUp(self):
    # Build the test network.  NodeData.load populates an existing instance
    # in place (it does not return a new NodeData), and
    # DiscreteBayesianNetwork requires both the skeleton and the node data
    # -- matching the working fixtures elsewhere in this file.
    skel = GraphSkeleton()
    skel.load("unittestdict.txt")
    skel.toporder()
    nodedata = NodeData()
    nodedata.load("unittestdict.txt")
    self.bn = DiscreteBayesianNetwork(skel, nodedata)
    self.fn = TableCPDFactorization(self.bn)
def estimate_distrib(skel, samples, query, evidence):
    # Fit the CPDs of `skel` to `samples` by maximum likelihood, then
    # answer P(query | evidence) and format the factor as a dataframe.
    bayesnet = PGMLearner().discrete_mle_estimateparams(skel, samples)
    factor = TableCPDFactorization(bayesnet).condprobve(query, evidence)
    return printdist(factor, bayesnet)
def inferCustomerClasses(param_file, evidence_dir, year):
    """
    Use the libpgm variable-elimination algorithm to infer the customer
    class of each AnswerID, given the evidence presented in the
    socio-demographic survey responses.

    Returns a dataframe indexed by AnswerID with the probability
    distribution over all classes for each AnswerID.  (The previous
    docstring promised a tuple including the BN object; only the
    dataframe was ever returned.)
    """
    bn = loadbn(param_file)
    evidence, a_id = readEvidence(year, evidence_dir)
    query = {"customer_class": ''}

    cols = bn.Vdata.get('customer_class')['vals']
    # empty dataframe in which to store inferred probabilities
    result = pd.DataFrame(columns=cols)

    count = 0  # row counter
    for e in evidence:
        # libpgm factorizations are single-use: rebuild for every query.
        bn = loadbn(param_file)
        fn = TableCPDFactorization(bn)
        try:
            inf = fn.condprobve(query, e)
            result.loc[count] = list(inf.vals)
        except Exception:
            # Was a bare `except:`; keep the best-effort behaviour (rows
            # that fail inference become all-None) but stop swallowing
            # SystemExit/KeyboardInterrupt.
            result.loc[count] = [None] * len(cols)
        count += 1
    result['AnswerID'] = a_id
    result.set_index(keys='AnswerID', inplace=True)
    return result
def q_without_ros():
    # Monty Hall demo: build the classic three-door network by hand and
    # print the factor for P(prize_door | guest picked A, Monty opened B).
    skel = GraphSkeleton()
    skel.V = ["prize_door", "guest_door", "monty_door"]
    skel.E = [["prize_door", "monty_door"], ["guest_door", "monty_door"]]
    skel.toporder()
    nd = NodeData()
    # CPTs: prize and guest doors are uniform priors; Monty's choice is
    # conditioned on [prize_door, guest_door] and never matches either.
    nd.Vdata = {
        "prize_door": {
            "numoutcomes": 3,
            "parents": None,
            "children": ["monty_door"],
            "vals": ["A", "B", "C"],
            "cprob": [1.0/3, 1.0/3, 1.0/3],
        },
        "guest_door": {
            "numoutcomes": 3,
            "parents": None,
            "children": ["monty_door"],
            "vals": ["A", "B", "C"],
            "cprob": [1.0/3, 1.0/3, 1.0/3],
        },
        "monty_door": {
            "numoutcomes": 3,
            "parents": ["prize_door", "guest_door"],
            "children": None,
            # keys are stringified [prize_door, guest_door] assignments
            "cprob": {
                "['A', 'A']": [0., 0.5, 0.5],
                "['B', 'B']": [0.5, 0., 0.5],
                "['C', 'C']": [0.5, 0.5, 0.],
                "['A', 'B']": [0., 0., 1.],
                "['A', 'C']": [0., 1., 0.],
                "['B', 'A']": [0., 0., 1.],
                "['B', 'C']": [1., 0., 0.],
                "['C', 'A']": [0., 1., 0.],
                "['C', 'B']": [1., 0., 0.],
            },
            "vals": ["A", "B", "C"],
        },
    }
    bn = DiscreteBayesianNetwork(skel, nd)
    fn = TableCPDFactorization(bn)
    # query all prize-door outcomes given the observed guest/Monty doors
    query = {
        "prize_door": ["A","B","C"],
    }
    evidence = {
        "guest_door": "A",
        "monty_door": "B",
    }
    res = fn.condprobve(query, evidence)
    # Python 2 print statements: dump the resulting factor's internals
    print res.vals
    print res.scope
    print res.card
    print res.stride
def infer(self, sensor_evidence, fsm_evidence):
    """Evaluate every configured output query and return the events whose
    posterior probability cleared its threshold.

    sensor_evidence -- {sensor_name: probability-that-sensor-is-True}
    fsm_evidence    -- {node_name: state} hard evidence
    """
    # sensor values are always True; their proxy nodes encode the real probability
    evidence = dict(fsm_evidence)
    evidence.update({k: "T" for k in sensor_evidence})

    # update probability of proxy nodes in place on the shared network
    for sensor, p in sensor_evidence.iteritems():
        self.net.Vdata[sensor]["cprob"] = {
            "['T']": [p, 1 - p],
            "['F']": [(1 - p), p]
        }

    # refactorize so the rewritten CPTs take effect
    fn = TableCPDFactorization(self.net)

    events = []
    for name, output in self.outputs.iteritems():
        # specificquery consumes the factor list; refresh before each output
        fn.refresh()
        query = {}
        for q in output["query"]:
            # negated terms query the 'F' outcome, plain terms the 'T' one
            if is_negated(q):
                query[normalise_name(q)] = ['F']
            else:
                query[normalise_name(q)] = ['T']
        # NOTE(review): double assignment -- `result` is never read afterwards
        prob = result = fn.specificquery(query, evidence)
        ev = output["event"]
        # joining a dict iterates its keys, i.e. the queried node names
        formatted_query = " AND ".join(query)
        # logging.debug("Query p(%s)=%.8f; need p(%s)>%.8f to trigger event %s/%s" % (formatted_query, prob, formatted_query, 1-np.exp(ev["logp"]), ev.get("fsm", None), ev["event"]))
        logger.info(json.dumps({
            'type': 'query',
            'query': formatted_query,
            'value': '%.8f' % prob,
            'threshold': '%.8f' % (1 - np.exp(ev['logp'])),
            'fsm': ev.get("fsm", None),
            'event': ev['event']
        }))
        # fire when the posterior clears the (caution-adjusted) threshold;
        # thresholds are stored as log-probabilities in ev["logp"]
        if prob > (1 - np.exp(ev["logp"])) + self.event_caution:
            #logging.debug("Fired event %s/%s" % (ev.get("fsm", None), ev["event"]))
            logger.info(
                json.dumps({
                    'type': 'fire_event',
                    'fsm': ev.get("fsm", None),
                    'event': ev['event']
                }))
            # generate event
            events.append({
                "fsm": ev.get("fsm", None),
                "event": ev["event"]
            })
    return events
def q_without_ros():
    # Monty Hall demo (duplicate of the unformatted variant elsewhere in
    # this file): print the factor for
    # P(prize_door | guest picked A, Monty opened B).
    skel = GraphSkeleton()
    skel.V = ["prize_door", "guest_door", "monty_door"]
    skel.E = [["prize_door", "monty_door"], ["guest_door", "monty_door"]]
    skel.toporder()
    nd = NodeData()
    # CPTs: uniform priors on prize/guest doors; Monty's door depends on
    # [prize_door, guest_door] and never matches either.
    nd.Vdata = {
        "prize_door": {
            "numoutcomes": 3,
            "parents": None,
            "children": ["monty_door"],
            "vals": ["A", "B", "C"],
            "cprob": [1.0 / 3, 1.0 / 3, 1.0 / 3],
        },
        "guest_door": {
            "numoutcomes": 3,
            "parents": None,
            "children": ["monty_door"],
            "vals": ["A", "B", "C"],
            "cprob": [1.0 / 3, 1.0 / 3, 1.0 / 3],
        },
        "monty_door": {
            "numoutcomes": 3,
            "parents": ["prize_door", "guest_door"],
            "children": None,
            "vals": ["A", "B", "C"],
            # keys are stringified [prize_door, guest_door] assignments
            "cprob": {
                "['A', 'A']": [0., 0.5, 0.5],
                "['B', 'B']": [0.5, 0., 0.5],
                "['C', 'C']": [0.5, 0.5, 0.],
                "['A', 'B']": [0., 0., 1.],
                "['A', 'C']": [0., 1., 0.],
                "['B', 'A']": [0., 0., 1.],
                "['B', 'C']": [1., 0., 0.],
                "['C', 'A']": [0., 1., 0.],
                "['C', 'B']": [1., 0., 0.],
            },
        },
    }
    bn = DiscreteBayesianNetwork(skel, nd)
    fn = TableCPDFactorization(bn)
    # query all prize-door outcomes given the observed guest/Monty doors
    query = {
        "prize_door": ["A", "B", "C"],
    }
    evidence = {
        "guest_door": "A",
        "monty_door": "B",
    }
    res = fn.condprobve(query, evidence)
    # Python 2 print statements: dump the resulting factor's internals
    print res.vals
    print res.scope
    print res.card
    print res.stride
def calc_BNprob(df_test):
    # Classify each row: predict survival (1) when
    # P(Surv='1' | Fare, Sex, Class) >= 0.5, else 0.
    # `bn` is the module-level Bayesian network; a fresh factorization is
    # built per row because specificquery consumes the factor list.
    predictions = []
    for row in df_test.itertuples():
        tablecpd = TableCPDFactorization(bn)
        prob_surv = tablecpd.specificquery(
            dict(Surv='1'),
            dict(Fare=str(row.Fare), Sex=str(row.Sex), Class=str(row.Pclass)))
        predictions.append(1 if prob_surv >= 0.5 else 0)
    # preserve the original empty-input result (an empty Series)
    return pd.Series(predictions) if predictions else pd.Series()
def setUp(self):
    # NodeData.load populates an existing instance in place (it returns
    # None), and DiscreteBayesianNetwork requires both the skeleton and
    # the node data -- matching the working fixtures elsewhere in this file.
    skel = GraphSkeleton()
    skel.load("unittestdict.txt")
    skel.toporder()
    nodedata = NodeData()
    nodedata.load("unittestdict.txt")
    self.bn = DiscreteBayesianNetwork(skel, nodedata)
    # aggregate 50 forward samples, then 51 Gibbs samples under evidence
    agg = SampleAggregator()
    agg.aggregate(self.bn.randomsample(50))
    self.rseq = agg.seq
    self.ravg = agg.avg
    self.fn = TableCPDFactorization(self.bn)
    evidence = dict(Letter='weak')
    agg.aggregate(self.fn.gibbssample(evidence, 51))
    self.gseq = agg.seq
    self.gavg = agg.avg
class TestSampleAggregator(unittest.TestCase):
    """Checks that aggregated random and Gibbs samples produce per-variable
    frequency distributions that sum to (approximately) 1."""

    def setUp(self):
        # Build the 'grades' network from unittestdict.txt, then aggregate
        # 50 forward samples and 51 Gibbs samples under Letter='weak'.
        skel = GraphSkeleton()
        skel.load("unittestdict.txt")
        skel.toporder()
        nodedata = NodeData()
        nodedata.load("unittestdict.txt")
        self.bn = DiscreteBayesianNetwork(skel, nodedata)
        agg = SampleAggregator()
        agg.aggregate(self.bn.randomsample(50))
        self.rseq = agg.seq
        self.ravg = agg.avg
        self.fn = TableCPDFactorization(self.bn)
        evidence = dict(Letter='weak')
        agg.aggregate(self.fn.gibbssample(evidence, 51))
        self.gseq = agg.seq
        self.gavg = agg.avg

    def test_rseq(self):
        # 50 random samples; each variable's averaged frequencies ~ 1.
        self.assertTrue(len(self.rseq) == 50)
        for key in self.ravg.keys():
            summ = 0
            for entry in self.ravg[key].keys():
                summ += self.ravg[key][entry]
            self.assertTrue(summ > .99 and summ < 1.01)

    def test_gseq(self):
        # 51 Gibbs samples under the Letter='weak' evidence.
        self.assertTrue(len(self.gseq) == 51)
        for key in self.gavg.keys():
            summ = 0
            for entry in self.gavg[key].keys():
                summ += self.gavg[key][entry]
            self.assertTrue(summ > .99 and summ < 1.01)
def simple_graph(pz, px1gz, px2gz):
    # Naive-Bayes-style graph over binary variables: Z -> X1 and Z -> X2.
    # pz is P(Z=1); px1gz/px2gz parameterize P(Xi=1 | Z) via cpd().
    graph = DiscretePGM()
    graph.addNode('Z', [0, 1], None, [1 - pz, pz])
    for name, p_given_z in (('X1', px1gz), ('X2', px2gz)):
        graph.addNode(name, [0, 1], ['Z'], cpd(p_given_z))
    return TableCPDFactorization(graph.construct())
def classify(evidence,bn):
    """Predict the most likely Volume value given the remaining evidence.

    NOTE: mutates the caller's *evidence* dict (removes the 'Volume' key)
    -- callers relying on that side effect are unchanged.

    Returns [0, predicted_volume]; index 0 is a placeholder left over from
    a retired Speed prediction.
    """
    q2 = dict(Volume=evidence['Volume'])
    # Volume must not appear as its own evidence when it is the query
    del evidence['Volume']
    fn = TableCPDFactorization(bn)
    result = fn.condprobve(q2, evidence)
    # pick the value with the highest posterior probability
    indx = result.vals.index(max(result.vals))
    vl = bn.Vdata['Volume']['vals'][indx]
    return [0, vl]
def classify(evidence, bn):
    """Predict the most likely Volume value given the remaining evidence.

    NOTE: mutates the caller's *evidence* dict (removes the 'Volume' key)
    -- callers relying on that side effect are unchanged.

    Returns [0, predicted_volume]; index 0 is a placeholder left over from
    a retired Speed prediction.
    """
    q2 = dict(Volume=evidence['Volume'])
    # Volume must not appear as its own evidence when it is the query
    del evidence['Volume']
    fn = TableCPDFactorization(bn)
    result = fn.condprobve(q2, evidence)
    # pick the value with the highest posterior probability
    indx = result.vals.index(max(result.vals))
    vl = bn.Vdata['Volume']['vals'][indx]
    return [0, vl]
def getTableCPD(jsonpath=""):
    """Load a discrete Bayesian network description from *jsonpath* and
    return its TableCPDFactorization.

    The path was previously hard-coded to the empty string; it is now a
    parameter with that same default, so existing no-argument callers
    behave exactly as before.
    """
    nd = NodeData()
    skel = GraphSkeleton()
    nd.load(jsonpath)
    skel.load(jsonpath)
    bn = DiscreteBayesianNetwork(skel, nd)
    tablecpd = TableCPDFactorization(bn)
    return tablecpd
def getTableCPD(jsonpath="./graph/graph_example.txt"):
    """Load the graph description file at *jsonpath* (node data and
    skeleton live in the same file) and return its TableCPDFactorization.

    The path was previously hard-coded; it is now a parameter with that
    same default, so existing no-argument callers are unaffected.
    """
    nd = NodeData()
    skel = GraphSkeleton()
    nd.load(jsonpath)
    skel.load(jsonpath)
    # load Bayesian network
    bn = DiscreteBayesianNetwork(skel, nd)
    tablecpd = TableCPDFactorization(bn)
    return tablecpd
def discrete_query_cb(self, req):
    # ROS service callback: build a discrete Bayesian network from the
    # request's node descriptions, answer P(query | evidence) by variable
    # elimination, and wrap the resulting factor in a DiscreteQueryResponse.
    nd = U.discrete_nodedata_from_ros(req.nodes)
    skel = U.graph_skeleton_from_node_data(nd)
    skel.toporder()
    bn = DiscreteBayesianNetwork(skel, nd)
    fn = TableCPDFactorization(bn)
    # query over every outcome of each requested node
    q = {n: nd.Vdata[n]["vals"] for n in req.query}
    ev = {ns.node: ns.state for ns in req.evidence}
    rospy.loginfo("resolving query %s with evidence %s" % (q, ev))
    ans = fn.condprobve(query=q, evidence=ev)
    rospy.loginfo("%s -> %s" % (ans.scope, ans.vals))
    res = DiscreteQueryResponse()
    node = DiscreteNode()
    # NOTE(review): only the first variable of the answer's scope is
    # packaged into the response, even if multiple nodes were queried.
    node.name = ans.scope[0]
    node.outcomes = q[node.name]
    node.CPT.append(ConditionalProbability(node.outcomes, ans.vals))
    res.nodes.append(node)
    return res
def setup(self):
    # Translate the hand-built Node objects into libpgm NodeData /
    # GraphSkeleton structures, then construct the network and its
    # factorization.  Call after all nodes have been added.
    self.nd = NodeData()
    self.skel = GraphSkeleton()
    self.skel.V, self.skel.E = [], []
    self.nd.Vdata = {}
    for i, node in enumerate(self.node.values()):
        dNode = {}
        node.sId = str(i)  # stable string id assigned per node
        dNode["numoutcomes"] = len(node.values)
        dNode["vals"] = node.values
        dNode["cprob"] = node.cpt
        # dNode["parents"] = map(lambda x: if x=x.name, node.parents);
        self.skel.V.append(node.name)
        aParents = []
        for parent in node.parents:
            if parent == None:
                continue
            aParents.append(parent.name)
            self.skel.E.append([parent.name, node.name])
        # libpgm expects None (not an empty list) for root nodes
        dNode["parents"] = aParents if len(aParents) > 0 else None
        self.nd.Vdata[node.name] = dNode
    self.skel.toporder()
    self.bn = DiscreteBayesianNetwork(self.skel, self.nd)
    self.fn = TableCPDFactorization(self.bn)
def inferPosteriorDistribution(queries, evidence, baynet):
    # Answer P(queries | evidence) on `baynet` and return the result as a
    # dataframe sorted by bin order.
    # TODO: extend to handle multiple query nodes
    fn = TableCPDFactorization(baynet)
    # result = fn.condprobve(query, evidence) #from library
    result = condprobve2(fn, queries, evidence)  # local reimplementation, written here
    print 'result.vals ', result.vals
    probabilities = printdist(result, baynet)
    # for index,key in queries:
    # make sure probabilities are listed in order of bins
    probabilities.sort_values(['max_def'], inplace=True)
    return probabilities
def confounded_graph(n):
    # Binary network where Z confounds X1..X(n-1) while X0 independently
    # causes Y.  Returns (model, factorization).
    epsilon = 0.4
    p_z = .5                  # P(Z = 1)
    p_x0 = .1                 # P(X0 = 1); must be <= .5
    p_x_given_z = [.4, .3]    # P(X=1|Z=0), P(X=1|Z=1)
    p_y_given_x0 = [
        .5 - p_x0 / (1.0 - p_x0) * epsilon,
        .5 + epsilon,
    ]                         # P(Y = 1|X0)

    pgm = DiscretePGM()
    pgm.addNode('Z', [0, 1], None, [1 - p_z, p_z])
    pgm.addNode('X0', [0, 1], None, [1 - p_x0, p_x0])
    for index in range(1, n):
        pgm.addNode('X' + str(index), [0, 1], ['Z'], cpd(p_x_given_z))
    pgm.addNode('Y', [0, 1], ['X0'], cpd(p_y_given_x0))

    model = pgm.construct()
    return model, TableCPDFactorization(model)
temp.append(float(max(list))/3) temp.append(float(max(list))/3*2) return temp EachLikeThreshold = Threshold(EachLike) EachLikedThreshold = Threshold(EachLiked) print EachLikeThreshold print EachLikedThreshold BulliedPro = [] nd = NodeData() skel = GraphSkeleton() nd.load('unittestdict.txt') skel.load('unittestdict.txt') bn = DiscreteBayesianNetwork(skel, nd) fn = TableCPDFactorization(bn) for i in range(len(EachLike)): evidence = {} if EachLike[i] <= EachLikeThreshold[0]: evidence['LikeN'] = 'Small' elif EachLikeThreshold[0] < EachLike[i] and EachLike[i] <= EachLikeThreshold[1]: evidence['LikeN'] = 'Mid' else: evidence['LikeN'] = 'Big' if EachLiked[i] <= EachLikedThreshold[0]: evidence['LikedN'] = 'Small' elif EachLikedThreshold[0] < EachLiked[i] and EachLiked[i] <= EachLikedThreshold[1]: evidence['LikedN'] = 'Mid' else: evidence['LikedN'] = 'Big'
# NOTE(review): `skel` and `nd` here come from earlier in the script (not
# shown); `lgbn` is built but never used below -- presumably leftover.
lgbn = LGBayesianNetwork(skel, nd)

# Read the ratings dataset and strip non-printable characters before
# parsing the JSON list of records.
text = open("../unifiedMLData2.json")
data=text.read()
printable = set(string.printable)
asciiData=filter(lambda x: x in printable, data)
listofDicts=json.loads(asciiData)

skel = GraphSkeleton()
skel.load("../skeleton.json")

# Fit discrete CPDs by maximum likelihood, then draw 3 Gibbs samples
# consistent with occupation='student'.
learner = PGMLearner()
result = learner.discrete_mle_estimateparams(skel, listofDicts)
tcf=TableCPDFactorization(result)
# NOTE(review): myquery is unused -- gibbssample only takes evidence.
myquery = dict(rating=[5])
myevidence = dict(occupation='student')
res2=tcf.gibbssample(evidence=myevidence,n=3)
print json.dumps(res2, indent=2)
class TestTableCPDFactorization(unittest.TestCase):
    """Exercises TableCPDFactorization (refresh, variable elimination,
    conditional and specific queries, Gibbs sampling) against the 5-node
    student/grades network in unittestdict.txt."""

    def setUp(self):
        # Build the network fresh for every test.
        skel = GraphSkeleton()
        skel.load("unittestdict.txt")
        skel.toporder()
        nodedata = NodeData()
        nodedata.load("unittestdict.txt")
        self.bn = DiscreteBayesianNetwork(skel, nodedata)
        self.fn = TableCPDFactorization(self.bn)

    def test_constructor(self):
        # One factor per node in the 5-node network.
        self.assertTrue(len(self.fn.originalfactorlist) == 5)
        for x in range(5):
            self.assertTrue(isinstance(self.fn.originalfactorlist[x], TableCPDFactor))

    def test_refresh(self):
        # refresh() must restore the consumed factor list so the same
        # query can be re-run with an identical answer.
        evidence = dict(Letter='weak')
        query = dict(Intelligence=['high'])
        result1 = self.fn.specificquery(query, evidence)
        self.fn.refresh()
        result2 = self.fn.specificquery(query, evidence)
        self.assertEqual(result1, result2)

    def test_sumproducteliminatevar(self):
        self.fn.refresh()
        self.fn.sumproducteliminatevar("Difficulty")
        # exactly one factor should now have scope [Grade, Intelligence]
        yes = 0
        for x in range(len(self.fn.factorlist)):
            if (self.fn.factorlist[x].scope == ['Grade', 'Intelligence']):
                yes += 1
                index = x
        self.assertTrue(yes == 1)
        # expected values after summing Difficulty out
        exp = [0.2, 0.33999999999999997, 0.45999999999999996, 0.74, 0.16799999999999998, 0.09200000000000001]
        for x in range(6):
            self.assertTrue(abs(self.fn.factorlist[index].vals[x] - exp[x]) < .01)

    def test_sumproductve(self):
        # eliminating everything but Letter leaves its marginal
        input = ["Difficulty", "Grade", "Intelligence", "SAT"]
        self.fn.refresh()
        self.fn.sumproductve(input)
        exp = [.498, .502]
        for x in range(2):
            self.assertTrue(abs(self.fn.factorlist.vals[x] - exp[x]) < .01)

    def test_condprobve(self):
        # P(Intelligence | Grade='C', SAT='highscore')
        evidence = dict(Grade='C', SAT='highscore')
        query = dict(Intelligence='high')
        self.fn.refresh()
        self.fn.condprobve(query, evidence)
        exp = [.422, .578]
        for x in range(2):
            self.assertTrue(abs(self.fn.factorlist.vals[x] - exp[x]) < .01)

    def test_specificquery(self):
        # P(Grade in {A, B} | Difficulty='easy')
        evidence = dict(Difficulty='easy')
        query = dict(Grade=['A', 'B'])
        self.fn.refresh()
        answer = self.fn.specificquery(query, evidence)
        self.assertTrue(abs(answer - .784) < .01)

    def test_gibbssample(self):
        # every sample must honour the clamped evidence
        evidence = dict(Letter='weak')
        gs = self.fn.gibbssample(evidence, 5)
        self.assertTrue(gs[0]["Difficulty"] == 'easy' or
                        gs[0]["Difficulty"] == 'hard')
        self.assertTrue(len(gs) == 5)
        for entry in gs:
            self.assertTrue(entry["Letter"] == 'weak')
# Load the Titanic node data; `jsonpath_skel`, `nd` and `skel` come from
# earlier in the script (not shown here).
jsonpath_node ="titanic_nodes.json"
nd.load(jsonpath_node)
skel.load(jsonpath_skel)

# load bayesian network
bn = DiscreteBayesianNetwork(skel, nd)
print (skel.getchildren("Class"),skel.getchildren("Sex"),skel.getchildren("Fare"),skel.getchildren("Surv"))
# NOTE(review): the next line looks like interpreter output pasted into the
# script; it evaluates a tuple expression and discards it (harmless no-op).
([u'Surv'], [u'Surv'], [u'Class'], [])

# In[ ]:

# We can now start querying our network. We provide a query (first dictionary in the arguments)
# and an evidence (second dictionary in the args)).
# A fresh TableCPDFactorization is built before every query because
# specificquery consumes the factor list.
tablecpd=TableCPDFactorization(bn)
print ("P(Surv=0) = {}".format(tablecpd.specificquery(dict(Surv='0'),dict())))

# In[ ]:

tablecpd=TableCPDFactorization(bn)
print("P(Surv = 1) = {}".format(tablecpd.specificquery(dict(Surv='1'),dict())))
tablecpd=TableCPDFactorization(bn)
print("P(Surv = 1 | Fare = 0) = {}".format(tablecpd.specificquery(dict(Surv='1'),dict(Fare='0'))))
tablecpd=TableCPDFactorization(bn)
print("P(Surv = 1 | Fare = 1) = {}".format(tablecpd.specificquery(dict(Surv='1'),dict(Fare='1'))))
tablecpd=TableCPDFactorization(bn)
print("P(Surv = 1 | Fare = 1, Sex = 0) = {}".format(tablecpd.specificquery(dict(Surv='1'),dict(Fare='1' , Sex='0'))))
# Demo: draw 1000 Gibbs samples from the 'grades' network, clamping
# Letter='weak', and dump them as JSON (Python 2 print statement).
import json
from libpgm.graphskeleton import GraphSkeleton
from libpgm.nodedata import NodeData
from libpgm.discretebayesiannetwork import DiscreteBayesianNetwork
from libpgm.tablecpdfactorization import TableCPDFactorization

# load nodedata and graphskeleton (both live in the same file)
nd = NodeData()
skel = GraphSkeleton()
nd.load("grades.txt")
skel.load("grades.txt")

# toporder graph skeleton
skel.toporder()

# load evidence
evidence = dict(Letter='weak')

# load bayesian network
bn = DiscreteBayesianNetwork(skel, nd)

# load factorization
fn = TableCPDFactorization(bn)

# sample: every sample honours the clamped evidence
result = fn.gibbssample(evidence, 1000)

# output
print json.dumps(result, indent=2)
# Fit discrete CPDs by maximum likelihood over the parsed records
# (`listofDicts` comes from earlier in the script) and factorize.
skel = GraphSkeleton()
skel.load("../skeleton.json")
learner = PGMLearner()
result = learner.discrete_mle_estimateparams(skel, listofDicts)
#print json.dumps(result.randomsample(10), indent=2)
#print json.dumps(result.Vdata, indent=2)
#nd = NodeData()
#nd.load("../tests/unittestdict.txt")
#evidence = dict(Letter='weak')
tcf = TableCPDFactorization(result)
# NOTE(review): commas are missing after every entry except the first, so
# Python's implicit string concatenation collapses the rest into ONE giant
# string ('artistdoctoreducator...').  Each occupation should be its own
# comma-separated element.  (The list continues beyond this excerpt.)
occupations = [
    'administrator',
    'artist'
    'doctor'
    'educator'
    'engineer'
    'entertainment'
    'executive'
    'healthcare'
    'homemaker'
    'lawyer'
    'librarian'
    'marketing'
    'none'
    'other'
# Demo: compute P(C='T' | B='F') on the net1.json network and print it.
import json
from libpgm.nodedata import NodeData
from libpgm.graphskeleton import GraphSkeleton
from libpgm.discretebayesiannetwork import DiscreteBayesianNetwork
from libpgm.tablecpdfactorization import TableCPDFactorization

# load nodedata and graphskeleton
nd = NodeData()
skel = GraphSkeleton()
nd.load("tests/net1.json")    # any input file
skel.load("tests/net1.json")

# topologically order graphskeleton
skel.toporder()

# load bayesian network
bn = DiscreteBayesianNetwork(skel, nd)
fn = TableCPDFactorization(bn)

# sample.  NOTE(review): specificquery documents its query values as lists
# ("even if singletons"); the bare string 'T' only works because iterating
# a one-character string yields that character -- dict(C=['T']) is the
# documented form.
result = fn.specificquery(dict(C='T'), dict(B='F'))

# output (Python 2 print statement)
print json.dumps(result, indent=2)
class Graph:
    """Assembles a libpgm DiscreteBayesianNetwork from hand-built Node
    objects, answers specific queries on it, and can render the graph via
    Graphviz."""

    def __init__(self):
        self.node = dict()   # name -> Node
        self.obs = dict()    # name -> [Node, observed value]

    def addnode(self, node):
        self.node[node.name] = node

    def removeNode(self, name):
        if self.node.has_key(name):  # Python 2 dict API
            del self.node[name]

    def addobs(self, node, value):
        self.obs[node.name] = [node, value]

    def removeObs(self, name):
        if self.obs.has_key(name):
            del self.obs[name]

    def setup(self):
        # Translate the Node objects into libpgm NodeData/GraphSkeleton
        # structures, then build the network and its factorization.  Call
        # after all nodes (and observations) have been added.
        self.nd = NodeData()
        self.skel = GraphSkeleton()
        self.skel.V, self.skel.E = [], []
        self.nd.Vdata = {}
        for i, node in enumerate(self.node.values()):
            dNode = {}
            node.sId = str(i)  # stable string id, used by the dot writer
            dNode["numoutcomes"] = len(node.values)
            dNode["vals"] = node.values
            dNode["cprob"] = node.cpt
            # dNode["parents"] = map(lambda x: if x=x.name, node.parents);
            self.skel.V.append(node.name)
            aParents = []
            for parent in node.parents:
                if parent == None:
                    continue
                aParents.append(parent.name)
                self.skel.E.append([parent.name, node.name])
            # libpgm expects None (not []) for root nodes
            dNode["parents"] = aParents if len(aParents) > 0 else None
            self.nd.Vdata[node.name] = dNode
        self.skel.toporder()
        self.bn = DiscreteBayesianNetwork(self.skel, self.nd)
        self.fn = TableCPDFactorization(self.bn)

    # NOTE: a commented-out duplicate of setup() (identical logic written
    # with trailing semicolons) was removed here.

    def getPost(self, query, evidence):
        # probability of `query` given `evidence`, via variable elimination
        result = self.fn.specificquery(query, evidence)
        return result

    def write2dot(self, fname="graph.dot"):
        # Emit a Graphviz .dot file; observed nodes are drawn filled/blue.
        # NOTE(review): `map(lambda x: x[0], self.obs)` iterates the dict's
        # KEYS, so x[0] is the first character of each name -- this looks
        # like it was meant to be self.obs.values(); verify against callers.
        f = open(fname, "w")
        f.write("digraph G {\n")
        f.write("node[shape=circle, width=0.4];\n")
        for node in self.node.values():
            l = "\"" + node.name + "\""
            f.write(node.sId)
            if node in map(lambda x: x[0], self.obs):
                f.write("[label=" + l + ",style=filled,color=blue]")
            else:
                f.write("[label=" + l + "]")
            f.write(";\n")
            for parent in node.parents:
                if parent == None:
                    continue
                f.write(parent.sId + " -> " + node.sId + ";\n")
        f.write("}")
        f.close()

    def write2pdf(self, fname="graph.pdf"):
        # Render the .dot with the external `dot` binary (must be on PATH).
        if ".pdf" in fname:
            fname = fname[:-4]
        pdfFile = fname + ".pdf"
        dotFile = fname + ".dot"
        self.write2dot(dotFile)
        call(['dot', '-Tpdf', dotFile, '-o', pdfFile])
# Draw 10 Gibbs samples from the unit-test network under Letter='weak'.
nd = NodeData()
skel = GraphSkeleton()
nd.load("../tests/unittestdict.txt")
skel.load("../tests/unittestdict.txt")

# toporder graph skeleton
skel.toporder()

# load evidence
evidence = dict(Letter='weak')

# load bayesian network
bn = DiscreteBayesianNetwork(skel, nd)

# load factorization
fn = TableCPDFactorization(bn)

# sample
result = fn.gibbssample(evidence, 10)

# output - toggle comment to see
#print json.dumps(result, indent=2)

# (5) --------------------------------------------------------------------------
# Compute the probability distribution over a specific node or nodes

# load nodedata and graphskeleton (fresh copies for the next example)
nd = NodeData()
skel = GraphSkeleton()
nd.load("../tests/unittestdict.txt")
skel.load("../tests/unittestdict.txt")
def inference(bn, evidence):
    """Gibbs-sample `bn` with the given evidence clamped, aggregate the
    samples into per-variable frequencies, and return them as
    pretty-printed JSON."""
    sampler = TableCPDFactorization(bn)
    samples = sampler.gibbssample(evidence, GIBBS_ITERATIONS)
    summary = SampleAggregator().aggregate(samples)
    return json.dumps(summary, indent=2)
class TestTableCPDFactorization(unittest.TestCase):
    """Exercises TableCPDFactorization (refresh, variable elimination,
    conditional and specific queries, Gibbs sampling) against the 5-node
    student/grades network in unittestdict.txt."""

    def setUp(self):
        # Build the network fresh for every test.
        skel = GraphSkeleton()
        skel.load("unittestdict.txt")
        skel.toporder()
        nodedata = NodeData()
        nodedata.load("unittestdict.txt")
        self.bn = DiscreteBayesianNetwork(skel, nodedata)
        self.fn = TableCPDFactorization(self.bn)

    def test_constructor(self):
        # one factor per node in the 5-node network
        self.assertTrue(len(self.fn.originalfactorlist) == 5)
        for x in range(5):
            self.assertTrue(isinstance(self.fn.originalfactorlist[x], TableCPDFactor))

    def test_refresh(self):
        # refresh() repopulates the working factor list from the originals
        self.fn.refresh()
        for x in range(5):
            self.assertTrue(isinstance(self.fn.factorlist[x], TableCPDFactor))

    def test_sumproducteliminatevar(self):
        self.fn.refresh()
        self.fn.sumproducteliminatevar("Difficulty")
        # exactly one factor should now have scope [Grade, Intelligence]
        yes = 0
        for x in range(len(self.fn.factorlist)):
            if (self.fn.factorlist[x].scope == ['Grade', 'Intelligence']):
                yes += 1
                index = x
        self.assertTrue(yes == 1)
        # expected values after summing Difficulty out
        exp = [0.2, 0.33999999999999997, 0.45999999999999996, 0.74, 0.16799999999999998, 0.09200000000000001]
        for x in range(6):
            self.assertTrue(abs(self.fn.factorlist[index].vals[x] - exp[x]) < .01)

    def test_sumproductve(self):
        # eliminating everything but Letter leaves its marginal
        input = ["Difficulty", "Grade", "Intelligence", "SAT"]
        self.fn.refresh()
        self.fn.sumproductve(input)
        exp = [.498, .502]
        for x in range(2):
            self.assertTrue(abs(self.fn.factorlist.vals[x] - exp[x]) < .01)

    def test_condprobve(self):
        # P(Intelligence | Grade='C', SAT='highscore')
        evidence = dict(Grade='C', SAT='highscore')
        query = dict(Intelligence='high')
        self.fn.refresh()
        self.fn.condprobve(query, evidence)
        exp = [.422, .578]
        for x in range(2):
            self.assertTrue(abs(self.fn.factorlist.vals[x] - exp[x]) < .01)

    def test_specificquery(self):
        # P(Grade in {A, B} | Difficulty='easy')
        evidence = dict(Difficulty='easy')
        query = dict(Grade=['A', 'B'])
        self.fn.refresh()
        answer = self.fn.specificquery(query, evidence)
        self.assertTrue(abs(answer - .784) < .01)

    def test_gibbssample(self):
        # every sample must honour the clamped evidence
        evidence = dict(Letter='weak')
        gs = self.fn.gibbssample(evidence, 5)
        self.assertTrue(gs[0]["Difficulty"] == 'easy' or
                        gs[0]["Difficulty"] == 'hard')
        self.assertTrue(len(gs) == 5)
        for entry in gs:
            self.assertTrue(entry["Letter"] == 'weak')
# Compute the probability distribution over a specific node or nodes # load nodedata and graphskeleton nd = NodeData() skel = GraphSkeleton() nd.load("../tests/unittestdict.txt") skel.load("../tests/unittestdict.txt") # toporder graph skeleton print skel.toporder() # load evidence evidence = {"Intelligence": "high"} query = {"Grade": "A"} # load bayesian network bn = DiscreteBayesianNetwork(skel, nd) # load factorization fn = TableCPDFactorization(bn) # # calculate probability distribution # result = fn.condprobve(query, evidence) # print json.dumps(result.vals, indent=2) # print json.dumps(result.scope, indent=2) # print json.dumps(result.card, indent=2) # print json.dumps(result.stride, indent=2) result = fn.specificquery(query, evidence) print result
clean = int(testdf.iloc[i]["clean"]) # # small = int(testdf.iloc[i]["small"]) bad = int(testdf.iloc[i]["bad"]) old = int(testdf.iloc[i]["old"]) Rooms = int(testdf.iloc[i]["Rooms"]) Location = int(testdf.iloc[i]["Location"]) Service = int(testdf.iloc[i]["Service"]) Cleanliness = int(testdf.iloc[i]["Cleanliness"]) #Checkin = int(testdf.iloc[i]["Checkin"]) #Businessservice = int(testdf.iloc[i]["Businessservice"]) Value = int(testdf.iloc[i]["Value"]) Overall = int(testdf.iloc[i]["Overall"]) #append the overall score to the target list target.append(Overall) #getting all cpt from our model a = TableCPDFactorization(res) #compute the query and evidences as dicts query = dict(Overall=Overall) evidence = dict(Service=Service, Location=Location, Cleanliness=Cleanliness, Value=Value, bad=bad, Rooms=Rooms, old=old, good=good, great=great, comfortable=comfortable) #Checkin=Checkin,Businessservice=Businessservice #run the query given evidence result = a.condprobve(query, evidence)
from libpgm.nodedata import NodeData
from libpgm.graphskeleton import GraphSkeleton
from libpgm.discretebayesiannetwork import DiscreteBayesianNetwork
from libpgm.lgbayesiannetwork import LGBayesianNetwork
from libpgm.hybayesiannetwork import HyBayesianNetwork
from libpgm.dyndiscbayesiannetwork import DynDiscBayesianNetwork
from libpgm.tablecpdfactorization import TableCPDFactorization
from libpgm.sampleaggregator import SampleAggregator
from libpgm.pgmlearner import PGMLearner

# Read the ratings dataset and drop non-printable characters before
# parsing the JSON list of records.
text = open("../unifiedMLData2.json")
data = text.read()
printable = set(string.printable)
asciiData = filter(lambda x: x in printable, data)
#print asciiData
#listofDicts=json.dumps(data)
listofDicts = json.loads(asciiData)
#print listofDicts[0]

skel = GraphSkeleton()
skel.load("../skeleton.json")

# Fit discrete CPDs by maximum likelihood and factorize the result.
learner = PGMLearner()
result = learner.discrete_mle_estimateparams(skel, listofDicts)
tcf = TableCPDFactorization(result)
print tcf
# Read the ratings dataset, strip non-printable characters, parse JSON.
text = open("../unifiedMLData2.json")
data=text.read()
printable = set(string.printable)
asciiData=filter(lambda x: x in printable, data)
listofDicts=json.loads(asciiData)

skel = GraphSkeleton()
skel.load("../skeleton.json")

# ML-estimate the CPDs for the fixed skeleton, then factorize.
learner = PGMLearner()
result = learner.discrete_mle_estimateparams(skel, listofDicts)
tcf=TableCPDFactorization(result)

#Rating 1 Given Genre is Drama
myquery = dict(rating=[1])
myevidence = dict(genre='Drama')
result=tcf.specificquery(query=myquery,evidence=myevidence)
print result

# specificquery consumes the factor list; refresh before the next query
tcf.refresh()

#Rating 2 Given Genre is Drama
myquery = dict(rating=[2])
myevidence = dict(genre='Drama')
result=tcf.specificquery(query=myquery,evidence=myevidence)
print result
# Read the ratings dataset, strip non-printable characters, parse JSON.
text = open("../unifiedMLData2.json")
data = text.read()
printable = set(string.printable)
asciiData = filter(lambda x: x in printable, data)
#listofDicts=json.dumps(data)
listofDicts = json.loads(asciiData)

skel = GraphSkeleton()
skel.load("../skeleton.json")

# ML-estimate the CPDs for the fixed skeleton, then factorize.
learner = PGMLearner()
result = learner.discrete_mle_estimateparams(skel, listofDicts)
tcf = TableCPDFactorization(result)

#Rating 1 Given Occupation is student
myquery = dict(rating=[1])
myevidence = dict(occupation='student')
result = tcf.specificquery(query=myquery, evidence=myevidence)
print result

# specificquery consumes the factor list; refresh before the next query
tcf.refresh()

#Rating 2 Given Occupation is student
myquery = dict(rating=[2])
myevidence = dict(occupation='student')
result = tcf.specificquery(query=myquery, evidence=myevidence)
print result
def __init__(self, nodes):
    """Build a DiscreteBayesianNetwork (plus its factorization) from a
    dict of node specs.

    Node spec types handled:
      * "inferred"     -- CPT parsed from a truth table over its parents
      * "fsm_input"    -- unconditional node with prior [1.0, 0.0]
      * "sensor_input" -- T/F node driven through a hidden proxy parent
                          whose prior infer() rewrites at query time
      * "output"       -- query/event spec stored for infer(); not a BN node
    """
    self.nodes = {}
    self.children = defaultdict(list)
    self.parents = defaultdict(list)
    self.outputs = {}
    for name, node_spec in nodes.iteritems():  # Python 2 dict API
        node_type = node_spec["type"]
        if node_type == "inferred":
            parents = node_spec["parents"]
            # store the relationship between these elements
            for parent in parents:
                normalised = normalise_name(parent)
                self.parents[name].append(normalised)
                self.children[normalised].append(name)
            truth_table = parse_truth_table(node_spec["p"], parents)
            node = make_node(truth_table, parents, node_type)
            self.nodes[name] = node
        if node_type == "fsm_input":
            node = make_node([1.0, 0.0], None, node_type)
            self.nodes[name] = node
        if node_type == "sensor_input":
            # hidden proxy parent lets infer() adjust the sensor's
            # probability without rebuilding the network structure
            proxy_node = make_node([1.0, 0.0], None, "proxy")
            proxy_name = "_proxy_%s" % name
            self.nodes[proxy_name] = proxy_node
            self.children[proxy_name].append(name)
            node = make_node({
                "['T']": [1.0, 0.0],
                "['F']": [0.0, 1.0]
            }, [proxy_name], node_type)
            self.nodes[name] = node
        if node_type == "output":
            self.outputs[name] = node_spec

    # leaf nodes get children=None rather than an empty list
    for node in self.nodes:
        if len(self.children[node]) > 0:
            self.nodes[node]["children"] = self.children[node]
        else:
            self.nodes[node]["children"] = None

    # certainty scaling: added to every event threshold in infer()
    self.event_caution = 0.0

    # assemble the skeleton from the recorded parent/child edges
    og = OrderedSkeleton()
    og.V = self.nodes.keys()
    edges = []
    for k, children in self.children.iteritems():
        for child in children:
            edges.append((k, child))
    og.E = edges
    og.toporder()
    nd = NodeData()
    nd.Vdata = self.nodes
    #logging.debug(pprint.pformat(nd.Vdata))
    self.net = DiscreteBayesianNetwork(og, nd)
    self.factor_net = TableCPDFactorization(self.net)
#INITIALIZING BN 2 # load nodedata and graphskeleton nd2 = NodeData() skel2 = GraphSkeleton() nd2.load(path_bn2) skel2.load(path_bn2) skel2.toporder() # toporder graph skeleton # FINDING NEXT ACTIVITY ATTRIBUTES THROUGH INFERENCE ON BN 1 # wkday variable query evidence1 = dict(wkdayT0=userinput[0]) for i, item in enumerate(wkdayValsList): # loading bayesian network and factorization - needs to be done at every iteration bn1 = DiscreteBayesianNetwork(skel1, nd1) fn1 = TableCPDFactorization(bn1) # setting the query query1 = dict(wkdayT1=[item]) # querying in accordance to the given evidence and appending it to the list of probability of each value wkdayProbList.append(fn1.specificquery(query1, evidence1)) #print "Iteration: " + str(i) + "-> wkdayTO (Input): " + userinput[0] + "; wkdayT1 (Output): " + item + " - prob: " + str(wkdayProbList[i]) most_probable_wkdayT1 = wkdayValsList[numpy.argmax(wkdayProbList)] # hour variable query evidence1 = dict(hourT0=userinput[1]) for i, item in enumerate(hourValsList): # loading bayesian network and factorization - needs to be done at every iteration bn1 = DiscreteBayesianNetwork(skel1, nd1) fn1 = TableCPDFactorization(bn1) # setting the query query1 = dict(hourT1=[item])
from libpgm.discretebayesiannetwork import DiscreteBayesianNetwork
from libpgm.lgbayesiannetwork import LGBayesianNetwork
from libpgm.hybayesiannetwork import HyBayesianNetwork
from libpgm.dyndiscbayesiannetwork import DynDiscBayesianNetwork
from libpgm.tablecpdfactorization import TableCPDFactorization
from libpgm.sampleaggregator import SampleAggregator
from libpgm.pgmlearner import PGMLearner

# NOTE(review): `skel` and `nd` come from earlier in the script (not
# shown); `lgbn` is built but never used below -- presumably leftover.
lgbn = LGBayesianNetwork(skel, nd)

# Read the ratings dataset, strip non-printable characters, parse JSON.
text = open("../unifiedMLData2.json")
data = text.read()
printable = set(string.printable)
asciiData = filter(lambda x: x in printable, data)
listofDicts = json.loads(asciiData)

skel = GraphSkeleton()
skel.load("../skeleton.json")

# ML-estimate the CPDs, then draw 3 Gibbs samples given occupation='student'.
learner = PGMLearner()
result = learner.discrete_mle_estimateparams(skel, listofDicts)
tcf = TableCPDFactorization(result)
# NOTE(review): myquery is unused -- gibbssample only takes evidence.
myquery = dict(rating=[5])
myevidence = dict(occupation='student')
res2 = tcf.gibbssample(evidence=myevidence, n=3)
print json.dumps(res2, indent=2)