def main():
    """
    Monty Hall problem as a Bayesian network.

    C (contestant's pick) and P (prize door) are independent uniform priors;
    H (door the host opens) depends on both, since the host never opens the
    contestant's door or the prize door.  Prints the posterior over H given
    C=0, P=0.
    """
    # Defining the network structure
    model = BayesianModel([('C', 'H'), ('P', 'H')])
    # H: host
    # P: prize
    # C: contestant

    # Defining the CPDs:
    # FIX: use exact thirds instead of 0.33 — [0.33, 0.33, 0.33] sums to
    # 0.99, which leaves the priors unnormalized and makes
    # model.check_model() raise if enabled.
    third = 1.0 / 3
    cpd_c = TabularCPD('C', 3, [[third, third, third]])
    cpd_p = TabularCPD('P', 3, [[third, third, third]])
    # Columns enumerate (C, P) combinations; the host opens each remaining
    # door with equal probability when more than one is available.
    cpd_h = TabularCPD('H', 3,
                       [[0.0, 0.0, 0.0, 0.0, 0.5, 1.0, 0.0, 1.0, 0.5],
                        [0.5, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.5],
                        [0.5, 1.0, 0.0, 1.0, 0.5, 0.0, 0.0, 0.0, 0.0]],
                       evidence=['C', 'P'], evidence_card=[3, 3])

    # Associating the CPDs with the network structure.
    model.add_cpds(cpd_c, cpd_p, cpd_h)

    # Some other methods
    # model.get_cpds()

    # check_model checks for the model structure and the associated CPD and
    # returns True if everything is correct otherwise throws an exception
    # print model.check_model()

    # Inferring the posterior probability
    infer = VariableElimination(model)
    posterior_p = infer.query(['H'], evidence={'C': 0, 'P': 0})
    print(posterior_p['H'])
def setUp(self):
    """Build identical factors/CPDs with and without explicit state names,
    plus two Inference objects over the same student network."""
    self.sn2 = {'grade': ['A', 'B', 'F'],
                'diff': ['high', 'low'],
                'intel': ['poor', 'good', 'very good']}
    self.sn1 = {'speed': ['low', 'medium', 'high'],
                'switch': ['on', 'off'],
                'time': ['day', 'night']}

    # Same uniform table twice: phi1 anonymous, phi2 with state names.
    factor_scope = ['speed', 'switch', 'time']
    factor_card = [3, 2, 2]
    self.phi1 = DiscreteFactor(factor_scope, factor_card, np.ones(12))
    self.phi2 = DiscreteFactor(factor_scope, factor_card, np.ones(12),
                               state_names=self.sn1)

    # Same CPD twice: cpd1 anonymous, cpd2 with state names.
    grade_table = [[0.1] * 6, [0.1] * 6, [0.8] * 6]
    self.cpd1 = TabularCPD('grade', 3, grade_table,
                           evidence=['diff', 'intel'], evidence_card=[2, 3])
    self.cpd2 = TabularCPD('grade', 3, grade_table,
                           evidence=['diff', 'intel'], evidence_card=[2, 3],
                           state_names=self.sn2)

    # Minimal student network used to drive the Inference objects.
    student = BayesianModel([('diff', 'grade'), ('intel', 'grade')])
    diff_cpd = TabularCPD('diff', 2, [[0.2, 0.8]])
    intel_cpd = TabularCPD('intel', 2, [[0.3, 0.7]])
    grade_cpd = TabularCPD('grade', 3,
                           [[0.1] * 4, [0.1] * 4, [0.8] * 4],
                           evidence=['diff', 'intel'], evidence_card=[2, 2])
    student.add_cpds(diff_cpd, intel_cpd, grade_cpd)

    self.model1 = Inference(student)
    self.model2 = Inference(student, state_names=self.sn2)
def get_model(self):
    """
    Returns the fitted bayesian model

    Example
    ----------
    >>> from pgmpy.readwrite import BIFReader
    >>> reader = BIFReader("bif_test.bif")
    >>> reader.get_model()
    <pgmpy.models.BayesianModel.BayesianModel object at 0x7f20af154320>
    """
    try:
        model = BayesianModel(self.variable_edges)
        model.name = self.network_name
        model.add_nodes_from(self.variable_names)

        # Build one TabularCPD per variable, in deterministic sorted order.
        cpds = []
        for variable in sorted(self.variable_cpds):
            parents = self.variable_parents[variable]
            cpds.append(TabularCPD(
                variable,
                len(self.variable_states[variable]),
                self.variable_cpds[variable],
                evidence=parents,
                evidence_card=[len(self.variable_states[parent])
                               for parent in parents]))
        model.add_cpds(*cpds)

        # Copy "name = value" property strings onto the graph nodes.
        for node, props in self.variable_properties.items():
            for prop in props:
                prop_name, prop_value = (part.strip() for part in prop.split('='))
                model.node[node][prop_name] = prop_value
        return model
    except AttributeError:
        raise AttributeError('First get states of variables, edges, parents and network name')
def setUp(self):
    """Assemble the five-node cancer network by hand and wrap it in an
    XBNWriter for the serialization tests."""
    # Per-node display/state metadata, keyed by node name.
    nodes = {'c': {'STATES': ['Present', 'Absent'],
                   'DESCRIPTION': '(c) Brain Tumor',
                   'YPOS': '11935', 'XPOS': '15250', 'TYPE': 'discrete'},
             'a': {'STATES': ['Present', 'Absent'],
                   'DESCRIPTION': '(a) Metastatic Cancer',
                   'YPOS': '10465', 'XPOS': '13495', 'TYPE': 'discrete'},
             'b': {'STATES': ['Present', 'Absent'],
                   'DESCRIPTION': '(b) Serum Calcium Increase',
                   'YPOS': '11965', 'XPOS': '11290', 'TYPE': 'discrete'},
             'e': {'STATES': ['Present', 'Absent'],
                   'DESCRIPTION': '(e) Papilledema',
                   'YPOS': '13240', 'XPOS': '17305', 'TYPE': 'discrete'},
             'd': {'STATES': ['Present', 'Absent'],
                   'DESCRIPTION': '(d) Coma',
                   'YPOS': '12985', 'XPOS': '13960', 'TYPE': 'discrete'}}

    model = BayesianModel([('b', 'd'), ('a', 'b'), ('a', 'c'),
                           ('c', 'd'), ('c', 'e')])

    # CPTs: 'DPIS' holds the table, 'CONDSET'/'CARDINALITY' the parents.
    cpd_distribution = {'a': {'TYPE': 'discrete',
                              'DPIS': np.array([[0.2, 0.8]])},
                        'e': {'TYPE': 'discrete',
                              'DPIS': np.array([[0.8, 0.2], [0.6, 0.4]]),
                              'CONDSET': ['c'], 'CARDINALITY': [2]},
                        'b': {'TYPE': 'discrete',
                              'DPIS': np.array([[0.8, 0.2], [0.2, 0.8]]),
                              'CONDSET': ['a'], 'CARDINALITY': [2]},
                        'c': {'TYPE': 'discrete',
                              'DPIS': np.array([[0.2, 0.8], [0.05, 0.95]]),
                              'CONDSET': ['a'], 'CARDINALITY': [2]},
                        'd': {'TYPE': 'discrete',
                              'DPIS': np.array([[0.8, 0.2], [0.9, 0.1],
                                                [0.7, 0.3], [0.05, 0.95]]),
                              'CONDSET': ['b', 'c'], 'CARDINALITY': [2, 2]}}

    tabular_cpds = []
    for variable, spec in cpd_distribution.items():
        tabular_cpds.append(TabularCPD(
            variable,
            len(nodes[variable]['STATES']),
            spec['DPIS'],
            evidence=spec.get('CONDSET', []),
            evidence_card=spec.get('CARDINALITY', [])))
    model.add_cpds(*tabular_cpds)

    # Attach the node metadata wholesale.
    for variable, attributes in nodes.items():
        model.node[variable] = attributes

    self.maxDiff = None
    self.writer = XMLBeliefNetwork.XBNWriter(model=model)
def bayesnet_examples():
    """
    Demonstrate fitting, prediction, and manual CPD construction on the
    classic student Bayesian network (D -> G <- I, G -> L, I -> S).

    Fits the model on 75% of randomly generated binary data, predicts the
    dropped 'D' column on the remaining 25%, then attaches hand-specified
    CPDs to the same structure.
    """
    from pgmpy.factors import TabularCPD
    from pgmpy.models import BayesianModel
    import pandas as pd
    student_model = BayesianModel([('D', 'G'), ('I', 'G'),
                                   ('G', 'L'), ('I', 'S')])
    # we can generate some random data.
    raw_data = np.random.randint(low=0, high=2, size=(1000, 5))
    data = pd.DataFrame(raw_data, columns=['D', 'I', 'G', 'L', 'S'])
    split = int(data.shape[0] * 0.75)
    data_train = data[:split]
    student_model.fit(data_train)
    student_model.get_cpds()
    # FIX: take an explicit copy of the test slice. The original called
    # drop(..., inplace=True) on a view of `data`, which triggers pandas'
    # SettingWithCopyWarning and may silently fail to drop the column.
    data_test = data[split:].copy()
    data_test.drop('D', axis=1, inplace=True)
    student_model.predict(data_test)

    # Hand-specified CPDs for the same structure.
    grade_cpd = TabularCPD(
        variable='G',
        variable_card=3,
        values=[[0.3, 0.05, 0.9, 0.5],
                [0.4, 0.25, 0.08, 0.3],
                [0.3, 0.7, 0.02, 0.2]],
        evidence=['I', 'D'],
        evidence_card=[2, 2])
    difficulty_cpd = TabularCPD(
        variable='D',
        variable_card=2,
        values=[[0.6, 0.4]])
    intel_cpd = TabularCPD(
        variable='I',
        variable_card=2,
        values=[[0.7, 0.3]])
    letter_cpd = TabularCPD(
        variable='L',
        variable_card=2,
        values=[[0.1, 0.4, 0.99],
                [0.9, 0.6, 0.01]],
        evidence=['G'],
        evidence_card=[3])
    sat_cpd = TabularCPD(
        variable='S',
        variable_card=2,
        values=[[0.95, 0.2],
                [0.05, 0.8]],
        evidence=['I'],
        evidence_card=[2])
    student_model.add_cpds(grade_cpd, difficulty_cpd, intel_cpd,
                           letter_cpd, sat_cpd)
def get_model(self):
    """
    Returns an instance of Bayesian Model or Markov Model.
    Varibles are in the pattern var_0, var_1, var_2 where var_0 is
    0th index variable, var_1 is 1st index variable.

    Return
    ------
    model: an instance of Bayesian or Markov Model.

    Examples
    --------
    >>> reader = UAIReader('TestUAI.uai')
    >>> reader.get_model()
    """
    if self.network_type == 'BAYES':
        model = BayesianModel(self.edges)

        cpds = []
        for table in self.tables:
            child_var = table[0]
            card = int(self.domain[child_var])
            # Flat probability list -> (card, #parent-configs) table.
            flat = np.array([float(v) for v in table[1]])
            cpds.append(TabularCPD(child_var, card,
                                   flat.reshape(card, flat.size // card)))
        model.add_cpds(*cpds)
        return model

    elif self.network_type == 'MARKOV':
        model = MarkovModel(self.edges)

        factors = []
        for table in self.tables:
            scope = table[0]
            factors.append(DiscreteFactor(
                variables=scope,
                cardinality=[int(self.domain[var]) for var in scope],
                values=[float(v) for v in table[1]]))
        model.add_factors(*factors)
        return model
def get_model(self):
    """
    Returns the model instance of the ProbModel.

    Return
    ---------------
    model: an instance of BayesianModel.

    Examples
    -------
    >>> reader = ProbModelXMLReader()
    >>> reader.get_model()
    """
    # Guard clause: only Bayesian networks are supported.
    if self.probnet.get('type') != "BayesianNetwork":
        raise ValueError("Please specify only Bayesian Network.")

    model = BayesianModel(self.probnet['edges'].keys())

    cpds = []
    for potential in self.probnet['Potentials']:
        variable = list(potential['Variables'])[0]
        states = self.probnet['Variables'][variable]['States']
        evidence = potential['Variables'][variable]
        evidence_card = [len(self.probnet['Variables'][ev]['States'])
                         for ev in evidence]
        # Flat whitespace-separated values -> (#states, #parent-configs).
        table = np.array([float(v) for v in potential['Values'].split()])
        table = table.reshape((len(states), table.size // len(states)))
        cpds.append(TabularCPD(variable, len(states), table,
                               evidence, evidence_card))
    model.add_cpds(*cpds)

    # Copy node-level properties onto the graph.
    for node in model.nodes():
        for prop_name, prop_value in self.probnet['Variables'][node].items():
            model.node[node][prop_name] = prop_value
    # Copy edge-level properties onto the graph.
    for source, target in model.edges():
        for prop_name, prop_value in self.probnet['edges'][(source, target)].items():
            model.edge[source][target][prop_name] = prop_value
    return model
def get_model(self):
    """Assemble and return the BayesianModel described by the parsed data."""
    model = BayesianModel(self.get_edges())
    model.name = self.network_name

    # One TabularCPD per variable from the parsed CPD tables.
    cpds = []
    for variable, table in self.variable_CPD.items():
        parents = self.variable_parents[variable]
        cpds.append(TabularCPD(
            variable,
            len(self.variable_states[variable]),
            table,
            evidence=parents,
            evidence_card=[len(self.variable_states[parent])
                           for parent in parents]))
    model.add_cpds(*cpds)

    # Copy "name = value" property strings onto the graph nodes.
    for node, props in self.variable_property.items():
        for prop in props:
            prop_name, prop_value = (part.strip() for part in prop.split('='))
            model.node[node][prop_name] = prop_value
    return model
def get_model(self):
    """
    Returns an instance of Bayesian Model.
    """
    model = BayesianModel(self.edges)
    model.name = self.model_name

    # Build CPDs; 'CONDSET'/'CARDINALITY' are absent for root variables.
    cpds = []
    for variable, spec in self.variable_CPD.items():
        states = self.variables[variable]['STATES']
        cpds.append(TabularCPD(
            variable,
            len(states),
            spec['DPIS'],
            evidence=spec.get('CONDSET', []),
            evidence_card=spec.get('CARDINALITY', [])))
    model.add_cpds(*cpds)

    # Attach the per-variable metadata wholesale.
    for variable, attributes in self.variables.items():
        model.node[variable] = attributes
    return model
from pgmpy.models import BayesianModel
from pgmpy.inference import ClusterBeliefPropagation as CBP
from pgmpy.factors import TabularCPD

# Restaurant example: location and quality drive cost; location and cost
# drive the number of people.
restaurant_model = BayesianModel(
    [('location', 'cost'),
     ('quality', 'cost'),
     ('location', 'no_of_people'),
     ('cost', 'no_of_people')])

cpd_location = TabularCPD('location', 2, [[0.6, 0.4]])
cpd_quality = TabularCPD('quality', 3, [[0.3, 0.5, 0.2]])
cpd_cost = TabularCPD('cost', 2,
                      [[0.8, 0.6, 0.1, 0.6, 0.6, 0.05],
                       [0.2, 0.1, 0.9, 0.4, 0.4, 0.95]],
                      evidence=['location', 'quality'],
                      evidence_card=[2, 3])
cpd_no_of_people = TabularCPD('no_of_people', 2,
                              [[0.6, 0.8, 0.1, 0.6],
                               [0.4, 0.2, 0.9, 0.4]],
                              evidence=['cost', 'location'],
                              evidence_card=[2, 2])
restaurant_model.add_cpds(cpd_location, cpd_quality,
                          cpd_cost, cpd_no_of_people)

# Run belief propagation: marginal of cost, then cost given evidence.
cluster_inference = CBP(restaurant_model)
cluster_inference.query(variables=['cost'])
cluster_inference.query(variables=['cost'],
                        evidence={'no_of_people': 1, 'quality': 0})
def configure(self, rf):
    # Train a probabilistic graphical network (PGN) that combines the outputs
    # of several previously-trained SAM classifiers into one joint classifier,
    # then evaluates whether the combined network improves accuracy.
    # NOTE(review): Python 2 code (print statements) — confirm interpreter.
    #
    # command format will be the following:
    # trainPGClassifier selfName networkStructure
    print sys.argv
    # read network structure and make graph
    # labels in networkStructure identical to model names
    # networkStructure as a string containing a list of tuples
    # selfName = 'actionPGN'
    # netStructureString = "[('Actions3 exp','actionPGN'), ('Actions4','actionPGN')]"
    selfName = sys.argv[1]
    netStructureString = sys.argv[2]
    netStructure = ast.literal_eval(netStructureString)
    print netStructure

    # collect all model names in a list to extract a unique set
    modelList = []
    for k in netStructure:
        modelList += list(k)
    print list(set(modelList))

    # create a port to connect to /sam/rpc:i to query model path for each model name
    portsList = []
    querySupervisorPort = yarp.RpcClient()
    querySupervisorPortName = '/sam/' + selfName + '/queryRpc'
    querySupervisorPort.open(querySupervisorPortName)
    portsList.append({'name': querySupervisorPortName, 'port': querySupervisorPort})
    yarp.Network.connect(querySupervisorPortName, '/sam/rpc:i')
    # ---------------------------------------------------------------------------------------------------------------
    # For every evidence model, ask the supervisor for its data directory and
    # load its pickle; abort (failFlag) if any expected key is missing.
    modelDict = dict()
    failFlag = False
    for j in modelList:
        if j != selfName:
            modNameSplit = j.split(' ')
            cmd = yarp.Bottle()
            cmd.addString('dataDir')
            for l in modNameSplit:
                cmd.addString(l)
            reply = yarp.Bottle()
            querySupervisorPort.write(cmd, reply)
            if reply.get(0).asString() != 'nack':
                modelDict[modNameSplit[0]] = {'filename': reply.get(1).asString(),
                                              'pickleData': None}
                # try:
                # load pickle for the model file
                currPickle = pickle.load(open(reply.get(1).asString(), 'rb'))
                # try loading labelComparisonDict from the pickle
                if 'labelComparisonDict' in currPickle.keys():
                    modelDict[modNameSplit[0]]['pickleData'] = currPickle['labelComparisonDict']
                    print j, 'labelComparisonDict loaded'
                else:
                    print modNameSplit[0], 'labelComparisonDict not found'
                    failFlag = True
                if 'overallPerformanceLabels' in currPickle.keys():
                    modelDict[modNameSplit[0]]['labels'] = currPickle['overallPerformanceLabels']
                    print j, 'overallPerformanceLabels loaded'
                else:
                    print j, 'overallPerformanceLabels not found'
                    failFlag = True
                # except:
                #     failFlag = True
            else:
                failFlag = True
    print 'FAIL?', failFlag
    if failFlag:
        return False

    modelList = modelDict.keys()
    print modelList
    # ---------------------------------------------------------------------------------------------------------------
    # extract unique lists from the collected data
    # the unique list of pickleData[original] represents the possibleClassifications for each model
    modelDict[selfName] = dict()
    modelDict[selfName]['labels'] = []
    selfModelCol = 1
    for j in modelList:
        modelDict[j]['CPD'] = np.zeros([1, len(modelDict[j]['labels'])])
        print j, 'unique labels:', modelDict[j]['labels']
        print j, 'CPD shape', modelDict[j]['CPD'].shape
        modelDict[selfName]['labels'] += modelDict[j]['labels']
        selfModelCol *= len(modelDict[j]['labels'])
    print

    # the possibleClassifications for both models (outputs of the PGN)
    # are the unique list of the model specific labels for all models
    modelDict[selfName]['labels'] = list(set(modelDict[selfName]['labels']))
    # NOTE(review): uses `j` left over from the previous loop — actualLabels
    # comes from whichever model iterated last; confirm this is intended.
    modelDict[selfName]['actualLabels'] = modelDict[j]['pickleData']['original']
    modelDict[selfName]['CPD'] = np.zeros([len(modelDict[selfName]['labels']), selfModelCol])
    print selfName, 'unique labels:', modelDict[selfName]['labels']
    print selfName, 'CPD shape', modelDict[selfName]['CPD'].shape

    # check that original classifications of both are identical
    # otherwise cannot combine them with a single node.
    # This is currently a big limitation that will be removed later
    print modelDict[selfName]['labels']
    for j in modelList:
        print j,
        for k in range(len(modelDict[j]['pickleData']['original'])):
            print modelDict[j]['pickleData']['original'][k]
            if modelDict[j]['pickleData']['original'][k] not in modelDict[selfName]['labels']:
                modelDict[j]['pickleData']['original'][k] = 'unknown'

    for j in modelList:
        if modelDict[j]['pickleData']['original'] != modelDict[selfName]['actualLabels']:
            failFlag = True
            print 'original classifications of', j, 'are not identical to those of', selfName

    if failFlag:
        return False

    # Update netStructureString to reflect changes in the modelList names
    strSections = netStructureString.split("'")
    for k in range(len(strSections)):
        if len(strSections[k]) > 2 and ',' not in strSections[k]:
            strSections[k] = strSections[k].split(' ')[0]
    netStructureString = "'".join(strSections)
    netStructure = ast.literal_eval(netStructureString)
    # ---------------------------------------------------------------------------------------------------------------
    # iterate through actual labels
    # for each actual label, iterate through models
    # for each model find classification label of this model for current actual label
    # get the index of the current classification and add it to its CPD
    # also calculate which item in the joint CPD needs to be incremented
    for j in range(len(modelDict[selfName]['actualLabels'])):
        currActualLabel = modelDict[selfName]['actualLabels'][j]
        row = modelDict[selfName]['labels'].index(currActualLabel)
        colVar = np.zeros([len(modelList)])
        for k in range(len(modelList)):
            cmod = modelList[k]
            if k != 0:
                pmod = modelList[k - 1]
                colVar *= len(modelDict[pmod]['labels'])
            # NOTE(review): colVar[k] is a numpy float used as an index below;
            # works via implicit truncation on old numpy but is deprecated.
            colVar[k] = modelDict[cmod]['labels'].index(
                modelDict[cmod]['pickleData']['results'][j])
            modelDict[cmod]['CPD'][0, colVar[k]] += 1
        col = sum(colVar)
        modelDict[selfName]['CPD'][row, col] += 1

    # take all CPD's and normalise the matrices
    evidenceCard = copy.deepcopy(modelList)
    for j in modelDict:
        if j == selfName:
            # this is a joint CPD matrix
            # normalise columns to have sum = 1
            modelDict[j]['CPD'] = normalize(modelDict[j]['CPD'], axis=0, norm='l1')
        else:
            # normalise sum of matrix = 1
            modelDict[j]['CPD'] /= np.sum(modelDict[j]['CPD'])
            evidenceCard[evidenceCard.index(j)] = len(modelDict[j]['labels'])
        print modelDict[j]['CPD']

    model = BayesianModel(netStructure)

    # create TabularCPD data structure to nest calculated CPD
    for j in modelDict:
        if j == selfName:
            modelDict[j]['cpdObject'] = TabularCPD(variable=j,
                                                   variable_card=len(modelDict[j]['labels']),
                                                   values=modelDict[j]['CPD'],
                                                   evidence=modelList,
                                                   evidence_card=evidenceCard)
        else:
            modelDict[j]['cpdObject'] = TabularCPD(variable=j,
                                                   variable_card=len(modelDict[j]['labels']),
                                                   values=modelDict[j]['CPD'])

    # Associating the CPDs with the network
    for j in modelDict:
        model.add_cpds(modelDict[j]['cpdObject'])

    # check_model checks for the network structure and CPDs and verifies that the CPDs are correctly
    # defined and sum to 1.
    if not model.check_model():
        print 'Model check returned unsuccessful'
        return False

    infer = VariableElimination(model)
    # NOTE(review): confMatrix is created 1-D here but indexed as 2-D below —
    # almost certainly should be np.zeros((n, n)).
    confMatrix = np.zeros(len(modelDict[selfName]['labels']))
    # iterate over all original data and perform classifications to calculate if accuracy with PGN has increased
    for j in range(len(modelDict[selfName]['actualLabels'])):
        currEvidenceDict = dict()
        for k in modelList:
            currEvidenceDict[k] = modelDict[k]['labels'].index(modelDict[k]['pickleData']['results'][j])
        q = infer.query([selfName], currEvidenceDict)
        inferenceClass = modelDict[selfName]['labels'][np.argmax(q[selfName].values)]
        actualClass = modelDict[selfName]['actualLabels'][j]
        # NOTE(review): modelDict[selfName] is a dict and has no .index();
        # this should probably be modelDict[selfName]['labels'].index(...).
        confMatrix[modelDict[selfName].index(actualClass), modelDict[selfName].index(inferenceClass)] += 1
    print "%Accuracy with PGN"
    dCalc = SAMTesting.calculateData(modelDict[selfName]['actualLabels'], confMatrix)

    return True
[0.97, 0.95, 0.999, 0.98]], evidence=['Smoker', 'Pollution'], evidence_card=[2, 2]) cpd_xray = TabularCPD(variable='Xray', variable_card=2, values=[[0.9, 0.2], [0.1, 0.8]], evidence=['Cancer'], evidence_card=[2]) cpd_dysp = TabularCPD(variable='Dyspnoea', variable_card=2, values=[[0.65, 0.3], [0.35, 0.7]], evidence=['Cancer'], evidence_card=[2]) # Associating the parameters with the model structure. cancer_model.add_cpds(cpd_poll, cpd_smoke, cpd_cancer, cpd_xray, cpd_dysp) print('Model generated by adding conditional probability disttributions(cpds)') # Checking if the cpds are valid for the model. print('Checking for Correctness of model : ', end='') print(cancer_model.check_model()) print('All local idependencies are as follows') cancer_model.get_independencies() print('Displaying CPDs') print(cancer_model.get_cpds('Pollution')) print(cancer_model.get_cpds('Smoker')) print(cancer_model.get_cpds('Cancer')) print(cancer_model.get_cpds('Xray')) print(cancer_model.get_cpds('Dyspnoea'))
class TestXMLBIFWriterMethodsString(unittest.TestCase):
    # Round-trip test for XMLBIFWriter: build the classic "dog-problem"
    # network by hand, serialize it, and compare against the expected
    # XMLBIF document both in-memory and on disk.

    def setUp(self):
        """Construct the dog-problem BayesianModel and an XMLBIFWriter."""
        # Directed edges of the network.
        edges = [
            ["family-out", "dog-out"],
            ["bowel-problem", "dog-out"],
            ["family-out", "light-on"],
            ["dog-out", "hear-bark"],
        ]
        # CPTs keyed by variable.
        cpds = {
            "bowel-problem": np.array([[0.01], [0.99]]),
            "dog-out": np.array([[0.99, 0.01, 0.97, 0.03], [0.9, 0.1, 0.3, 0.7]]),
            "family-out": np.array([[0.15], [0.85]]),
            "hear-bark": np.array([[0.7, 0.3], [0.01, 0.99]]),
            "light-on": np.array([[0.6, 0.4], [0.05, 0.95]]),
        }
        # State names per variable (all binary true/false here).
        states = {
            "bowel-problem": ["true", "false"],
            "dog-out": ["true", "false"],
            "family-out": ["true", "false"],
            "hear-bark": ["true", "false"],
            "light-on": ["true", "false"],
        }
        # Parent lists per variable; empty list marks a root node.
        parents = {
            "bowel-problem": [],
            "dog-out": ["family-out", "bowel-problem"],
            "family-out": [],
            "hear-bark": ["dog-out"],
            "light-on": ["family-out"],
        }
        # "name = value" property strings attached to each node.
        properties = {
            "bowel-problem": ["position = (190, 69)"],
            "dog-out": ["position = (155, 165)"],
            "family-out": ["position = (112, 69)"],
            "hear-bark": ["position = (154, 241)"],
            "light-on": ["position = (73, 165)"],
        }
        self.model = BayesianModel(edges)
        tabular_cpds = []
        for var, values in cpds.items():
            cpd = TabularCPD(
                var,
                len(states[var]),
                values,
                evidence=parents[var],
                evidence_card=[len(states[evidence_var]) for evidence_var in parents[var]],
            )
            tabular_cpds.append(cpd)
        self.model.add_cpds(*tabular_cpds)
        # NOTE: the loop variable deliberately shadows the `properties` dict;
        # .items() is evaluated once, so iteration is unaffected.
        for node, properties in properties.items():
            for prop in properties:
                prop_name, prop_value = map(lambda t: t.strip(), prop.split("="))
                self.model.node[node][prop_name] = prop_value
        self.writer = XMLBIFWriter(model=self.model)

    def test_file(self):
        """Writer output must equal the expected XMLBIF document, both via
        __str__ and via the file written by write_xmlbif."""
        self.expected_xml = etree.XML(
            """<BIF version="0.3">
<NETWORK>
<VARIABLE TYPE="nature">
<NAME>bowel-problem</NAME>
<OUTCOME>0</OUTCOME>
<OUTCOME>1</OUTCOME>
<PROPERTY>position = (190, 69)</PROPERTY>
</VARIABLE>
<VARIABLE TYPE="nature">
<NAME>dog-out</NAME>
<OUTCOME>0</OUTCOME>
<OUTCOME>1</OUTCOME>
<PROPERTY>position = (155, 165)</PROPERTY>
</VARIABLE>
<VARIABLE TYPE="nature">
<NAME>family-out</NAME>
<OUTCOME>0</OUTCOME>
<OUTCOME>1</OUTCOME>
<PROPERTY>position = (112, 69)</PROPERTY>
</VARIABLE>
<VARIABLE TYPE="nature">
<NAME>hear-bark</NAME>
<OUTCOME>0</OUTCOME>
<OUTCOME>1</OUTCOME>
<PROPERTY>position = (154, 241)</PROPERTY>
</VARIABLE>
<VARIABLE TYPE="nature">
<NAME>light-on</NAME>
<OUTCOME>0</OUTCOME>
<OUTCOME>1</OUTCOME>
<PROPERTY>position = (73, 165)</PROPERTY>
</VARIABLE>
<DEFINITION>
<FOR>bowel-problem</FOR>
<TABLE>0.01 0.99 </TABLE>
</DEFINITION>
<DEFINITION>
<FOR>dog-out</FOR>
<GIVEN>bowel-problem</GIVEN>
<GIVEN>family-out</GIVEN>
<TABLE>0.99 0.01 0.97 0.03 0.9 0.1 0.3 0.7 </TABLE>
</DEFINITION>
<DEFINITION>
<FOR>family-out</FOR>
<TABLE>0.15 0.85 </TABLE>
</DEFINITION>
<DEFINITION>
<FOR>hear-bark</FOR>
<GIVEN>dog-out</GIVEN>
<TABLE>0.7 0.3 0.01 0.99 </TABLE>
</DEFINITION>
<DEFINITION>
<FOR>light-on</FOR>
<GIVEN>family-out</GIVEN>
<TABLE>0.6 0.4 0.05 0.95 </TABLE>
</DEFINITION>
</NETWORK>
</BIF>"""
        )
        self.maxDiff = None
        self.writer.write_xmlbif("test_bif.xml")
        with open("test_bif.xml", "r") as myfile:
            data = myfile.read()
        # [:-1] strips the trailing newline the writer appends to __str__().
        self.assertEqual(str(self.writer.__str__()[:-1]), str(etree.tostring(self.expected_xml)))
        self.assertEqual(str(data), str(etree.tostring(self.expected_xml).decode("utf-8")))
variable_card=2, values=[[0.95,0.2], [0.05,0.8]], evidence=['Musicianship'], evidence_card=[2]) #print(rating_cpd) #print(difficulty_cpd) #print(musicianship_cpd) #print(letter_cpd) #print(exam_cpd) print(music_model.edges()) #Add the CPDS to the model music_model.add_cpds(difficulty_cpd,musicianship_cpd,letter_cpd,exam_cpd,rating_cpd) #print(music_model.get_cpds()) print(music_model.check_model()) #Create object to perform inference on model music_infer = VariableElimination(music_model) #Probability Musicianship m_1 = music_infer.query(variables=['Musicianship']) print(m_1['Musicianship']) #Probability Difficulty d_l = music_infer.query(variables=['Difficulty']) print(d_l['Difficulty'])
class TestGibbsSampling(unittest.TestCase):
    # Unit tests for GibbsSampling: kernel construction from both Bayesian
    # and Markov models, plus sampling and generate_sample behaviour.

    def setUp(self):
        """Build one Bayesian model, one Markov model, and a Gibbs sampler."""
        # A test Bayesian model: diff -> grade <- intel
        diff_cpd = TabularCPD('diff', 2, [[0.6], [0.4]])
        intel_cpd = TabularCPD('intel', 2, [[0.7], [0.3]])
        grade_cpd = TabularCPD('grade', 3, [[0.3, 0.05, 0.9, 0.5],
                                            [0.4, 0.25, 0.08, 0.3],
                                            [0.3, 0.7, 0.02, 0.2]],
                               evidence=['diff', 'intel'], evidence_card=[2, 2])
        self.bayesian_model = BayesianModel()
        self.bayesian_model.add_nodes_from(['diff', 'intel', 'grade'])
        self.bayesian_model.add_edges_from([('diff', 'grade'), ('intel', 'grade')])
        self.bayesian_model.add_cpds(diff_cpd, intel_cpd, grade_cpd)

        # A test Markov model
        self.markov_model = MarkovModel([('A', 'B'), ('C', 'B'), ('B', 'D')])
        factor_ab = Factor(['A', 'B'], [2, 3], [1, 2, 3, 4, 5, 6])
        factor_cb = Factor(['C', 'B'], [4, 3], [3, 1, 4, 5, 7, 8, 1, 3, 10, 4, 5, 6])
        factor_bd = Factor(['B', 'D'], [3, 2], [5, 7, 2, 1, 9, 3])
        self.markov_model.add_factors(factor_ab, factor_cb, factor_bd)
        self.gibbs = GibbsSampling(self.bayesian_model)

    def tearDown(self):
        del self.bayesian_model
        del self.markov_model

    @patch('pgmpy.inference.Sampling.GibbsSampling._get_kernel_from_bayesian_model', autospec=True)
    @patch('pgmpy.models.MarkovChain.__init__', autospec=True)
    def test_init_bayesian_model(self, init, get_kernel):
        """Constructing from a BayesianModel must build the Bayesian kernel."""
        model = MagicMock(spec_set=BayesianModel)
        gibbs = GibbsSampling(model)
        init.assert_called_once_with(gibbs)
        get_kernel.assert_called_once_with(gibbs, model)

    @patch('pgmpy.inference.Sampling.GibbsSampling._get_kernel_from_markov_model', autospec=True)
    def test_init_markov_model(self, get_kernel):
        """Constructing from a MarkovModel must build the Markov kernel."""
        model = MagicMock(spec_set=MarkovModel)
        gibbs = GibbsSampling(model)
        get_kernel.assert_called_once_with(gibbs, model)

    def test_get_kernel_from_bayesian_model(self):
        """Kernel built from the Bayesian model exposes its variables/cards."""
        gibbs = GibbsSampling()
        gibbs._get_kernel_from_bayesian_model(self.bayesian_model)
        self.assertListEqual(list(gibbs.variables), self.bayesian_model.nodes())
        self.assertDictEqual(gibbs.cardinalities, {'diff': 2, 'intel': 2, 'grade': 3})

    def test_get_kernel_from_markov_model(self):
        """Kernel built from the Markov model exposes its variables/cards."""
        gibbs = GibbsSampling()
        gibbs._get_kernel_from_markov_model(self.markov_model)
        self.assertListEqual(list(gibbs.variables), self.markov_model.nodes())
        self.assertDictEqual(gibbs.cardinalities, {'A': 2, 'B': 3, 'C': 4, 'D': 2})

    def test_sample(self):
        """sample() returns the requested number of in-range state rows."""
        start_state = [State('diff', 0), State('intel', 0), State('grade', 0)]
        sample = self.gibbs.sample(start_state, 2)
        self.assertEquals(len(sample), 2)
        self.assertEquals(len(sample.columns), 3)
        self.assertIn('diff', sample.columns)
        self.assertIn('intel', sample.columns)
        self.assertIn('grade', sample.columns)
        self.assertTrue(set(sample['diff']).issubset({0, 1}))
        self.assertTrue(set(sample['intel']).issubset({0, 1}))
        self.assertTrue(set(sample['grade']).issubset({0, 1, 2}))

    @patch("pgmpy.inference.Sampling.GibbsSampling.random_state", autospec=True)
    def test_sample_less_arg(self, random_state):
        """sample() with no start state falls back to random_state()."""
        self.gibbs.state = None
        random_state.return_value = [State('diff', 0), State('intel', 0),
                                     State('grade', 0)]
        sample = self.gibbs.sample(size=2)
        random_state.assert_called_once_with(self.gibbs)
        self.assertEqual(len(sample), 2)

    def test_generate_sample(self):
        """generate_sample() yields rows covering all three variables."""
        start_state = [State('diff', 0), State('intel', 0), State('grade', 0)]
        gen = self.gibbs.generate_sample(start_state, 2)
        samples = [sample for sample in gen]
        self.assertEqual(len(samples), 2)
        self.assertEqual({samples[0][0].var, samples[0][1].var, samples[0][2].var},
                         {'diff', 'intel', 'grade'})
        self.assertEqual({samples[1][0].var, samples[1][1].var, samples[1][2].var},
                         {'diff', 'intel', 'grade'})

    @patch("pgmpy.inference.Sampling.GibbsSampling.random_state", autospec=True)
    def test_generate_sample_less_arg(self, random_state):
        """generate_sample() with no start state falls back to random_state()."""
        self.gibbs.state = None
        gen = self.gibbs.generate_sample(size=2)
        samples = [sample for sample in gen]
        random_state.assert_called_once_with(self.gibbs)
        self.assertEqual(len(samples), 2)
class ExactCounterfactual(object):
    """
    A class for performing Exact counterfactual inference in both the
    Standard and Twin Network approaches.

    N.B.: For logging time, this relies on a custom edit of
    pgmpy.inference.ExactInference.VariableElimination, where the query also
    returns (as a second return) the time it takes to perform factor
    marginalization.
    """

    def __init__(self, verbose=False, merge=False):
        """
        Initialize the class.

        Args:
            verbose: whether or not to automatically print the Twin &
                standard inference times.
            merge: whether or not to perform node merging.
        """
        self.verbose = verbose
        self.merge = merge

    def construct(self, causal_model=None, G=None, df=None, n_samples=20000):
        """
        Init

        Args:
            causal_model: a structural causal model; if given, G and df are
                derived from it by forward sampling n_samples draws.
            G: a networkx graph describing the dependency relationships.
            df: a dataframe of samples from that graph, used to construct the
                conditional probability tables.
            n_samples: number of forward samples drawn from causal_model.
        """
        if causal_model is None:
            assert G is not None and df is not None, \
                "Must initialize G and df if no TwinNetwork passed."
            self.G = G
            self.df = df
        else:
            self.scm = causal_model
            self.G = causal_model.G.copy()
            samples = causal_model.sample(n_samples)
            self.df = pd.DataFrame(samples, columns=causal_model.ordering)
        self.model = None  # reset
        self.twin_model = None  # reset
        self.counterfactual_model = None  # reset
        self._compile_model()

    def _compile_model(self):
        """
        Makes a pgmpy model out of a networkx graph and parameterizes its CPD
        with CPTs estimated from a model.
        """
        self.model = BayesianModel(list(self.G.edges))
        self._construct_CPD()

    def create_twin_network(self, node_of_interest, observed, intervention):
        """
        Generate self.twin_model based on the current model, then merge nodes
        and eliminate nodes that are conditionally independent of the
        counterfactual node of interest.

        Args:
            node_of_interest: the node of interest to perform inference on.
            observed: a dictionary of {node: observed_value} to condition on.
            intervention: a dictionary of {node: intervention_value} to
                intervene on.
        """
        self.twin_model = self.model.copy()
        # Twin nodes are named "<name>tn"; roots (no predecessors) are noise
        # nodes and are shared between the factual and twin halves.
        self.twin_model.add_nodes_from([
            "{}tn".format(n) for n in list(self.twin_model.nodes)
            if len(list(self.model.predecessors(n))) != 0
        ])  # add all non-noise nodes
        self.twin_model.add_edges_from([
            ("{}tn".format(pa), "{}tn".format(ch))
            for pa, ch in list(self.model.edges)
            if len(list(self.model.predecessors(pa))) != 0
        ])  # add all non-noise edges
        self.twin_model.add_edges_from([
            (pa, "{}tn".format(ch)) for pa, ch in list(self.model.edges)
            if len(list(self.model.predecessors(pa))) == 0
        ])  # add all noise edges

        # merge nodes if merge flag is true
        if self.merge:
            self.merge_nodes(node_of_interest, intervention)

        # get appropriately ordered CPTs for new merged representation
        duplicate_cpts = []
        for node in self.twin_model.nodes:
            if node[-2:] == "tn":  # if in the twin network model
                node_parents = list(self.twin_model.predecessors(node))
                # NOTE(review): pa.replace("tn", "") would also mangle a
                # factual parent whose own name contains "tn" — confirm node
                # names never contain that substring.
                non_twin_parents = [
                    pa.replace("tn", "") for pa in node_parents
                ]
                cpt = TabularCPD(
                    node, 2,
                    self.model.get_cpds(
                        node[:-2]).reorder_parents(non_twin_parents),
                    node_parents,
                    len(node_parents) * [2])
                duplicate_cpts.append(cpt)
        self.twin_model.add_cpds(*duplicate_cpts)

        # make model efficient
        modified_intervention = {
            n + "tn": intervention[n] for n in intervention
        }  # modify for twin network syntax
        self.intervene(modified_intervention, twin=True)
        self._eliminate_conditionally_independent(node_of_interest, observed,
                                                  intervention)

    def _construct_CPD(self, counterfactual=False, df=None):
        # Estimate a CPT for every node of self.G from the sample frame and
        # attach them to either the counterfactual model or the base model.
        cpt_list = []
        if df is None:
            df = self.df
        for node in self.G.nodes:
            cpt_list.append(self._get_node_CPT(node, df))
        if counterfactual:
            self.counterfactual_model.add_cpds(*cpt_list)
        else:
            self.model.add_cpds(*cpt_list)
        # erase df to make object pickleable, otherwise the object becomes
        # unpicklable. (Important for parallel processing)
        self.df = None

    def _get_node_CPT(self, node, df=None):
        # Build a TabularCPD for `node` from empirical means in df.
        # All variables are assumed binary (variable_card=2) here.
        parents = list(self.G.predecessors(node))
        if len(parents) == 0:  # if root node (latent)
            mu = df[node].mean()
            return TabularCPD(node, 2, values=[[1 - mu], [mu]])
        elif len(parents) > 0:
            mus = df.groupby(parents)[node].mean().reset_index()
            uniques = mus[parents].drop_duplicates()
            parent_combos = list(product(*[[0, 1] for _ in parents]))
            appends = []
            for combo in parent_combos:
                if not (uniques == np.array(combo)
                        ).all(1).any():  # if value not enumerated in sample
                    appends.append(list(combo) + [0.5])  # add an uninformative prior
            add_df = pd.DataFrame(appends, columns=parents + [node])
            mus = pd.concat((mus, add_df), axis=0)
            mus = mus.sort_values(by=parents)
            mus = mus[node].values
            cpt = np.vstack((1. - mus, mus))
            cpt = TabularCPD(node, 2, values=cpt, evidence=parents,
                             evidence_card=len(parents) * [2])
            return cpt

    def query(self, var, observed, counterfactual=False, twin=False):
        """
        Run an arbitrary query by Variable Elimination.

        What is the analytic cost of this? You have to do K noise queries in
        a graph with K endog nodes + K exog nodes in normal CFI. In twin
        network inference, you have to do 1 query in a graph with 2K endog
        nodes + K exog nodes.

        Args:
            var: variable of interest, i.e. P(Var | Observed)
            observed: a dictionary of {node_name: observed_value} to
                condition on.
            counterfactual: if true, uses the counterfactual model.
                (self.counterfactual_model)
            twin: if true, uses the twin network model. (self.twin_model)

        Returns:
            (result, time_elapsed) — relies on the custom stopwatch=True
            edit of pgmpy's VariableElimination.query (see class docstring).
        """
        if not isinstance(var, list):
            var = [var]
        if twin:
            # time_start = time.time()
            infer = VariableElimination(self.efficient_twin_model)
            result, time_elapsed = infer.query(var, evidence=observed,
                                               stopwatch=True)
            self.twin_inference_time = time_elapsed
        elif counterfactual:
            # time_start = time.time()
            infer = VariableElimination(self.counterfactual_model)
            result, time_elapsed = infer.query(var, evidence=observed,
                                               stopwatch=True)
            # standard time includes the earlier joint (abduction) query
            self.standard_inference_time = self.joint_inference_time + time_elapsed
        else:
            infer = VariableElimination(self.model)
            result, time_elapsed = infer.query(var, evidence=observed,
                                               stopwatch=True)
        return result, time_elapsed

    def intervene(self, intervention, counterfactual=False, twin=False):
        """
        Performs the intervention on the BN object by setting the CPT to be
        deterministic and removing parents.

        Args:
            intervention: a dictionary of {node_name: intervention_value} to
                intervene on.
            counterfactual: apply to self.counterfactual_model.
            twin: apply to self.twin_model.
        """
        cpt_list = []
        if counterfactual and not twin:
            model = self.counterfactual_model
        elif twin and not counterfactual:
            model = self.twin_model
        else:
            model = self.model
        for node in intervention:
            if node in model.nodes:
                # do-calculus graph surgery: remove edges from parents
                parent_edges = [(pa, node) for pa in model.predecessors(node)]
                model.remove_edges_from(parent_edges)
                # also drop the node's dedicated noise parent "U<node>"
                model.remove_node("U{}".format(node))
                # set new deterministic CPT
                value = intervention[node]
                cpt = [[], []]
                cpt[value] = [1]
                cpt[int(not bool(value))] = [0]
                new_cpt = TabularCPD(node, 2, values=cpt)
                cpt_list.append(new_cpt)
        # override existing CPTs
        model.add_cpds(*cpt_list)

    def abduction(self, observed, n_samples=None):
        """Infer the joint over noise (root) nodes given `observed`, then
        rebuild self.counterfactual_model from samples of that posterior."""
        # infer latent joint and store the time it takes
        noise_nodes = [
            n for n in self.G.nodes if len(list(self.G.predecessors(n))) == 0
        ]
        new_joint, time_elapsed = self.query(noise_nodes, observed)
        self.joint_inference_time = time_elapsed
        new_joint = new_joint.values.ravel()
        # sample from network with new latent distribution
        ## sample from joint
        dim = 2**len(noise_nodes)
        val_idx = np.arange(dim)
        # define number of samples
        if n_samples is None:  # be careful with this!
            n_samples = min(
                [30 * 2**(len(list(self.G.nodes)) - len(noise_nodes)), 100000])
        noise_sample_idx = np.random.choice(val_idx, size=n_samples,
                                            p=new_joint)
        vals = np.array(
            list(product(*[[0, 1] for _ in range(len(noise_nodes))])))
        noise_samples = vals[noise_sample_idx]
        ## intervene in DAG
        self.scm.do(
            {n: noise_samples[:, i] for i, n in enumerate(noise_nodes)})
        ## sample with these interventions
        counterfactual_samples = pd.DataFrame(self.scm.sample(n_samples),
                                              columns=self.scm.ordering)
        # construct cpts with new distribution
        self.counterfactual_model = self.model.copy()
        self._construct_CPD(counterfactual=True, df=counterfactual_samples)

    def exact_abduction_prediction(self, noi, ev, intn, n_joint_samples=30000):
        """Abduct the exact exogenous joint given ev, push samples of it and
        the intervention intn through the SCM, and return the mean of noi."""
        # sample from exact joint distribution
        start = time.time()
        joint = self.query(self.scm._get_exog_nodes(), ev)[0]
        values = np.array(
            list(product(*[range(card) for card in joint.cardinality])))
        n_joint_samples = max([n_joint_samples, 30 * values.shape[0]])
        probabilities = joint.values.ravel()
        idx = np.random.choice(np.arange(values.shape[0]),
                               size=n_joint_samples,
                               p=probabilities)
        samples = values[idx]
        samples = {
            joint.variables[i]: samples[:, i]
            for i in range(len(joint.variables))
        }
        # NOTE(review): leftover debug timing print — consider removing or
        # routing through `verbose`.
        print(time.time() - start)
        # pass joint samples
        self.scm.do(samples)
        # format intervention
        if isinstance(intn[list(intn.keys())[0]], int):
            intn = {k: intn[k] * np.ones(n_joint_samples) for k in intn}
        self.scm.do(intn)
        # sample form new model
        prediction = self.scm.sample(return_pandas=True)[noi]
        return prediction.mean()

    def enumerate_inference(self, noi, ev, intn, n_samples=30000):
        """
        Performs exact counterfactual inference by enumeration.
        """
        intn = {k: intn[k] * np.ones(n_samples) for k in intn}
        joint_sample, joint_prob = self.posterior_enumerate(ev)
        joint_samples = joint_sample[np.random.choice(np.arange(
            joint_sample.shape[0]), p=joint_prob, size=n_samples)]
        joint_samples = {
            node: joint_samples[:, i]
            for i, node in enumerate(self.scm._get_exog_nodes())
        }
        self.scm.do(joint_samples)
        self.scm.do(intn)
        prediction = self.scm.sample(return_pandas=True)[noi]
        return prediction.mean()

    def posterior_enumerate(self, evidence):
        """
        Inference via enumeration.

        Returns (combinations, posterior): all binary exogenous assignments
        and their normalized posterior weights given the evidence.
        """
        # set up enumeration
        exog_nodes = self.scm._get_exog_nodes()
        endog_nodes = self.scm._get_endog_nodes()
        evidence_array = np.array(
            [evidence[k] for k in endog_nodes if k in evidence])
        evidence_index = [
            i for i, v in enumerate(endog_nodes) if v in evidence
        ]
        combinations = np.array(
            list(product(*[range(2) for _ in range(len(exog_nodes))])))
        probabilities = np.array(
            [self.scm.G.nodes[node]['p'] for node in exog_nodes])
        prior = combinations * probabilities + (1 - combinations) * (
            1 - probabilities)

        def vector_compare(val_prob):
            # Weight = prior probability if the SCM sample under this
            # exogenous assignment reproduces the evidence, else 0.
            joint_sample, prior = val_prob
            self.scm.do({
                exog_nodes[i]: joint_sample[i]
                for i in range(len(exog_nodes))
            })
            samp = self.scm.sample().flatten()
            if np.all(evidence_array == samp[evidence_index]):
                # NOTE(review): np.product is deprecated in modern NumPy;
                # np.prod is the supported spelling.
                return np.product(prior)
            else:
                return 0

        posterior = np.array(
            [i for i in map(vector_compare, zip(combinations, prior))])
        posterior = posterior / np.sum(posterior)
        return combinations, posterior

    def _generate_counterfactual_model(self, observed, intervention,
                                       n_samples=None):
        """
        Runs the standard counterfactual inference procedure and returns an
        intervened model with the posterior.

        Args:
            observed: a dictionary of {node: observed_value} to condition on.
            intervention: a dictionary of {node: intervention_value} to
                intervene on.
            n_samples: optional sample budget forwarded to abduction().
        """
        self.abduction(observed, n_samples)
        self.intervene(intervention, counterfactual=True)

    def standard_counterfactual_query(self, node_of_interest, observed,
                                      intervention,
                                      n_samples_for_approx=None):
        """
        Query and sample from the counterfactual model.

        Args:
            node_of_interest: the node to query in the counterfactual world.
            observed: a dictionary of {node: observed_value} to condition on.
            intervention: a dictionary of {node: intervention_value} to
                intervene on.
            n_samples_for_approx: number of samples to draw from the
                counterfactual world model.
        """
        # infer latents and generate model, also initializes
        # self.standard_inference_time
        self._generate_counterfactual_model(observed, intervention,
                                            n_samples=n_samples_for_approx)
        # then run the query
        ## for stability, pass in as evidence a deterministic value for the
        ## intervention node
        int_noise_node_values = {
            "U{}".format(k): intervention[k] for k in intervention
        }
        q, time_elapsed = self.query(node_of_interest,
                                     observed=int_noise_node_values,
                                     counterfactual=True)
        self.standard_inference_time = self.joint_inference_time + time_elapsed
        return q

    def merge_nodes(self, node_of_interest, intervention):
        """
        Merge nodes in the Twin Counterfactual network. In place modifies
        `self.twin_model`. Works by giving children of the node to be
        eliminated to its factual counterpart. Operates topologically.
        """
        # find every non-descendant of the intervention nodes
        nondescendant_sets = []
        all_nodes = set([i for i in list(self.model.nodes) if i[0] != 'U'])
        for node in intervention:
            nondescendant_sets.append(
                all_nodes.difference(set(nx.descendants(self.model, node))))
        dont_merge = [node_of_interest] + list(intervention.keys())
        shared_nondescendants = set.intersection(
            *nondescendant_sets) - set(dont_merge)
        # now modify twin network to replace all _tn variables with their
        # regular counterpart
        ordered_nondescendants = [
            n for n in nx.topological_sort(self.model)
            if n in list(shared_nondescendants)
        ]
        for node in ordered_nondescendants:  # start with the oldest nodes
            twin_node = node + "tn"
            tn_children = self.twin_model.successors(twin_node)
            self.twin_model.add_edges_from([(node, c) for c in tn_children])
            self.twin_model.remove_node(twin_node)

    def _eliminate_conditionally_independent(self, node_of_interest, observed,
                                             intervention):
        """
        Generate an "efficient" twin network model by removing nodes that are
        d-separated from the node of interest given observed and intervened
        variables.

        Args:
            node_of_interest: the node of interest in the query.
            observed: a dictionary of {node: observed_value} to condition on.
            intervention: a dictionary of {node: intervention_value} to
                intervene on.
        """
        conditioned_on = list(observed) + list(intervention)
        self.efficient_twin_model = self.twin_model.copy()
        for node in [n for n in self.twin_model.nodes if n[-2:] == "tn"]:
            try:
                if not self.efficient_twin_model.is_active_trail(
                        node, node_of_interest + "tn",
                        observed=conditioned_on):
                    self.efficient_twin_model.remove_node(node)
            # NOTE(review): bare except silently skips nodes whose trail
            # check fails (e.g. already-removed nodes); consider narrowing.
            except:
                pass

    def twin_counterfactual_query(self, node_of_interest, observed,
                                  intervention):
        """
        Query and sample from the counterfactual model.

        Args:
            node_of_interest: the node to query in the twin network.
            observed: a dictionary of {node: observed_value} to condition on.
            intervention: a dictionary of {node: intervention_value} to
                intervene on.
        """
        self.create_twin_network(node_of_interest, observed,
                                 intervention)  # then, create the twin network
        result, time_elapsed = self.query(
            node_of_interest + "tn", observed,
            twin=True)  # log time it takes to do p(Vtn | E)
        return result

    def sample(self, n_samples=1, counterfactual=False, twin=False):
        """
        Perform forward sampling from the model.

        Args:
            n_samples: the number of samples you'd like to return
            counterfactual: sample from self.counterfactual_model.
            twin: sample from self.twin_model.
        """
        if counterfactual:
            model = self.counterfactual_model
        elif twin:
            model = self.twin_model
        else:
            model = self.model
        inference = BayesianModelSampling(model)
        return inference.forward_sample(size=n_samples,
                                        return_type='dataframe')

    def compare_times(self, node_of_interest, observed, intervention,
                      n_samples_for_approx=None):
        """
        Compare the times it takes to do inference in the standard and twin
        network counterfactual inference approaches.

        Args:
            node_of_interest: the node of interest to perform inference on.
            observed: a dictionary of {node: observed_value} to condition on.
            intervention: a dictionary of {node: intervention_value} to
                intervene on.
        """
        try:
            with warnings.catch_warnings():
                warnings.simplefilter("ignore")
                print("A. Performing Standard Counterfactual Inference.")
                self.standard_counterfactual_query(node_of_interest, observed,
                                                   intervention,
                                                   n_samples_for_approx)
                print("B. Performing Twin Network Counterfactual Inference.")
                # first, reset the graph network
                self.scm.G = self.scm.G_original.copy()
                self.twin_counterfactual_query(node_of_interest, observed,
                                               intervention)
            if self.verbose:
                print(self.standard_inference_time, self.twin_inference_time)
            return self
        except Exception as e:
            print(e)
            print((node_of_interest, observed, intervention))
            return False  # return False bool to indicate failed experiment.
def get_game_network(): BayesNet = BayesianModel() BayesNet.add_node('A') BayesNet.add_node('B') BayesNet.add_node('C') BayesNet.add_node('AvB') BayesNet.add_node('BvC') BayesNet.add_node('CvA') BayesNet.add_edge('A', 'AvB') BayesNet.add_edge('B', 'AvB') BayesNet.add_edge('B', 'BvC') BayesNet.add_edge('C', 'BvC') BayesNet.add_edge('C', 'CvA') BayesNet.add_edge('A', 'CvA') cpd_a = TabularCPD('A', 4, values=[[0.15], [0.45], [0.3], [0.1]]) cpd_b = TabularCPD('B', 4, values=[[0.15], [0.45], [0.3], [0.1]]) cpd_c = TabularCPD('C', 4, values=[[0.15], [0.45], [0.3], [0.1]]) cpd_avb = TabularCPD('AvB', 3, values=[[ 0.10, 0.20, 0.15, 0.05, 0.60, 0.10, 0.20, 0.15, 0.75, 0.60, 0.10, 0.20, 0.90, 0.75, 0.60, 0.10 ], [ 0.10, 0.60, 0.75, 0.90, 0.20, 0.10, 0.60, 0.75, 0.15, 0.20, 0.10, 0.60, 0.05, 0.15, 0.20, 0.10 ], [ 0.80, 0.20, 0.10, 0.05, 0.20, 0.80, 0.20, 0.10, 0.10, 0.20, 0.80, 0.20, 0.05, 0.10, 0.20, 0.80 ]], evidence=['A', 'B'], evidence_card=[4, 4]) cpd_bvc = TabularCPD('BvC', 3, values=[[ 0.10, 0.20, 0.15, 0.05, 0.60, 0.10, 0.20, 0.15, 0.75, 0.60, 0.10, 0.20, 0.90, 0.75, 0.60, 0.10 ], [ 0.10, 0.60, 0.75, 0.90, 0.20, 0.10, 0.60, 0.75, 0.15, 0.20, 0.10, 0.60, 0.05, 0.15, 0.20, 0.10 ], [ 0.80, 0.20, 0.10, 0.05, 0.20, 0.80, 0.20, 0.10, 0.10, 0.20, 0.80, 0.20, 0.05, 0.10, 0.20, 0.80 ]], evidence=['B', 'C'], evidence_card=[4, 4]) cpd_cva = TabularCPD('CvA', 3, values=[[ 0.10, 0.20, 0.15, 0.05, 0.60, 0.10, 0.20, 0.15, 0.75, 0.60, 0.10, 0.20, 0.90, 0.75, 0.60, 0.10 ], [ 0.10, 0.60, 0.75, 0.90, 0.20, 0.10, 0.60, 0.75, 0.15, 0.20, 0.10, 0.60, 0.05, 0.15, 0.20, 0.10 ], [ 0.80, 0.20, 0.10, 0.05, 0.20, 0.80, 0.20, 0.10, 0.10, 0.20, 0.80, 0.20, 0.05, 0.10, 0.20, 0.80 ]], evidence=['C', 'A'], evidence_card=[4, 4]) BayesNet.add_cpds(cpd_a, cpd_b, cpd_c, cpd_avb, cpd_bvc, cpd_cva) return BayesNet
class TestXMLBIFWriterMethodsString(unittest.TestCase):
    """Round-trip test for XMLBIFWriter on the classic dog-problem network."""

    def setUp(self):
        edges = [['family-out', 'dog-out'],
                 ['bowel-problem', 'dog-out'],
                 ['family-out', 'light-on'],
                 ['dog-out', 'hear-bark']]
        cpds = {
            'bowel-problem': np.array([[0.01], [0.99]]),
            'dog-out': np.array([[0.99, 0.01, 0.97, 0.03],
                                 [0.9, 0.1, 0.3, 0.7]]),
            'family-out': np.array([[0.15], [0.85]]),
            'hear-bark': np.array([[0.7, 0.3], [0.01, 0.99]]),
            'light-on': np.array([[0.6, 0.4], [0.05, 0.95]])
        }
        states = {
            'bowel-problem': ['true', 'false'],
            'dog-out': ['true', 'false'],
            'family-out': ['true', 'false'],
            'hear-bark': ['true', 'false'],
            'light-on': ['true', 'false']
        }
        parents = {
            'bowel-problem': [],
            'dog-out': ['family-out', 'bowel-problem'],
            'family-out': [],
            'hear-bark': ['dog-out'],
            'light-on': ['family-out']
        }
        properties = {
            'bowel-problem': ['position = (190, 69)'],
            'dog-out': ['position = (155, 165)'],
            'family-out': ['position = (112, 69)'],
            'hear-bark': ['position = (154, 241)'],
            'light-on': ['position = (73, 165)']
        }

        self.model = BayesianModel(edges)

        tabular_cpds = []
        for var, values in cpds.items():
            cpd = TabularCPD(var, len(states[var]), values,
                             evidence=parents[var],
                             evidence_card=[
                                 len(states[evidence_var])
                                 for evidence_var in parents[var]
                             ])
            tabular_cpds.append(cpd)
        self.model.add_cpds(*tabular_cpds)

        # Fix: the original loop target shadowed the `properties` dict it was
        # iterating (`for node, properties in properties.items()`); use a
        # distinct name for the per-node property list.
        for node, node_props in properties.items():
            for prop in node_props:
                prop_name, prop_value = map(lambda t: t.strip(),
                                            prop.split('='))
                self.model.node[node][prop_name] = prop_value

        self.writer = XMLBIFWriter(model=self.model)

    def test_file(self):
        self.expected_xml = etree.XML("""<BIF version="0.3">
<NETWORK>
<VARIABLE TYPE="nature">
<NAME>bowel-problem</NAME>
<OUTCOME>0</OUTCOME>
<OUTCOME>1</OUTCOME>
<PROPERTY>position = (190, 69)</PROPERTY>
</VARIABLE>
<VARIABLE TYPE="nature">
<NAME>dog-out</NAME>
<OUTCOME>0</OUTCOME>
<OUTCOME>1</OUTCOME>
<PROPERTY>position = (155, 165)</PROPERTY>
</VARIABLE>
<VARIABLE TYPE="nature">
<NAME>family-out</NAME>
<OUTCOME>0</OUTCOME>
<OUTCOME>1</OUTCOME>
<PROPERTY>position = (112, 69)</PROPERTY>
</VARIABLE>
<VARIABLE TYPE="nature">
<NAME>hear-bark</NAME>
<OUTCOME>0</OUTCOME>
<OUTCOME>1</OUTCOME>
<PROPERTY>position = (154, 241)</PROPERTY>
</VARIABLE>
<VARIABLE TYPE="nature">
<NAME>light-on</NAME>
<OUTCOME>0</OUTCOME>
<OUTCOME>1</OUTCOME>
<PROPERTY>position = (73, 165)</PROPERTY>
</VARIABLE>
<DEFINITION>
<FOR>bowel-problem</FOR>
<TABLE>0.01 0.99 </TABLE>
</DEFINITION>
<DEFINITION>
<FOR>dog-out</FOR>
<GIVEN>bowel-problem</GIVEN>
<GIVEN>family-out</GIVEN>
<TABLE>0.99 0.01 0.97 0.03 0.9 0.1 0.3 0.7 </TABLE>
</DEFINITION>
<DEFINITION>
<FOR>family-out</FOR>
<TABLE>0.15 0.85 </TABLE>
</DEFINITION>
<DEFINITION>
<FOR>hear-bark</FOR>
<GIVEN>dog-out</GIVEN>
<TABLE>0.7 0.3 0.01 0.99 </TABLE>
</DEFINITION>
<DEFINITION>
<FOR>light-on</FOR>
<GIVEN>family-out</GIVEN>
<TABLE>0.6 0.4 0.05 0.95 </TABLE>
</DEFINITION>
</NETWORK>
</BIF>""")
        self.maxDiff = None
        self.writer.write_xmlbif("test_bif.xml")
        with open("test_bif.xml", "r") as myfile:
            data = myfile.read()
        self.assertEqual(str(self.writer.__str__()[:-1]),
                         str(etree.tostring(self.expected_xml)))
        self.assertEqual(
            str(data),
            str(etree.tostring(self.expected_xml).decode('utf-8')))
evidence=['X1', 'X2'], evidence_card=[2, 2]) noise = 0.2 for i in range(3): parent = 'X{}'.format(i + 1) child = 'Y{}'.format(i + 1) CPDs[child] = TabularCPD(variable=child, variable_card=2, values=[[1 - noise, noise], [noise, 1 - noise]], evidence=[parent], evidence_card=[2]) # Make model for cpd in CPDs.values(): model.add_cpds(cpd) model.check_model() from pgmpy.inference import VariableElimination infer = VariableElimination(model) # Inference evidence = {'Y1': 1, 'Y2': 0, 'Y3': 0} marginals = {} for i in range(3): name = 'X{}'.format(i + 1) post = infer.query([name], evidence=evidence).values marginals[name] = post print(marginals)
# +---------+---------+---------+---------+---------+ # | diff | intel_0 | intel_0 | intel_1 | intel_1 | # +---------+---------+---------+---------+---------+ # | intel | diff_0 | diff_1 | diff_0 | diff_1 | # +---------+---------+---------+---------+---------+ # | grade_0 | 0.3 | 0.05 | 0.9 | 0.5 | # +---------+---------+---------+---------+---------+ # | grade_1 | 0.4 | 0.25 | 0.08 | 0.3 | # +---------+---------+---------+---------+---------+ # | grade_2 | 0.3 | 0.7 | 0.02 | 0.2 | # +---------+---------+---------+---------+---------+ cpd_g = TabularCPD(variable='G', variable_card=2, values=[[0.9, 0.9, 0.8, 0.1], [0.1, 0.1, 0.2, 0.9]], evidence=['F', 'B'], evidence_card=[2, 2]) # Associating the CPDs with the network model.add_cpds(cpd_b, cpd_f, cpd_g) # check_model checks for the network structure and CPDs and verifies that the CPDs are correctly # defined and sum to 1. model.check_model() # These defined CPDs can be added to the model. Since, the model already has CPDs associated to variables, it will # show warning that pmgpy is now replacing those CPDs with the new ones. model.get_cpds() print(cpd_g) infer = VariableElimination(model) print(infer.query(['G']))
''' Created on Sep 21, 2017 @author: Adele ''' from pgmpy.models import BayesianModel from pgmpy.factors.discrete import TabularCPD from pgmpy.inference import BeliefPropagation G = BayesianModel([('diff', 'grade'), ('intel', 'grade'), ('intel', 'SAT'), ('grade', 'letter')]) diff_cpd = TabularCPD('diff', 2, [[0.2], [0.8]]) intel_cpd = TabularCPD('intel', 3, [[0.5], [0.3], [0.2]]) grade_cpd = TabularCPD( 'grade', 3, [[0.1, 0.1, 0.1, 0.1, 0.1, 0.1], [0.1, 0.1, 0.1, 0.1, 0.1, 0.1], [0.8, 0.8, 0.8, 0.8, 0.8, 0.8]], evidence=['diff', 'intel'], evidence_card=[2, 3]) sat_cpd = TabularCPD('SAT', 2, [[0.1, 0.2, 0.7], [0.9, 0.8, 0.3]], evidence=['intel'], evidence_card=[3]) letter_cpd = TabularCPD('letter', 2, [[0.1, 0.4, 0.8], [0.9, 0.6, 0.2]], evidence=['grade'], evidence_card=[3]) G.add_cpds(diff_cpd, intel_cpd, grade_cpd, sat_cpd, letter_cpd) bp = BeliefPropagation(G) bp.calibrate()
class TestBayesianModelCPD(unittest.TestCase):
    """Tests for active-trail queries and CPD handling on the student graph
    d -> g <- i, g -> l, i -> s."""

    def setUp(self):
        self.G = BayesianModel([('d', 'g'), ('i', 'g'), ('g', 'l'),
                                ('i', 's')])

    def test_active_trail_nodes(self):
        # With nothing observed, every node reachable by an active trail.
        self.assertEqual(sorted(self.G.active_trail_nodes('d')),
                         ['d', 'g', 'l'])
        self.assertEqual(sorted(self.G.active_trail_nodes('i')),
                         ['g', 'i', 'l', 's'])

    def test_active_trail_nodes_args(self):
        # Observing 'g' opens the collider d -> g <- i.
        self.assertEqual(sorted(self.G.active_trail_nodes('d', observed='g')),
                         ['d', 'i', 's'])
        self.assertEqual(sorted(self.G.active_trail_nodes('l', observed='g')),
                         ['l'])
        self.assertEqual(
            sorted(self.G.active_trail_nodes('s', observed=['i', 'l'])),
            ['s'])
        self.assertEqual(
            sorted(self.G.active_trail_nodes('s', observed=['d', 'l'])),
            ['g', 'i', 's'])

    def test_is_active_trail_triplets(self):
        self.assertTrue(self.G.is_active_trail('d', 'l'))
        self.assertTrue(self.G.is_active_trail('g', 's'))
        self.assertFalse(self.G.is_active_trail('d', 'i'))
        self.assertTrue(self.G.is_active_trail('d', 'i', observed='g'))
        self.assertFalse(self.G.is_active_trail('d', 'l', observed='g'))
        self.assertFalse(self.G.is_active_trail('i', 'l', observed='g'))
        # 'l' is a descendant of the collider 'g', so observing it activates
        # the d - i trail too.
        self.assertTrue(self.G.is_active_trail('d', 'i', observed='l'))
        self.assertFalse(self.G.is_active_trail('g', 's', observed='i'))

    def test_is_active_trail(self):
        self.assertFalse(self.G.is_active_trail('d', 's'))
        self.assertTrue(self.G.is_active_trail('s', 'l'))
        self.assertTrue(self.G.is_active_trail('d', 's', observed='g'))
        self.assertFalse(self.G.is_active_trail('s', 'l', observed='g'))

    def test_is_active_trail_args(self):
        # Observed passed positionally.
        self.assertFalse(self.G.is_active_trail('s', 'l', 'i'))
        self.assertFalse(self.G.is_active_trail('s', 'l', 'g'))
        self.assertTrue(self.G.is_active_trail('d', 's', 'l'))
        self.assertFalse(self.G.is_active_trail('d', 's', ['i', 'l']))

    def test_get_cpds(self):
        cpd_d = TabularCPD('d', 2, np.random.rand(2, 1))
        cpd_i = TabularCPD('i', 2, np.random.rand(2, 1))
        cpd_g = TabularCPD('g', 2, np.random.rand(2, 4), ['d', 'i'], [2, 2])
        cpd_l = TabularCPD('l', 2, np.random.rand(2, 2), ['g'], 2)
        cpd_s = TabularCPD('s', 2, np.random.rand(2, 2), ['i'], 2)
        self.G.add_cpds(cpd_d, cpd_i, cpd_g, cpd_l, cpd_s)
        self.assertEqual(self.G.get_cpds('d').variable, 'd')

    def test_get_cpds1(self):
        self.model = BayesianModel([('A', 'AB')])
        cpd_a = TabularCPD('A', 2, np.random.rand(2, 1))
        cpd_ab = TabularCPD('AB', 2, np.random.rand(2, 2),
                            evidence=['A'], evidence_card=[2])
        self.model.add_cpds(cpd_a, cpd_ab)
        self.assertEqual(self.model.get_cpds('A').variable, 'A')
        self.assertEqual(self.model.get_cpds('AB').variable, 'AB')

    def test_add_single_cpd(self):
        cpd_s = TabularCPD('s', 2, np.random.rand(2, 2), ['i'], 2)
        self.G.add_cpds(cpd_s)
        self.assertListEqual(self.G.get_cpds(), [cpd_s])

    def test_add_multiple_cpds(self):
        cpd_d = TabularCPD('d', 2, np.random.rand(2, 1))
        cpd_i = TabularCPD('i', 2, np.random.rand(2, 1))
        cpd_g = TabularCPD('g', 2, np.random.rand(2, 4), ['d', 'i'], [2, 2])
        cpd_l = TabularCPD('l', 2, np.random.rand(2, 2), ['g'], 2)
        cpd_s = TabularCPD('s', 2, np.random.rand(2, 2), ['i'], 2)
        self.G.add_cpds(cpd_d, cpd_i, cpd_g, cpd_l, cpd_s)
        self.assertEqual(self.G.get_cpds('d'), cpd_d)
        self.assertEqual(self.G.get_cpds('i'), cpd_i)
        self.assertEqual(self.G.get_cpds('g'), cpd_g)
        self.assertEqual(self.G.get_cpds('l'), cpd_l)
        self.assertEqual(self.G.get_cpds('s'), cpd_s)

    def test_check_model(self):
        # Consistent, normalized CPDs pass check_model.
        cpd_g = TabularCPD('g', 2,
                           np.array([[0.2, 0.3, 0.4, 0.6],
                                     [0.8, 0.7, 0.6, 0.4]]),
                           ['d', 'i'], [2, 2])
        cpd_s = TabularCPD('s', 2,
                           np.array([[0.2, 0.3],
                                     [0.8, 0.7]]),
                           ['i'], 2)
        cpd_l = TabularCPD('l', 2,
                           np.array([[0.2, 0.3],
                                     [0.8, 0.7]]),
                           ['g'], 2)
        self.G.add_cpds(cpd_g, cpd_s, cpd_l)
        self.assertTrue(self.G.check_model())

    def test_check_model1(self):
        # CPDs whose evidence disagrees with the graph's parent sets raise.
        cpd_g = TabularCPD('g', 2,
                           np.array([[0.2, 0.3],
                                     [0.8, 0.7]]),
                           ['i'], 2)
        self.G.add_cpds(cpd_g)
        self.assertRaises(ValueError, self.G.check_model)
        self.G.remove_cpds(cpd_g)

        cpd_g = TabularCPD('g', 2,
                           np.array([[0.2, 0.3, 0.4, 0.6],
                                     [0.8, 0.7, 0.6, 0.4]]),
                           ['d', 's'], [2, 2])
        self.G.add_cpds(cpd_g)
        self.assertRaises(ValueError, self.G.check_model)
        self.G.remove_cpds(cpd_g)

        cpd_g = TabularCPD('g', 2,
                           np.array([[0.2, 0.3],
                                     [0.8, 0.7]]),
                           ['l'], 2)
        self.G.add_cpds(cpd_g)
        self.assertRaises(ValueError, self.G.check_model)
        self.G.remove_cpds(cpd_g)

        cpd_l = TabularCPD('l', 2,
                           np.array([[0.2, 0.3],
                                     [0.8, 0.7]]),
                           ['d'], 2)
        self.G.add_cpds(cpd_l)
        self.assertRaises(ValueError, self.G.check_model)
        self.G.remove_cpds(cpd_l)

        cpd_l = TabularCPD('l', 2,
                           np.array([[0.2, 0.3, 0.4, 0.6],
                                     [0.8, 0.7, 0.6, 0.4]]),
                           ['d', 'i'], [2, 2])
        self.G.add_cpds(cpd_l)
        self.assertRaises(ValueError, self.G.check_model)
        self.G.remove_cpds(cpd_l)

        cpd_l = TabularCPD('l', 2,
                           np.array([[0.2, 0.3, 0.4, 0.6, 0.2, 0.3, 0.4, 0.6],
                                     [0.8, 0.7, 0.6, 0.4, 0.8, 0.7, 0.6, 0.4]]),
                           ['g', 'd', 'i'], [2, 2, 2])
        self.G.add_cpds(cpd_l)
        self.assertRaises(ValueError, self.G.check_model)
        self.G.remove_cpds(cpd_l)

    def test_check_model2(self):
        # CPD columns that do not sum to 1 raise.
        cpd_s = TabularCPD('s', 2,
                           np.array([[0.5, 0.3],
                                     [0.8, 0.7]]),
                           ['i'], 2)
        self.G.add_cpds(cpd_s)
        self.assertRaises(ValueError, self.G.check_model)
        self.G.remove_cpds(cpd_s)

        cpd_g = TabularCPD('g', 2,
                           np.array([[0.2, 0.3, 0.4, 0.6],
                                     [0.3, 0.7, 0.6, 0.4]]),
                           ['d', 'i'], [2, 2])
        self.G.add_cpds(cpd_g)
        self.assertRaises(ValueError, self.G.check_model)
        self.G.remove_cpds(cpd_g)

        cpd_l = TabularCPD('l', 2,
                           np.array([[0.2, 0.3],
                                     [0.1, 0.7]]),
                           ['g'], 2)
        self.G.add_cpds(cpd_l)
        self.assertRaises(ValueError, self.G.check_model)
        self.G.remove_cpds(cpd_l)

    def tearDown(self):
        del self.G
values=[[.3, .05, .9, .5], [.4, .25, .08, .3], [.3, .7, .02, .2]], evidence=['I', 'D'], evidence_card=[2, 2]) letter_cpd = TabularCPD(variable='L', variable_card=2, values=[[.1, 0.4, .99], [.9, 0.6, .01]], evidence=['G'], evidence_card=[3]) # buildind model student_model = BayesianModel([('D', 'G'), ('I', 'G'), ('I', 'S'), ('G', 'L')]) # adding cpds student_model.add_cpds(difficulty_cpd, intelligence_cpd, sat_cpd, grade_cpd, letter_cpd) model_name = "student" # json dump part import json data = { "name": "student", "short_description": "give short desc here", "long_description": "give long desc here", "nodes": { "L": "Letter", "D": "Difficulty", "G": "Grade", "I": "Intelligence", "S": "SAT Scores"
class TestInferenceBase(unittest.TestCase):
    """Tests that Inference.__init__ collects variables, cardinalities and
    per-variable factor lists from both Bayesian and Markov models."""

    def setUp(self):
        # Chain Bayesian model a -> b -> c -> d -> e.
        self.bayesian = BayesianModel([('a', 'b'), ('b', 'c'), ('c', 'd'),
                                       ('d', 'e')])
        a_cpd = TabularCPD('a', 2, [[0.4, 0.6]])
        b_cpd = TabularCPD('b', 2, [[0.2, 0.4],
                                    [0.3, 0.4]],
                           evidence='a', evidence_card=[2])
        c_cpd = TabularCPD('c', 2, [[0.1, 0.2],
                                    [0.3, 0.4]],
                           evidence='b', evidence_card=[2])
        d_cpd = TabularCPD('d', 2, [[0.4, 0.3],
                                    [0.2, 0.1]],
                           evidence='c', evidence_card=[2])
        e_cpd = TabularCPD('e', 2, [[0.3, 0.2],
                                    [0.4, 0.1]],
                           evidence='d', evidence_card=[2])
        self.bayesian.add_cpds(a_cpd, b_cpd, c_cpd, d_cpd, e_cpd)

        # Square Markov model a - b - d - c - a.
        self.markov = MarkovModel([('a', 'b'), ('b', 'd'), ('a', 'c'),
                                   ('c', 'd')])
        factor_1 = Factor(['a', 'b'], [2, 2], np.array([100, 1, 1, 100]))
        factor_2 = Factor(['a', 'c'], [2, 2], np.array([40, 30, 100, 20]))
        factor_3 = Factor(['b', 'd'], [2, 2], np.array([1, 100, 100, 1]))
        factor_4 = Factor(['c', 'd'], [2, 2], np.array([60, 60, 40, 40]))
        self.markov.add_factors(factor_1, factor_2, factor_3, factor_4)

    def test_bayesian_inference_init(self):
        infer_bayesian = Inference(self.bayesian)
        self.assertEqual(set(infer_bayesian.variables),
                         {'a', 'b', 'c', 'd', 'e'})
        self.assertEqual(infer_bayesian.cardinality,
                         {'a': 2, 'b': 2, 'c': 2, 'd': 2, 'e': 2})
        self.assertIsInstance(infer_bayesian.factors, defaultdict)
        # Each variable maps to the factors of every CPD whose scope
        # contains it (its own CPD plus its child's).
        self.assertEqual(set(infer_bayesian.factors['a']),
                         set([self.bayesian.get_cpds('a').to_factor(),
                              self.bayesian.get_cpds('b').to_factor()]))
        self.assertEqual(set(infer_bayesian.factors['b']),
                         set([self.bayesian.get_cpds('b').to_factor(),
                              self.bayesian.get_cpds('c').to_factor()]))
        self.assertEqual(set(infer_bayesian.factors['c']),
                         set([self.bayesian.get_cpds('c').to_factor(),
                              self.bayesian.get_cpds('d').to_factor()]))
        self.assertEqual(set(infer_bayesian.factors['d']),
                         set([self.bayesian.get_cpds('d').to_factor(),
                              self.bayesian.get_cpds('e').to_factor()]))
        self.assertEqual(set(infer_bayesian.factors['e']),
                         set([self.bayesian.get_cpds('e').to_factor()]))

    def test_markov_inference_init(self):
        infer_markov = Inference(self.markov)
        self.assertEqual(set(infer_markov.variables), {'a', 'b', 'c', 'd'})
        self.assertEqual(infer_markov.cardinality,
                         {'a': 2, 'b': 2, 'c': 2, 'd': 2})
        self.assertEqual(infer_markov.factors,
                         {'a': [Factor(['a', 'b'], [2, 2],
                                       np.array([100, 1, 1, 100])),
                                Factor(['a', 'c'], [2, 2],
                                       np.array([40, 30, 100, 20]))],
                          'b': [Factor(['a', 'b'], [2, 2],
                                       np.array([100, 1, 1, 100])),
                                Factor(['b', 'd'], [2, 2],
                                       np.array([1, 100, 100, 1]))],
                          'c': [Factor(['a', 'c'], [2, 2],
                                       np.array([40, 30, 100, 20])),
                                Factor(['c', 'd'], [2, 2],
                                       np.array([60, 60, 40, 40]))],
                          'd': [Factor(['b', 'd'], [2, 2],
                                       np.array([1, 100, 100, 1])),
                                Factor(['c', 'd'], [2, 2],
                                       np.array([60, 60, 40, 40]))]})
def factorise(state, graph): state_names = [n.name for n in state.dom.names] graph_nodes = graph.get_nodes() graph_names = [n.get_name() for n in graph_nodes] l = len(state_names) if l != len(graph_names): raise Exception('Missing domain names of state in factorisation') if set(state_names) != set(graph_names): raise Exception('Non-matching graph and state names in factorisation') # make dictionary of names and corresponding masks masks = {} for i in range(l): ls = l * [0] ls[i] = 1 masks[state_names[i]] = ls # dictionary to be filled with cpts = conditional probability tables model = BayesianModel() model.add_nodes_from(graph_names) for node in graph_nodes: parents = get_parents(node) key = node.get_name() mask_cod = masks[key] if len(parents) == 0: # marginalise for initial nodes initial_state = state % mask_cod if len(initial_state.dom) > 1: raise Exception('Initial states must have dimension 1') dom_card = len(initial_state.dom[0]) state_array = initial_state.array cpd_array = np.zeros((dom_card, 1)) for i in range(dom_card): cpd_array[i][0] = state_array[i] cpd = TabularCPD(variable=key, variable_card=dom_card, values=cpd_array) model.add_cpds(cpd) else: # add edges and form conditional probility for internal nodes for p in parents: model.add_edge(p, key) mask_dom = mask_summation([masks[p] for p in parents]) chan = state[mask_cod:mask_dom] cod = chan.cod dom = chan.dom if len(cod) > 1: raise Exception('Domains must have dimension 1') chan_array = chan.array print("* ", key, len(dom[0]), len(cod[0]), chan_array.shape) prod = reduce(operator.mul, [len(d) for d in dom], 1) cpd_array = np.zeros((len(cod[0]), prod)) for i in range(len(dom[0])): cpd_array[i] = [ chan_array[i][j] for j in np.ndindex(*chan_array[i].shape) ] cpd = TabularCPD(variable=key, variable_card=len(cod[0]), values=cpd_array, evidence=parents, evidence_card=[len(d) for d in dom]) model.add_cpds(cpd) if not model.check_model(): raise Exception('Constructed model does not pass check') return model
def bayesnet():
    """Build and interrogate a small identity-matching Bayesian network.

    Three annotations i, j, k each carry a name variable N* (3 names), each
    pair carries a same/diff variable A** derived from the names, and scores
    S** depend on (A**, N*, N*).  The function prints every CPD, runs
    variable-elimination queries over an enumerated evidence space, prints
    model independencies, and finally draws the graph to ``foo.png``.

    NOTE(review): relies on module-level ``np`` and ``ut`` (utool) and
    publishes several locals via ``globals()`` — it is an exploratory
    script wrapped in a function, not library code.

    References:
        https://class.coursera.org/pgm-003/lecture/17
        http://www.cs.ubc.ca/~murphyk/Bayes/bnintro.html
        http://www3.cs.stonybrook.edu/~sael/teaching/cse537/Slides/chapter14d_BP.pdf
        http://www.cse.unsw.edu.au/~cs9417ml/Bayes/Pages/PearlPropagation.html
        https://github.com/pgmpy/pgmpy.git
        http://pgmpy.readthedocs.org/en/latest/
        http://nipy.bic.berkeley.edu:5000/download/11
    """
    # import operator as op
    # # Enumerate all possible events
    # varcard_list = list(map(op.attrgetter('variable_card'), cpd_list))
    # _esdat = list(ut.iprod(*map(range, varcard_list)))
    # _escol = list(map(op.attrgetter('variable'), cpd_list))
    # event_space = pd.DataFrame(_esdat, columns=_escol)
    # # Custom compression of event space to inspect a specific graph
    # def compress_space_flags(event_space, var1, var2, var3, cmp12_):
    #     """
    #     var1, var2, cmp_ = 'Lj', 'Lk', op.eq
    #     """
    #     import vtool as vt
    #     data = event_space
    #     other_cols = ut.setdiff_ordered(data.columns.tolist(), [var1, var2, var3])
    #     case_flags12 = cmp12_(data[var1], data[var2]).values
    #     # case_flags23 = cmp23_(data[var2], data[var3]).values
    #     # case_flags = np.logical_and(case_flags12, case_flags23)
    #     case_flags = case_flags12
    #     case_flags = case_flags.astype(np.int64)
    #     subspace = np.hstack((case_flags[:, None], data[other_cols].values))
    #     sel_ = vt.unique_row_indexes(subspace)
    #     flags = np.logical_and(mask, case_flags)
    #     return flags
    # # Build special cases
    # case_same = event_space.loc[compress_space_flags(event_space, 'Li', 'Lj', 'Lk', op.eq)]
    # case_diff = event_space.loc[compress_space_flags(event_space, 'Li', 'Lj', 'Lk', op.ne)]
    # special_cases = [
    #     case_same,
    #     case_diff,
    # ]
    from pgmpy.factors import TabularCPD
    from pgmpy.models import BayesianModel
    import pandas as pd
    from pgmpy.inference import BeliefPropagation  # NOQA
    from pgmpy.inference import VariableElimination  # NOQA

    # Human-readable state labels per semantic type.
    name_nice = ['n1', 'n2', 'n3']
    score_nice = ['low', 'high']
    match_nice = ['diff', 'same']
    num_names = len(name_nice)
    num_scores = len(score_nice)
    nid_basis = list(range(num_names))
    score_basis = list(range(num_scores))
    semtype2_nice = {
        'score': score_nice,
        'name': name_nice,
        'match': match_nice,
    }
    var2_cpd = {
    }
    # Exported so the nested printers (and interactive use) can reach them.
    globals()['semtype2_nice'] = semtype2_nice
    globals()['var2_cpd'] = var2_cpd

    # All ordered pairs of name ids, and a boolean mask of the equal pairs.
    name_combo = np.array(list(ut.iprod(nid_basis, nid_basis)))
    combo_is_same = name_combo.T[0] == name_combo.T[1]

    def get_expected_scores_prob(level1, level2):
        # level1 applies where the two names agree, (1 - level2) elsewhere.
        part1 = combo_is_same * level1
        part2 = (1 - combo_is_same) * (1 - (level2))
        expected_scores_level = part1 + part2
        return expected_scores_level

    # def make_cpd():

    def name_cpd(aid):
        # Uniform prior over names for annotation `aid`.
        from pgmpy.factors import TabularCPD
        cpd = TabularCPD(
            variable='N' + aid,
            variable_card=num_names,
            values=[[1.0 / num_names] * num_names])
        cpd.semtype = 'name'
        return cpd

    name_cpds = [name_cpd('i'), name_cpd('j'), name_cpd('k')]
    var2_cpd.update(
        dict(zip([cpd.variable for cpd in name_cpds], name_cpds)))
    if True:
        num_same_diff = 2
        # Deterministic same/diff indicator derived from the name pair.
        samediff_measure = np.array([
            # get_expected_scores_prob(.12, .2),
            # get_expected_scores_prob(.88, .8),
            get_expected_scores_prob(0, 0),
            get_expected_scores_prob(1, 1),
        ])
        samediff_vals = (samediff_measure /
                         samediff_measure.sum(axis=0)).tolist()

        def samediff_cpd(aid1, aid2):
            cpd = TabularCPD(
                variable='A' + aid1 + aid2,
                variable_card=num_same_diff,
                values=samediff_vals,
                evidence=['N' + aid1, 'N' + aid2],  # [::-1],
                evidence_card=[num_names, num_names])  # [::-1])
            cpd.semtype = 'match'
            return cpd

        samediff_cpds = [samediff_cpd('i', 'j'), samediff_cpd('j', 'k'),
                         samediff_cpd('k', 'i')]
        var2_cpd.update(
            dict(zip([cpd.variable for cpd in samediff_cpds], samediff_cpds)))

        if True:
            # Score depends on (A, N1, N2): rows are built by enumerating
            # every evidence state combination in ut.iprod order.
            def score_cpd(aid1, aid2):
                semtype = 'score'
                evidence = ['A' + aid1 + aid2, 'N' + aid1, 'N' + aid2]
                evidence_cpds = [var2_cpd[key] for key in evidence]
                evidence_nice = [semtype2_nice[cpd.semtype]
                                 for cpd in evidence_cpds]
                evidence_card = list(map(len, evidence_nice))
                evidence_states = list(ut.iprod(*evidence_nice))
                variable_basis = semtype2_nice[semtype]
                variable_values = []
                for mystate in variable_basis:
                    row = []
                    for state in evidence_states:
                        # state = (match_label, name1, name2); val is the
                        # unnormalised weight for this (mystate, state) cell.
                        if state[0] == state[1]:
                            if state[2] == 'same':
                                val = .2 if mystate == 'low' else .8
                            else:
                                val = 1
                                # val = .5 if mystate == 'low' else .5
                        elif state[0] != state[1]:
                            if state[2] == 'same':
                                val = .5 if mystate == 'low' else .5
                            else:
                                val = 1
                                # val = .9 if mystate == 'low' else .1
                        row.append(val)
                    variable_values.append(row)
                cpd = TabularCPD(
                    variable='S' + aid1 + aid2,
                    variable_card=len(variable_basis),
                    values=variable_values,
                    evidence=evidence,  # [::-1],
                    evidence_card=evidence_card)  # [::-1])
                cpd.semtype = semtype
                return cpd
        else:
            # Alternative: score depends on the match variable only.
            score_values = [
                [.8, .1],
                [.2, .9],
            ]

            def score_cpd(aid1, aid2):
                cpd = TabularCPD(
                    variable='S' + aid1 + aid2,
                    variable_card=num_scores,
                    values=score_values,
                    evidence=['A' + aid1 + aid2],  # [::-1],
                    evidence_card=[num_same_diff])  # [::-1])
                cpd.semtype = 'score'
                return cpd

        score_cpds = [score_cpd('i', 'j'), score_cpd('j', 'k')]
        cpd_list = name_cpds + score_cpds + samediff_cpds
    else:
        # Dead branch: scores conditioned directly on the two names.
        score_measure = np.array(
            [get_expected_scores_prob(level1, level2)
             for level1, level2 in zip(np.linspace(.1, .9, num_scores),
                                       np.linspace(.2, .8, num_scores))])
        score_values = (score_measure / score_measure.sum(axis=0)).tolist()

        def score_cpd(aid1, aid2):
            cpd = TabularCPD(
                variable='S' + aid1 + aid2,
                variable_card=num_scores,
                values=score_values,
                evidence=['N' + aid1, 'N' + aid2],
                evidence_card=[num_names, num_names])
            cpd.semtype = 'score'
            return cpd

        score_cpds = [score_cpd('i', 'j'), score_cpd('j', 'k')]
        cpd_list = name_cpds + score_cpds
        pass

    # Model edges are derived from each CPD's evidence list.
    input_graph = []
    for cpd in cpd_list:
        if cpd.evidence is not None:
            for evar in cpd.evidence:
                input_graph.append((evar, cpd.variable))
    name_model = BayesianModel(input_graph)
    name_model.add_cpds(*cpd_list)
    var2_cpd.update(
        dict(zip([cpd.variable for cpd in cpd_list], cpd_list)))
    globals()['var2_cpd'] = var2_cpd

    varnames = [cpd.variable for cpd in cpd_list]

    # --- PRINT CPDS ---

    cpd = score_cpds[0]

    def print_cpd(cpd):
        # Pretty-print a CPD as a DataFrame with nice state labels.
        print('CPT: %r' % (cpd,))
        index = semtype2_nice[cpd.semtype]
        if cpd.evidence is None:
            columns = ['None']
        else:
            basis_lists = [semtype2_nice[var2_cpd[ename].semtype]
                           for ename in cpd.evidence]
            columns = [','.join(x) for x in ut.iprod(*basis_lists)]
        data = cpd.get_cpd()
        print(pd.DataFrame(data, index=index, columns=columns))

    for cpd in name_model.get_cpds():
        print('----')
        print(cpd._str('phi'))
        print_cpd(cpd)

    # --- INFERENCE ---

    Ni = name_cpds[0]
    event_space_combos = {}
    event_space_combos[Ni.variable] = 0  # Set ni to always be Fred
    for cpd in cpd_list:
        if cpd.semtype == 'score':
            event_space_combos[cpd.variable] = list(range(cpd.variable_card))
    evidence_dict = ut.all_dict_combinations(event_space_combos)

    # Query about name of annotation k given different event space params

    def pretty_evidence(evidence):
        return [key + '=' + str(semtype2_nice[var2_cpd[key].semtype][val])
                for key, val in evidence.items()]

    def print_factor(factor):
        row_cards = factor.cardinality
        row_vars = factor.variables
        values = factor.values.reshape(np.prod(row_cards), 1).flatten()
        # col_cards = 1
        # col_vars = ['']
        basis_lists = list(zip(*list(ut.iprod(*[range(c)
                                                for c in row_cards]))))
        nice_basis_lists = []
        for varname, basis in zip(row_vars, basis_lists):
            cpd = var2_cpd[varname]
            _nice_basis = ut.take(semtype2_nice[cpd.semtype], basis)
            nice_basis = ['%s=%s' % (varname, val) for val in _nice_basis]
            nice_basis_lists.append(nice_basis)
        row_lbls = [', '.join(sorted(x)) for x in zip(*nice_basis_lists)]
        print(ut.repr3(dict(zip(row_lbls, values)), precision=3, align=True,
                       key_order_metric='-val'))

    # name_belief = BeliefPropagation(name_model)
    name_belief = VariableElimination(name_model)
    import pgmpy
    import six  # NOQA

    def try_query(evidence):
        # Query every non-evidence variable and print the result factors.
        print('--------')
        query_vars = ut.setdiff_ordered(varnames, list(evidence.keys()))
        evidence_str = ', '.join(pretty_evidence(evidence))
        probs = name_belief.query(query_vars, evidence)
        factor_list = probs.values()
        joint_factor = pgmpy.factors.factor_product(*factor_list)
        print('P(' + ', '.join(query_vars) + ' | ' + evidence_str + ')')
        # print(six.text_type(joint_factor))
        factor = joint_factor  # NOQA
        # print_factor(factor)
        # import utool as ut
        print(ut.hz_str([(f._str(phi_or_p='phi')) for f in factor_list]))

    for evidence in evidence_dict:
        try_query(evidence)

    evidence = {'Aij': 1, 'Ajk': 1, 'Aki': 1, 'Ni': 0}
    try_query(evidence)
    evidence = {'Aij': 0, 'Ajk': 0, 'Aki': 0, 'Ni': 0}
    try_query(evidence)

    globals()['score_nice'] = score_nice
    globals()['name_nice'] = name_nice
    globals()['score_basis'] = score_basis
    globals()['nid_basis'] = nid_basis

    print('Independencies')
    print(name_model.get_independencies())
    print(name_model.local_independencies([Ni.variable]))

    # name_belief = BeliefPropagation(name_model)
    # # name_belief = VariableElimination(name_model)
    # for case in special_cases:
    #     test_data = case.drop('Lk', axis=1)
    #     test_data = test_data.reset_index(drop=True)
    #     print('----')
    #     for i in range(test_data.shape[0]):
    #         evidence = test_data.loc[i].to_dict()
    #         probs = name_belief.query(['Lk'], evidence)
    #         factor = probs['Lk']
    #         probs = factor.values
    #         evidence_ = evidence.copy()
    #         evidence_['Li'] = name_nice[evidence['Li']]
    #         evidence_['Lj'] = name_nice[evidence['Lj']]
    #         evidence_['Sij'] = score_nice[evidence['Sij']]
    #         evidence_['Sjk'] = score_nice[evidence['Sjk']]
    #         nice2_prob = ut.odict(zip(name_nice, probs.tolist()))
    #         ut.print_python_code('P(Lk | {evidence}) = {cpt}'.format(
    #             evidence=(ut.repr2(evidence_, explicit=True, nobraces=True, strvals=True)),
    #             cpt=ut.repr3(nice2_prob, precision=3, align=True, key_order_metric='-val')
    #         ))
    # for case in special_cases:
    #     test_data = case.drop('Lk', axis=1)
    #     test_data = test_data.drop('Lj', axis=1)
    #     test_data = test_data.reset_index(drop=True)
    #     print('----')
    #     for i in range(test_data.shape[0]):
    #         evidence = test_data.loc[i].to_dict()
    #         query_vars = ['Lk', 'Lj']
    #         probs = name_belief.query(query_vars, evidence)
    #         for queryvar in query_vars:
    #             factor = probs[queryvar]
    #             print(factor._str('phi'))
    #             probs = factor.values
    #             evidence_ = evidence.copy()
    #             evidence_['Li'] = name_nice[evidence['Li']]
    #             evidence_['Sij'] = score_nice[evidence['Sij']]
    #             evidence_['Sjk'] = score_nice[evidence['Sjk']]
    #             nice2_prob = ut.odict(zip([queryvar + '=' + x for x in name_nice], probs.tolist()))
    #             ut.print_python_code('P({queryvar} | {evidence}) = {cpt}'.format(
    #                 query_var=query_var,
    #                 evidence=(ut.repr2(evidence_, explicit=True, nobraces=True, strvals=True)),
    #                 cpt=ut.repr3(nice2_prob, precision=3, align=True, key_order_metric='-val')
    #             ))

    # _ draw model
    import plottool as pt
    import networkx as netx
    fig = pt.figure()  # NOQA
    fig.clf()
    ax = pt.gca()
    netx_nodes = [(node, {}) for node in name_model.nodes()]
    netx_edges = [(etup[0], etup[1], {}) for etup in name_model.edges()]
    netx_graph = netx.DiGraph()
    netx_graph.add_nodes_from(netx_nodes)
    netx_graph.add_edges_from(netx_edges)
    # pos = netx.graphviz_layout(netx_graph)
    pos = netx.pydot_layout(netx_graph, prog='dot')
    netx.draw(netx_graph, pos=pos, ax=ax, with_labels=True)
    pt.plt.savefig('foo.png')
    ut.startfile('foo.png')
from pgmpy.models import BayesianModel from pgmpy.factors.discrete import TabularCPD # Defining the network structure model = BayesianModel([('C', 'H'), ('P', 'H')]) # Defining the CPDs: cpd_c = TabularCPD('C', 3, [[0.33, 0.33, 0.33]]) cpd_p = TabularCPD('P', 3, [[0.33, 0.33, 0.33]]) cpd_h = TabularCPD( 'H', 3, [[0, 0, 0, 0, 0.5, 1, 0, 1, 0.5], [0.5, 0, 1, 0, 0, 0, 1, 0, 0.5], [0.5, 1, 0, 1, 0.5, 0, 0, 0, 0]], evidence=['C', 'P'], evidence_card=[3, 3]) # Associating the CPDs with the network structure. model.add_cpds(cpd_c, cpd_p, cpd_h) # Some other methods print model.get_cpds() print model.check_model() # Infering the posterior probability from pgmpy.inference import VariableElimination infer = VariableElimination(model) posterior_p = infer.query(['P'], evidence={'C': 0, 'H': 2}) print(posterior_p['P'])
def get_game_network(): """Create a Bayes Net representation of the game problem. Name the nodes as "A","B","C","AvB","BvC" and "CvA". """ # Add nodes BayesNet = BayesianModel() BayesNet.add_node("A") BayesNet.add_node("B") BayesNet.add_node("C") BayesNet.add_node("AvB") BayesNet.add_node("BvC") BayesNet.add_node("CvA") # Add edges BayesNet.add_edge("A", "AvB") BayesNet.add_edge("A", "CvA") BayesNet.add_edge("B", "AvB") BayesNet.add_edge("B", "BvC") BayesNet.add_edge("C", "BvC") BayesNet.add_edge("C", "CvA") # Add probabilities cpd_A = TabularCPD("A", 4, values=[[0.15], [0.45], [0.30], [0.10]]) cpd_B = TabularCPD("B", 4, values=[[0.15], [0.45], [0.30], [0.10]]) cpd_C = TabularCPD("C", 4, values=[[0.15], [0.45], [0.30], [0.10]]) cpd_avb = TabularCPD("AvB", 3, values=[[ 0.1, 0.2, 0.15, 0.05, 0.6, 0.1, 0.2, 0.15, 0.75, 0.6, 0.1, 0.2, 0.9, 0.75, 0.6, 0.1 ], [ 0.1, 0.6, 0.75, 0.9, 0.2, 0.1, 0.6, 0.75, 0.15, 0.2, 0.1, 0.6, 0.05, 0.15, 0.2, 0.1 ], [ 0.8, 0.2, 0.1, 0.05, 0.2, 0.8, 0.2, 0.1, 0.1, 0.2, 0.8, 0.2, 0.05, 0.1, 0.2, 0.8 ]], evidence=["A", "B"], evidence_card=[4, 4]) cpd_bvc = TabularCPD("BvC", 3, values=[[ 0.1, 0.2, 0.15, 0.05, 0.6, 0.1, 0.2, 0.15, 0.75, 0.6, 0.1, 0.2, 0.9, 0.75, 0.6, 0.1 ], [ 0.1, 0.6, 0.75, 0.9, 0.2, 0.1, 0.6, 0.75, 0.15, 0.2, 0.1, 0.6, 0.05, 0.15, 0.2, 0.1 ], [ 0.8, 0.2, 0.1, 0.05, 0.2, 0.8, 0.2, 0.1, 0.1, 0.2, 0.8, 0.2, 0.05, 0.1, 0.2, 0.8 ]], evidence=["B", "C"], evidence_card=[4, 4]) cpd_avc = TabularCPD("CvA", 3, values=[[ 0.1, 0.2, 0.15, 0.05, 0.6, 0.1, 0.2, 0.15, 0.75, 0.6, 0.1, 0.2, 0.9, 0.75, 0.6, 0.1 ], [ 0.1, 0.6, 0.75, 0.9, 0.2, 0.1, 0.6, 0.75, 0.15, 0.2, 0.1, 0.6, 0.05, 0.15, 0.2, 0.1 ], [ 0.8, 0.2, 0.1, 0.05, 0.2, 0.8, 0.2, 0.1, 0.1, 0.2, 0.8, 0.2, 0.05, 0.1, 0.2, 0.8 ]], evidence=["C", "A"], evidence_card=[4, 4]) BayesNet.add_cpds(cpd_A, cpd_B, cpd_C, cpd_avb, cpd_bvc, cpd_avc) return BayesNet
class TestBayesianModelMethods(unittest.TestCase):
    """Tests for structural BayesianModel methods: moralization, ancestors,
    independencies, I-map/I-equivalence checks, copying and node removal."""

    def setUp(self):
        # G:  a -> d <- b -> c, d -> e  (no CPDs attached).
        self.G = BayesianModel([('a', 'd'), ('b', 'd'),
                                ('d', 'e'), ('b', 'c')])
        # G1: classic student network fragment with CPDs attached.
        self.G1 = BayesianModel([('diff', 'grade'), ('intel', 'grade')])
        diff_cpd = TabularCPD('diff', 2, values=[[0.2], [0.8]])
        intel_cpd = TabularCPD('intel', 3, values=[[0.5], [0.3], [0.2]])
        grade_cpd = TabularCPD('grade', 3,
                               values=[[0.1, 0.1, 0.1, 0.1, 0.1, 0.1],
                                       [0.1, 0.1, 0.1, 0.1, 0.1, 0.1],
                                       [0.8, 0.8, 0.8, 0.8, 0.8, 0.8]],
                               evidence=['diff', 'intel'],
                               evidence_card=[2, 3])
        self.G1.add_cpds(diff_cpd, intel_cpd, grade_cpd)
        # G2: diamond-ish structure used for ancestor queries.
        self.G2 = BayesianModel([('d', 'g'), ('g', 'l'),
                                 ('i', 'g'), ('i', 'l')])

    def test_moral_graph(self):
        moral_graph = self.G.moralize()
        self.assertListEqual(sorted(moral_graph.nodes()),
                             ['a', 'b', 'c', 'd', 'e'])
        # Moralization marries co-parents a-b; edges are undirected, so
        # accept either orientation.
        for edge in moral_graph.edges():
            self.assertTrue(edge in [('a', 'b'), ('a', 'd'), ('b', 'c'),
                                     ('d', 'b'), ('e', 'd')] or
                            (edge[1], edge[0]) in [('a', 'b'), ('a', 'd'),
                                                   ('b', 'c'), ('d', 'b'),
                                                   ('e', 'd')])

    def test_moral_graph_with_edge_present_over_parents(self):
        # Co-parents a and b are already connected; moralization must not
        # duplicate or disturb that edge.
        G = BayesianModel([('a', 'd'), ('d', 'e'), ('b', 'd'),
                           ('b', 'c'), ('a', 'b')])
        moral_graph = G.moralize()
        self.assertListEqual(sorted(moral_graph.nodes()),
                             ['a', 'b', 'c', 'd', 'e'])
        for edge in moral_graph.edges():
            self.assertTrue(edge in [('a', 'b'), ('c', 'b'), ('d', 'a'),
                                     ('d', 'b'), ('d', 'e')] or
                            (edge[1], edge[0]) in [('a', 'b'), ('c', 'b'),
                                                   ('d', 'a'), ('d', 'b'),
                                                   ('d', 'e')])

    def test_get_ancestors_of_success(self):
        # _get_ancestors_of includes the queried node(s) themselves.
        ancenstors1 = self.G2._get_ancestors_of('g')
        ancenstors2 = self.G2._get_ancestors_of('d')
        ancenstors3 = self.G2._get_ancestors_of(['i', 'l'])
        self.assertEqual(ancenstors1, {'d', 'i', 'g'})
        self.assertEqual(ancenstors2, {'d'})
        self.assertEqual(ancenstors3, {'g', 'i', 'l', 'd'})

    def test_get_ancestors_of_failure(self):
        # Unknown node must raise.
        self.assertRaises(ValueError, self.G2._get_ancestors_of, 'h')

    def test_local_independencies(self):
        # Each variable is independent of its non-descendants given parents.
        self.assertEqual(self.G.local_independencies('a'),
                         Independencies(['a', ['b', 'c']]))
        self.assertEqual(self.G.local_independencies('c'),
                         Independencies(['c', ['a', 'd', 'e'], 'b']))
        self.assertEqual(self.G.local_independencies('d'),
                         Independencies(['d', 'c', ['b', 'a']]))
        self.assertEqual(self.G.local_independencies('e'),
                         Independencies(['e', ['c', 'b', 'a'], 'd']))
        self.assertEqual(self.G.local_independencies('b'),
                         Independencies(['b', 'a']))
        # 'grade' has no non-descendant non-parents -> empty set.
        self.assertEqual(self.G1.local_independencies('grade'),
                         Independencies())

    def test_get_independencies(self):
        # Chain and fork give X _|_ Z | Y; collider gives X _|_ Z.
        chain = BayesianModel([('X', 'Y'), ('Y', 'Z')])
        self.assertEqual(chain.get_independencies(),
                         Independencies(('X', 'Z', 'Y'), ('Z', 'X', 'Y')))
        fork = BayesianModel([('Y', 'X'), ('Y', 'Z')])
        self.assertEqual(fork.get_independencies(),
                         Independencies(('X', 'Z', 'Y'), ('Z', 'X', 'Y')))
        collider = BayesianModel([('X', 'Y'), ('Z', 'Y')])
        self.assertEqual(collider.get_independencies(),
                         Independencies(('X', 'Z'), ('Z', 'X')))

    def test_is_imap(self):
        # is_imap accepts a JointProbabilityDistribution, rejects a bare
        # DiscreteFactor of the same values.
        val = [0.01, 0.01, 0.08, 0.006, 0.006, 0.048,
               0.004, 0.004, 0.032, 0.04, 0.04, 0.32,
               0.024, 0.024, 0.192, 0.016, 0.016, 0.128]
        JPD = JointProbabilityDistribution(['diff', 'intel', 'grade'],
                                           [2, 3, 3], val)
        fac = DiscreteFactor(['diff', 'intel', 'grade'], [2, 3, 3], val)
        self.assertTrue(self.G1.is_imap(JPD))
        self.assertRaises(TypeError, self.G1.is_imap, fac)

    def test_get_immoralities(self):
        # Immoralities = unmarried co-parent pairs.
        G = BayesianModel([('x', 'y'), ('z', 'y'), ('x', 'z'), ('w', 'y')])
        self.assertEqual(G.get_immoralities(), {('w', 'x'), ('w', 'z')})
        G1 = BayesianModel([('x', 'y'), ('z', 'y'), ('z', 'x'), ('w', 'y')])
        self.assertEqual(G1.get_immoralities(), {('w', 'x'), ('w', 'z')})
        G2 = BayesianModel([('x', 'y'), ('z', 'y'), ('x', 'z'),
                            ('w', 'y'), ('w', 'x')])
        self.assertEqual(G2.get_immoralities(), {('w', 'z')})

    def test_is_iequivalent(self):
        # I-equivalence: same skeleton and same immoralities.
        G = BayesianModel([('x', 'y'), ('z', 'y'), ('x', 'z'), ('w', 'y')])
        self.assertRaises(TypeError, G.is_iequivalent, MarkovModel())
        G1 = BayesianModel([('V', 'W'), ('W', 'X'), ('X', 'Y'), ('Z', 'Y')])
        G2 = BayesianModel([('W', 'V'), ('X', 'W'), ('X', 'Y'), ('Z', 'Y')])
        self.assertTrue(G1.is_iequivalent(G2))
        G3 = BayesianModel([('W', 'V'), ('W', 'X'), ('Y', 'X'), ('Z', 'Y')])
        self.assertFalse(G3.is_iequivalent(G2))

    def test_copy(self):
        # copy() must deep-copy structure and CPDs: later mutation of the
        # original must not leak into the copy.
        model_copy = self.G1.copy()
        self.assertEqual(sorted(self.G1.nodes()), sorted(model_copy.nodes()))
        self.assertEqual(sorted(self.G1.edges()), sorted(model_copy.edges()))
        self.assertNotEqual(id(self.G1.get_cpds('diff')),
                            id(model_copy.get_cpds('diff')))
        self.G1.remove_cpds('diff')
        diff_cpd = TabularCPD('diff', 2, values=[[0.3], [0.7]])
        self.G1.add_cpds(diff_cpd)
        self.assertNotEqual(self.G1.get_cpds('diff'),
                            model_copy.get_cpds('diff'))
        self.G1.remove_node('intel')
        self.assertNotEqual(sorted(self.G1.nodes()),
                            sorted(model_copy.nodes()))
        self.assertNotEqual(sorted(self.G1.edges()),
                            sorted(model_copy.edges()))

    def test_remove_node(self):
        # Removing a node also drops the CPDs that reference it.
        self.G1.remove_node('diff')
        self.assertEqual(sorted(self.G1.nodes()), sorted(['grade', 'intel']))
        self.assertRaises(ValueError, self.G1.get_cpds, 'diff')

    def test_remove_nodes_from(self):
        self.G1.remove_nodes_from(['diff', 'grade'])
        self.assertEqual(sorted(self.G1.nodes()), sorted(['intel']))
        self.assertRaises(ValueError, self.G1.get_cpds, 'diff')
        self.assertRaises(ValueError, self.G1.get_cpds, 'grade')

    def tearDown(self):
        del self.G
        del self.G1
model = BayesianModel([('A', 'B'), ('C', 'B'), ('C', 'D'), ('B', 'E')]) model.fit(values) predict_data = predict_data.copy() predict_data.drop('E', axis=1, inplace=True) #print predict_data y_pred = model.predict(predict_data) y_prob = model.predict_probability(predict_data) from pgmpy.sampling import BayesianModelSampling model = BayesianModel([('D', 'G'), ('I', 'G')]) cpd_d = TabularCPD('D', 2, [[0.6], [0.4]]) cpd_i = TabularCPD('I', 2, [[0.7], [0.3]]) cpd_g = TabularCPD('G', 3, [[0.3, 0.05, 0.9, 0.5], [0.4, 0.25, 0.08, 0.3], [0.3, 0.7, 0.02, 0.2]], ['D', 'I'], [2, 2]) model.add_cpds(cpd_d, cpd_i, cpd_g) infer = BayesianModelSampling(model) data = infer.forward_sample(500) #print data model.fit(data, estimator=MaximumLikelihoodEstimator) for cpd in model.get_cpds(): print("CPD of {variable}:".format(variable=cpd.variable)) print(cpd)
class TestVariableElimination(unittest.TestCase):
    """Tests for VariableElimination queries, max-marginals, MAP queries and
    induced graph/width on a 6-node Bayesian network."""

    def setUp(self):
        # Network: A -> J <- R, J -> Q, J -> L <- G.
        self.bayesian_model = BayesianModel([('A', 'J'), ('R', 'J'),
                                             ('J', 'Q'), ('J', 'L'),
                                             ('G', 'L')])
        cpd_a = TabularCPD('A', 2, [[0.2], [0.8]])
        cpd_r = TabularCPD('R', 2, [[0.4], [0.6]])
        cpd_j = TabularCPD('J', 2, [[0.9, 0.6, 0.7, 0.1],
                                    [0.1, 0.4, 0.3, 0.9]],
                           ['R', 'A'], [2, 2])
        cpd_q = TabularCPD('Q', 2, [[0.9, 0.2], [0.1, 0.8]], ['J'], [2])
        cpd_l = TabularCPD('L', 2, [[0.9, 0.45, 0.8, 0.1],
                                    [0.1, 0.55, 0.2, 0.9]],
                           ['G', 'J'], [2, 2])
        cpd_g = TabularCPD('G', 2, [[0.6], [0.4]])
        self.bayesian_model.add_cpds(cpd_a, cpd_g, cpd_j, cpd_l,
                                     cpd_q, cpd_r)
        self.bayesian_inference = VariableElimination(self.bayesian_model)

    # All the values that are used for comparision in the all the tests are
    # found using SAMIAM (assuming that it is correct ;))

    def test_query_single_variable(self):
        query_result = self.bayesian_inference.query(['J'])
        np_test.assert_array_almost_equal(query_result['J'].values,
                                          np.array([0.416, 0.584]))

    def test_query_multiple_variable(self):
        query_result = self.bayesian_inference.query(['Q', 'J'])
        np_test.assert_array_almost_equal(query_result['J'].values,
                                          np.array([0.416, 0.584]))
        np_test.assert_array_almost_equal(query_result['Q'].values,
                                          np.array([0.4912, 0.5088]))

    def test_query_single_variable_with_evidence(self):
        query_result = self.bayesian_inference.query(variables=['J'],
                                                     evidence={'A': 0,
                                                               'R': 1})
        np_test.assert_array_almost_equal(query_result['J'].values,
                                          np.array([0.60, 0.40]))

    def test_query_multiple_variable_with_evidence(self):
        query_result = self.bayesian_inference.query(variables=['J', 'Q'],
                                                     evidence={'A': 0,
                                                               'R': 0,
                                                               'G': 0,
                                                               'L': 1})
        np_test.assert_array_almost_equal(query_result['J'].values,
                                          np.array([0.818182, 0.181818]))
        np_test.assert_array_almost_equal(query_result['Q'].values,
                                          np.array([0.772727, 0.227273]))

    def test_query_multiple_times(self):
        # This just tests that the models are not getting modified while
        # querying them: repeating each query must give identical results.
        query_result = self.bayesian_inference.query(['J'])
        query_result = self.bayesian_inference.query(['J'])
        np_test.assert_array_almost_equal(query_result['J'].values,
                                          np.array([0.416, 0.584]))
        query_result = self.bayesian_inference.query(['Q', 'J'])
        query_result = self.bayesian_inference.query(['Q', 'J'])
        np_test.assert_array_almost_equal(query_result['J'].values,
                                          np.array([0.416, 0.584]))
        np_test.assert_array_almost_equal(query_result['Q'].values,
                                          np.array([0.4912, 0.5088]))
        query_result = self.bayesian_inference.query(variables=['J'],
                                                     evidence={'A': 0,
                                                               'R': 1})
        query_result = self.bayesian_inference.query(variables=['J'],
                                                     evidence={'A': 0,
                                                               'R': 1})
        np_test.assert_array_almost_equal(query_result['J'].values,
                                          np.array([0.60, 0.40]))
        query_result = self.bayesian_inference.query(variables=['J', 'Q'],
                                                     evidence={'A': 0,
                                                               'R': 0,
                                                               'G': 0,
                                                               'L': 1})
        query_result = self.bayesian_inference.query(variables=['J', 'Q'],
                                                     evidence={'A': 0,
                                                               'R': 0,
                                                               'G': 0,
                                                               'L': 1})
        np_test.assert_array_almost_equal(query_result['J'].values,
                                          np.array([0.818182, 0.181818]))
        np_test.assert_array_almost_equal(query_result['Q'].values,
                                          np.array([0.772727, 0.227273]))

    def test_max_marginal(self):
        np_test.assert_almost_equal(self.bayesian_inference.max_marginal(),
                                    0.1659, decimal=4)

    def test_max_marginal_var(self):
        np_test.assert_almost_equal(
            self.bayesian_inference.max_marginal(['G']), 0.5714, decimal=4)

    def test_max_marginal_var1(self):
        np_test.assert_almost_equal(
            self.bayesian_inference.max_marginal(['G', 'R']),
            0.4055, decimal=4)

    def test_max_marginal_var2(self):
        np_test.assert_almost_equal(
            self.bayesian_inference.max_marginal(['G', 'R', 'A']),
            0.3260, decimal=4)

    def test_map_query(self):
        # Full MAP assignment with no evidence.
        map_query = self.bayesian_inference.map_query()
        self.assertDictEqual(map_query, {'A': 1, 'R': 1, 'J': 1, 'Q': 1,
                                         'G': 0, 'L': 0})

    def test_map_query_with_evidence(self):
        map_query = self.bayesian_inference.map_query(['A', 'R', 'L'],
                                                      {'J': 0, 'Q': 1,
                                                       'G': 0})
        self.assertDictEqual(map_query, {'A': 1, 'R': 0, 'L': 0})

    def test_induced_graph(self):
        # Fill-in edges produced by this elimination order.
        induced_graph = self.bayesian_inference.induced_graph(
            ['G', 'Q', 'A', 'J', 'L', 'R'])
        result_edges = sorted([sorted(x) for x in induced_graph.edges()])
        self.assertEqual([['A', 'J'], ['A', 'R'], ['G', 'J'], ['G', 'L'],
                          ['J', 'L'], ['J', 'Q'], ['J', 'R'], ['L', 'R']],
                         result_edges)

    def test_induced_width(self):
        # Width = size of largest clique in the induced graph minus one.
        result_width = self.bayesian_inference.induced_width(
            ['G', 'Q', 'A', 'J', 'L', 'R'])
        self.assertEqual(2, result_width)

    def tearDown(self):
        del self.bayesian_inference
        del self.bayesian_model
class TestUAIWriter(unittest.TestCase):
    """Tests UAIWriter serialisation of a Bayesian and a Markov model to the
    UAI file format."""

    def setUp(self):
        self.maxDiff = None
        # Dog-problem Bayesian network.
        edges = [['family-out', 'dog-out'],
                 ['bowel-problem', 'dog-out'],
                 ['family-out', 'light-on'],
                 ['dog-out', 'hear-bark']]
        cpds = {'bowel-problem': np.array([[0.01],
                                           [0.99]]),
                'dog-out': np.array([[0.99, 0.01, 0.97, 0.03],
                                     [0.9, 0.1, 0.3, 0.7]]),
                'family-out': np.array([[0.15],
                                        [0.85]]),
                'hear-bark': np.array([[0.7, 0.3],
                                       [0.01, 0.99]]),
                'light-on': np.array([[0.6, 0.4],
                                      [0.05, 0.95]])}
        states = {'bowel-problem': ['true', 'false'],
                  'dog-out': ['true', 'false'],
                  'family-out': ['true', 'false'],
                  'hear-bark': ['true', 'false'],
                  'light-on': ['true', 'false']}
        parents = {'bowel-problem': [],
                   'dog-out': ['family-out', 'bowel-problem'],
                   'family-out': [],
                   'hear-bark': ['dog-out'],
                   'light-on': ['family-out']}
        self.bayesmodel = BayesianModel(edges)
        tabular_cpds = []
        for var, values in cpds.items():
            cpd = TabularCPD(var, len(states[var]), values,
                             evidence=parents[var],
                             evidence_card=[len(states[evidence_var])
                                            for evidence_var in parents[var]])
            tabular_cpds.append(cpd)
        self.bayesmodel.add_cpds(*tabular_cpds)
        self.bayeswriter = UAIWriter(self.bayesmodel)

        # Triangle Markov network with one pairwise and one triple factor.
        edges = {('var_0', 'var_1'), ('var_0', 'var_2'), ('var_1', 'var_2')}
        self.markovmodel = MarkovModel(edges)
        tables = [(['var_0', 'var_1'],
                   ['4.000', '2.400', '1.000', '0.000']),
                  (['var_0', 'var_1', 'var_2'],
                   ['2.2500', '3.2500', '3.7500', '0.0000', '0.0000',
                    '10.0000', '1.8750', '4.0000', '3.3330', '2.0000',
                    '2.0000', '3.4000'])]
        domain = {'var_1': '2', 'var_2': '3', 'var_0': '2'}
        factors = []
        for table in tables:
            variables = table[0]
            cardinality = [int(domain[var]) for var in variables]
            values = list(map(float, table[1]))
            factor = Factor(variables, cardinality, values)
            factors.append(factor)
        self.markovmodel.add_factors(*factors)
        self.markovwriter = UAIWriter(self.markovmodel)

    def test_bayes_model(self):
        # Expected UAI text: preamble (type, #vars, cardinalities, #factors,
        # factor scopes) followed by the flattened CPD tables.
        # NOTE(review): the exact line layout below was reconstructed from a
        # whitespace-collapsed source following the UAI format — confirm
        # against the writer's actual output.
        self.expected_bayes_file = """BAYES
5
2 2 2 2 2
5
1 0
3 2 0 1
1 2
2 1 3
2 2 4

2
0.01 0.99
8
0.99 0.01 0.97 0.03 0.9 0.1 0.3 0.7
2
0.15 0.85
4
0.7 0.3 0.01 0.99
4
0.6 0.4 0.05 0.95"""
        self.assertEqual(str(self.bayeswriter.__str__()),
                         str(self.expected_bayes_file))

    def test_markov_model(self):
        # Same reconstruction caveat as test_bayes_model above.
        self.expected_markov_file = """MARKOV
3
2 2 3
2
2 0 1
3 0 1 2

4
4.0 2.4 1.0 0.0
12
2.25 3.25 3.75 0.0 0.0 10.0 1.875 4.0 3.333 2.0 2.0 3.4"""
        self.assertEqual(str(self.markovwriter.__str__()),
                         str(self.expected_markov_file))
'A': ['1', '0'], 'D': ['1', '0'] }) #Boss capabilites cpd_B = TabularCPD(variable='B', variable_card=2, values=[[0.01, 0.45, 0.28, 0.95], [0.99, 0.55, 0.72, 0.05]], evidence=['L', 'M'], evidence_card=[2, 2], state_names={ 'B': ['1', '0'], 'L': ['1', '0'], 'M': ['1', '0'] }) G.add_cpds(cpd_A, cpd_D, cpd_M, cpd_L, cpd_B) infer = VariableElimination(G) l_dist = infer.query(['L']) b_dist = infer.query(['B']) ex1_dist = infer.query(['B'], evidence={'L': '0', 'M': '0'}) ex2_dist = infer.query(['B'], evidence={'L': '0', 'M': '1'}) ex3_dist = infer.query(['B'], evidence={'L': '1', 'M': '0'}) ex4_dist = infer.query(['B'], evidence={'L': '1', 'M': '1'}) print(l_dist) print() print(b_dist) print() print(ex1_dist) print()
lifeExp_cpd = TabularCPD( variable='L',variable_card=2, values=[[0.7, 0.25, 0.1],[0.3, 0.75, 0.9]], evidence=['I'],evidence_card=[3]) femaleSchool_cpd = TabularCPD( variable='F',variable_card=2, values=[[.8,.3,.2],[.2,.7,.8]], evidence=['I'],evidence_card=[3]) govtCorr_cpd = TabularCPD( variable='G', variable_card=2, values=[[.05,.4,.55,.85],[.95,.6,.45,.15]], evidence=['F','L'],evidence_card=[2,2]) model.add_cpds(income_cpd, lifeExp_cpd, femaleSchool_cpd, govtCorr_cpd) inference = VariableElimination(model) prob_G = inference.query(variables='G',evidence=dict([('L',1)])) print(prob_G['G']) # +-----+----------+ # | G | phi(G) | # |-----+----------| # | G_0 | 0.7292 | # | G_1 | 0.2708 | # +-----+----------+ inference = VariableElimination(model) prob_G = inference.query(variables='G',evidence=dict([('F',1)])) print(prob_G['G']) # +-----+----------+ # | G | phi(G) |
values=[[0.01], [0.99]]) cpd_do = TabularCPD(variable='dog_out', variable_card=2, values=[[0.99, 0.9, 0.97, 0.3], [0.01, 0.1, 0.03, 0.7]], evidence=['family_out', 'bowel_problem'], evidence_card=[2, 2]) cpd_lo = TabularCPD(variable='light_on', variable_card=2, values=[[0.6, 0.05], [0.4, 0.95]], evidence=['family_out'], evidence_card=[2]) cpd_hb = TabularCPD(variable='hear_bark', variable_card=2, values=[[0.7, 0.01], [0.3, 0.99]], evidence=['dog_out'], evidence_card=[2]) #integrity checking model.add_cpds(cpd_fo, cpd_bp, cpd_do, cpd_lo, cpd_hb) model.check_model() junction_tree = model.to_junction_tree() print(junction_tree.nodes()) infer_bp = BeliefPropagation(junction_tree) print( infer_bp.query(['family_out'], evidence={ 'light_on': 0, 'hear_bark': 1 })['family_out'])
class TestBeliefPropagation(unittest.TestCase):
    """Tests for BeliefPropagation on a junction tree (sum/max calibration)
    and on a Bayesian network (query / map_query).

    Expected calibration beliefs are recomputed by hand from the same three
    factors; expected query values come from SAMIAM (see note below).
    """

    def setUp(self):
        # Chain junction tree (A,B)-(B,C)-(C,D) with one factor per clique.
        self.junction_tree = JunctionTree([(('A', 'B'), ('B', 'C')),
                                           (('B', 'C'), ('C', 'D'))])
        phi1 = Factor(['A', 'B'], [2, 3], range(6))
        phi2 = Factor(['B', 'C'], [3, 2], range(6))
        phi3 = Factor(['C', 'D'], [2, 2], range(4))
        self.junction_tree.add_factors(phi1, phi2, phi3)
        # Small Bayesian network used by the query/map_query tests below.
        self.bayesian_model = BayesianModel([('A', 'J'), ('R', 'J'),
                                             ('J', 'Q'), ('J', 'L'),
                                             ('G', 'L')])
        cpd_a = TabularCPD('A', 2, [[0.2], [0.8]])
        cpd_r = TabularCPD('R', 2, [[0.4], [0.6]])
        cpd_j = TabularCPD('J', 2,
                           [[0.9, 0.6, 0.7, 0.1], [0.1, 0.4, 0.3, 0.9]],
                           ['R', 'A'], [2, 2])
        cpd_q = TabularCPD('Q', 2, [[0.9, 0.2], [0.1, 0.8]], ['J'], [2])
        cpd_l = TabularCPD('L', 2,
                           [[0.9, 0.45, 0.8, 0.1], [0.1, 0.55, 0.2, 0.9]],
                           ['G', 'J'], [2, 2])
        cpd_g = TabularCPD('G', 2, [[0.6], [0.4]])
        self.bayesian_model.add_cpds(cpd_a, cpd_g, cpd_j, cpd_l, cpd_q, cpd_r)

    def test_calibrate_clique_belief(self):
        """Clique beliefs after sum-product calibration match hand-computed values."""
        belief_propagation = BeliefPropagation(self.junction_tree)
        belief_propagation.calibrate()
        clique_belief = belief_propagation.get_clique_beliefs()
        # Fresh copies of the clique factors to compute the expected beliefs.
        phi1 = Factor(['A', 'B'], [2, 3], range(6))
        phi2 = Factor(['B', 'C'], [3, 2], range(6))
        phi3 = Factor(['C', 'D'], [2, 2], range(4))
        # Expected belief of a clique = its factor times the incoming messages.
        b_A_B = phi1 * (phi3.marginalize(['D'], inplace=False) * phi2).marginalize(['C'], inplace=False)
        b_B_C = phi2 * (phi1.marginalize(['A'], inplace=False) * phi3.marginalize(['D'], inplace=False))
        b_C_D = phi3 * (phi1.marginalize(['A'], inplace=False) * phi2).marginalize(['B'], inplace=False)
        np_test.assert_array_almost_equal(clique_belief[('A', 'B')].values,
                                          b_A_B.values)
        np_test.assert_array_almost_equal(clique_belief[('B', 'C')].values,
                                          b_B_C.values)
        np_test.assert_array_almost_equal(clique_belief[('C', 'D')].values,
                                          b_C_D.values)

    def test_calibrate_sepset_belief(self):
        """Sepset beliefs after sum-product calibration match hand-computed values."""
        belief_propagation = BeliefPropagation(self.junction_tree)
        belief_propagation.calibrate()
        sepset_belief = belief_propagation.get_sepset_beliefs()
        phi1 = Factor(['A', 'B'], [2, 3], range(6))
        phi2 = Factor(['B', 'C'], [3, 2], range(6))
        phi3 = Factor(['C', 'D'], [2, 2], range(4))
        # Sepset belief = calibrated clique belief marginalized onto the sepset.
        b_B = (phi1 * (phi3.marginalize(['D'], inplace=False) * phi2).marginalize(['C'], inplace=False)).marginalize(['A'], inplace=False)
        b_C = (phi2 * (phi1.marginalize(['A'], inplace=False) * phi3.marginalize(['D'], inplace=False))).marginalize(['B'], inplace=False)
        np_test.assert_array_almost_equal(sepset_belief[frozenset((('A', 'B'), ('B', 'C')))].values,
                                          b_B.values)
        np_test.assert_array_almost_equal(sepset_belief[frozenset((('B', 'C'), ('C', 'D')))].values,
                                          b_C.values)

    def test_max_calibrate_clique_belief(self):
        """Clique beliefs after max-product calibration (maximize replaces marginalize)."""
        belief_propagation = BeliefPropagation(self.junction_tree)
        belief_propagation.max_calibrate()
        clique_belief = belief_propagation.get_clique_beliefs()
        phi1 = Factor(['A', 'B'], [2, 3], range(6))
        phi2 = Factor(['B', 'C'], [3, 2], range(6))
        phi3 = Factor(['C', 'D'], [2, 2], range(4))
        b_A_B = phi1 * (phi3.maximize(['D'], inplace=False) * phi2).maximize(['C'], inplace=False)
        b_B_C = phi2 * (phi1.maximize(['A'], inplace=False) * phi3.maximize(['D'], inplace=False))
        b_C_D = phi3 * (phi1.maximize(['A'], inplace=False) * phi2).maximize(['B'], inplace=False)
        np_test.assert_array_almost_equal(clique_belief[('A', 'B')].values,
                                          b_A_B.values)
        np_test.assert_array_almost_equal(clique_belief[('B', 'C')].values,
                                          b_B_C.values)
        np_test.assert_array_almost_equal(clique_belief[('C', 'D')].values,
                                          b_C_D.values)

    def test_max_calibrate_sepset_belief(self):
        """Sepset beliefs after max-product calibration match hand-computed values."""
        belief_propagation = BeliefPropagation(self.junction_tree)
        belief_propagation.max_calibrate()
        sepset_belief = belief_propagation.get_sepset_beliefs()
        phi1 = Factor(['A', 'B'], [2, 3], range(6))
        phi2 = Factor(['B', 'C'], [3, 2], range(6))
        phi3 = Factor(['C', 'D'], [2, 2], range(4))
        b_B = (phi1 * (phi3.maximize(['D'], inplace=False) * phi2).maximize(['C'], inplace=False)).maximize(['A'], inplace=False)
        b_C = (phi2 * (phi1.maximize(['A'], inplace=False) * phi3.maximize(['D'], inplace=False))).maximize(['B'], inplace=False)
        np_test.assert_array_almost_equal(sepset_belief[frozenset((('A', 'B'), ('B', 'C')))].values,
                                          b_B.values)
        np_test.assert_array_almost_equal(sepset_belief[frozenset((('B', 'C'), ('C', 'D')))].values,
                                          b_C.values)

    # All the values that are used for comparison in all the tests below were
    # found using SAMIAM (assuming that it is correct ;))
    def test_query_single_variable(self):
        """Marginal of J with no evidence."""
        belief_propagation = BeliefPropagation(self.bayesian_model)
        query_result = belief_propagation.query(['J'])
        np_test.assert_array_almost_equal(query_result['J'].values,
                                          np.array([0.416, 0.584]))

    def test_query_multiple_variable(self):
        """Joint query over Q and J returns both marginals."""
        belief_propagation = BeliefPropagation(self.bayesian_model)
        query_result = belief_propagation.query(['Q', 'J'])
        np_test.assert_array_almost_equal(query_result['J'].values,
                                          np.array([0.416, 0.584]))
        np_test.assert_array_almost_equal(query_result['Q'].values,
                                          np.array([0.4912, 0.5088]))

    def test_query_single_variable_with_evidence(self):
        """Posterior of J given A=0, R=1."""
        belief_propagation = BeliefPropagation(self.bayesian_model)
        query_result = belief_propagation.query(variables=['J'],
                                                evidence={'A': 0, 'R': 1})
        np_test.assert_array_almost_equal(query_result['J'].values,
                                          np.array([0.60, 0.40]))

    def test_query_multiple_variable_with_evidence(self):
        """Posteriors of J and Q given evidence on A, R, G and L."""
        belief_propagation = BeliefPropagation(self.bayesian_model)
        query_result = belief_propagation.query(variables=['J', 'Q'],
                                                evidence={'A': 0, 'R': 0,
                                                          'G': 0, 'L': 1})
        np_test.assert_array_almost_equal(query_result['J'].values,
                                          np.array([0.818182, 0.181818]))
        np_test.assert_array_almost_equal(query_result['Q'].values,
                                          np.array([0.772727, 0.227273]))

    def test_map_query(self):
        """MAP assignment over all variables with no evidence."""
        belief_propagation = BeliefPropagation(self.bayesian_model)
        map_query = belief_propagation.map_query()
        self.assertDictEqual(map_query, {'A': 1, 'R': 1, 'J': 1, 'Q': 1,
                                         'G': 0, 'L': 0})

    def test_map_query_with_evidence(self):
        """MAP assignment for A, R, L given evidence on J, Q and G."""
        belief_propagation = BeliefPropagation(self.bayesian_model)
        map_query = belief_propagation.map_query(['A', 'R', 'L'],
                                                 {'J': 0, 'Q': 1, 'G': 0})
        self.assertDictEqual(map_query, {'A': 1, 'R': 0, 'L': 0})

    def tearDown(self):
        del self.junction_tree
        del self.bayesian_model
def _build_hidden_card_model(variables, card):
    """Build one fully-connected BayesianModel over ``variables``.

    Every variable is linked to every later variable in the list, and the
    CPD of the i-th variable conditions on all variables before it, each
    with cardinality ``card``.  CPD values come from
    ``create_cpd_table(i, card)`` — identical to what the three
    hand-written model blocks produced before this refactor.

    :param variables: variable names, in topological order.
    :param card: shared cardinality (number of hidden cards) of every
        variable in the model.
    :return: the assembled ``BayesianModel`` with all CPDs attached.
    """
    edges = [(parent, child)
             for i, parent in enumerate(variables)
             for child in variables[i + 1:]]
    model = BayesianModel(edges)
    cpds = []
    for i, name in enumerate(variables):
        parents = variables[:i]
        if parents:
            cpds.append(TabularCPD(variable=name,
                                   variable_card=card,
                                   values=create_cpd_table(i, card),
                                   evidence=parents,
                                   evidence_card=[card] * len(parents)))
        else:
            # First variable in the chain: unconditional prior.
            cpds.append(TabularCPD(variable=name,
                                   variable_card=card,
                                   values=create_cpd_table(0, card)))
    model.add_cpds(*cpds)
    return model


def create_models(hidden_suspects, hidden_weapons, hidden_rooms):
    """
    Creates Bayesian Networks for the BN Player.

    The suspects network covers the culprit variable plus the hidden cards
    held by players 2 and 3 (s, s1_p2, s2_p2, s1_p3); the weapons and rooms
    networks additionally include a second player-3 card (…, w2_p3 / r2_p3).
    All three share the same fully-connected shape, so they are built by one
    helper instead of three near-identical copies.

    :param hidden_suspects: Number of hidden suspect cards, which is the
        domain size of the variables in the suspects BN.
    :param hidden_weapons: Number of hidden weapon cards, which is the
        domain size of the variables in the weapons BN.
    :param hidden_rooms: Number of hidden room cards, which is the
        domain size of the variables in the rooms BN.
    :return: a tuple (suspects model, weapons model, rooms model)
    """
    suspects_model = _build_hidden_card_model(
        ['s', 's1_p2', 's2_p2', 's1_p3'], hidden_suspects)
    weapons_model = _build_hidden_card_model(
        ['w', 'w1_p2', 'w2_p2', 'w1_p3', 'w2_p3'], hidden_weapons)
    rooms_model = _build_hidden_card_model(
        ['r', 'r1_p2', 'r2_p2', 'r1_p3', 'r2_p3'], hidden_rooms)
    return suspects_model, weapons_model, rooms_model
class TestInferenceBase(unittest.TestCase):
    """Tests that Inference.__init__ correctly extracts variables,
    cardinalities and per-variable factor lists from both a Bayesian
    chain model and a small Markov model.
    """

    def setUp(self):
        # Chain Bayesian network a -> b -> c -> d -> e, binary everywhere.
        self.bayesian = BayesianModel([('a', 'b'), ('b', 'c'), ('c', 'd'),
                                       ('d', 'e')])
        a_cpd = TabularCPD('a', 2, [[0.4, 0.6]])
        b_cpd = TabularCPD('b', 2, [[0.2, 0.4], [0.8, 0.6]],
                           evidence=['a'], evidence_card=[2])
        c_cpd = TabularCPD('c', 2, [[0.1, 0.2], [0.9, 0.8]],
                           evidence=['b'], evidence_card=[2])
        d_cpd = TabularCPD('d', 2, [[0.4, 0.3], [0.6, 0.7]],
                           evidence=['c'], evidence_card=[2])
        e_cpd = TabularCPD('e', 2, [[0.3, 0.2], [0.7, 0.8]],
                           evidence=['d'], evidence_card=[2])
        self.bayesian.add_cpds(a_cpd, b_cpd, c_cpd, d_cpd, e_cpd)
        # Four-node Markov model (a diamond) with one factor per edge.
        self.markov = MarkovModel([('a', 'b'), ('b', 'd'), ('a', 'c'),
                                   ('c', 'd')])
        factor_1 = DiscreteFactor(['a', 'b'], [2, 2],
                                  np.array([100, 1, 1, 100]))
        factor_2 = DiscreteFactor(['a', 'c'], [2, 2],
                                  np.array([40, 30, 100, 20]))
        factor_3 = DiscreteFactor(['b', 'd'], [2, 2],
                                  np.array([1, 100, 100, 1]))
        factor_4 = DiscreteFactor(['c', 'd'], [2, 2],
                                  np.array([60, 60, 40, 40]))
        self.markov.add_factors(factor_1, factor_2, factor_3, factor_4)

    def test_bayesian_inference_init(self):
        """Each variable's factor list holds every CPD (as a factor) that mentions it."""
        infer_bayesian = Inference(self.bayesian)
        self.assertEqual(set(infer_bayesian.variables),
                         {'a', 'b', 'c', 'd', 'e'})
        self.assertEqual(infer_bayesian.cardinality, {
            'a': 2,
            'b': 2,
            'c': 2,
            'd': 2,
            'e': 2
        })
        self.assertIsInstance(infer_bayesian.factors, defaultdict)
        # 'a' appears in its own CPD and as evidence of b's CPD, etc.
        self.assertEqual(
            set(infer_bayesian.factors['a']),
            set([
                self.bayesian.get_cpds('a').to_factor(),
                self.bayesian.get_cpds('b').to_factor()
            ]))
        self.assertEqual(
            set(infer_bayesian.factors['b']),
            set([
                self.bayesian.get_cpds('b').to_factor(),
                self.bayesian.get_cpds('c').to_factor()
            ]))
        self.assertEqual(
            set(infer_bayesian.factors['c']),
            set([
                self.bayesian.get_cpds('c').to_factor(),
                self.bayesian.get_cpds('d').to_factor()
            ]))
        self.assertEqual(
            set(infer_bayesian.factors['d']),
            set([
                self.bayesian.get_cpds('d').to_factor(),
                self.bayesian.get_cpds('e').to_factor()
            ]))
        # 'e' is a leaf: only its own CPD mentions it.
        self.assertEqual(set(infer_bayesian.factors['e']),
                         set([self.bayesian.get_cpds('e').to_factor()]))

    def test_markov_inference_init(self):
        """Each variable's factor list holds the two edge factors touching it."""
        infer_markov = Inference(self.markov)
        self.assertEqual(set(infer_markov.variables), {'a', 'b', 'c', 'd'})
        self.assertEqual(infer_markov.cardinality, {
            'a': 2,
            'b': 2,
            'c': 2,
            'd': 2
        })
        self.assertEqual(
            infer_markov.factors, {
                'a': [
                    DiscreteFactor(['a', 'b'], [2, 2],
                                   np.array([100, 1, 1, 100])),
                    DiscreteFactor(['a', 'c'], [2, 2],
                                   np.array([40, 30, 100, 20]))
                ],
                'b': [
                    DiscreteFactor(['a', 'b'], [2, 2],
                                   np.array([100, 1, 1, 100])),
                    DiscreteFactor(['b', 'd'], [2, 2],
                                   np.array([1, 100, 100, 1]))
                ],
                'c': [
                    DiscreteFactor(['a', 'c'], [2, 2],
                                   np.array([40, 30, 100, 20])),
                    DiscreteFactor(['c', 'd'], [2, 2],
                                   np.array([60, 60, 40, 40]))
                ],
                'd': [
                    DiscreteFactor(['b', 'd'], [2, 2],
                                   np.array([1, 100, 100, 1])),
                    DiscreteFactor(['c', 'd'], [2, 2],
                                   np.array([60, 60, 40, 40]))
                ]
            })
class TestVariableElimination(unittest.TestCase):
    """Tests for VariableElimination: query, max_marginal, map_query and
    induced graph/width on a five-variable Bayesian network.
    """

    def setUp(self):
        self.bayesian_model = BayesianModel([('A', 'J'), ('R', 'J'),
                                             ('J', 'Q'), ('J', 'L'),
                                             ('G', 'L')])
        cpd_a = TabularCPD('A', 2, values=[[0.2], [0.8]])
        cpd_r = TabularCPD('R', 2, values=[[0.4], [0.6]])
        cpd_j = TabularCPD('J', 2,
                           values=[[0.9, 0.6, 0.7, 0.1],
                                   [0.1, 0.4, 0.3, 0.9]],
                           evidence=['A', 'R'], evidence_card=[2, 2])
        cpd_q = TabularCPD('Q', 2, values=[[0.9, 0.2], [0.1, 0.8]],
                           evidence=['J'], evidence_card=[2])
        cpd_l = TabularCPD('L', 2,
                           values=[[0.9, 0.45, 0.8, 0.1],
                                   [0.1, 0.55, 0.2, 0.9]],
                           evidence=['J', 'G'], evidence_card=[2, 2])
        cpd_g = TabularCPD('G', 2, values=[[0.6], [0.4]])
        self.bayesian_model.add_cpds(cpd_a, cpd_g, cpd_j, cpd_l, cpd_q, cpd_r)
        self.bayesian_inference = VariableElimination(self.bayesian_model)

    # All the values that are used for comparison in all the tests were
    # found using SAMIAM (assuming that it is correct ;))
    def test_query_single_variable(self):
        """Marginal of J with no evidence."""
        query_result = self.bayesian_inference.query(['J'])
        np_test.assert_array_almost_equal(query_result['J'].values,
                                          np.array([0.416, 0.584]))

    def test_query_multiple_variable(self):
        """Joint query over Q and J returns both marginals."""
        query_result = self.bayesian_inference.query(['Q', 'J'])
        np_test.assert_array_almost_equal(query_result['J'].values,
                                          np.array([0.416, 0.584]))
        np_test.assert_array_almost_equal(query_result['Q'].values,
                                          np.array([0.4912, 0.5088]))

    def test_query_single_variable_with_evidence(self):
        """Posterior of J given A=0, R=1."""
        query_result = self.bayesian_inference.query(variables=['J'],
                                                     evidence={
                                                         'A': 0,
                                                         'R': 1
                                                     })
        np_test.assert_array_almost_equal(query_result['J'].values,
                                          np.array([0.60, 0.40]))

    def test_query_multiple_variable_with_evidence(self):
        """Posteriors of J and Q given evidence on A, R, G and L."""
        query_result = self.bayesian_inference.query(variables=['J', 'Q'],
                                                     evidence={
                                                         'A': 0,
                                                         'R': 0,
                                                         'G': 0,
                                                         'L': 1
                                                     })
        np_test.assert_array_almost_equal(query_result['J'].values,
                                          np.array([0.818182, 0.181818]))
        np_test.assert_array_almost_equal(query_result['Q'].values,
                                          np.array([0.772727, 0.227273]))

    def test_query_multiple_times(self):
        # This just tests that the models are not getting modified while
        # querying them: each query is run twice and must agree.
        query_result = self.bayesian_inference.query(['J'])
        query_result = self.bayesian_inference.query(['J'])
        np_test.assert_array_almost_equal(query_result['J'].values,
                                          np.array([0.416, 0.584]))
        query_result = self.bayesian_inference.query(['Q', 'J'])
        query_result = self.bayesian_inference.query(['Q', 'J'])
        np_test.assert_array_almost_equal(query_result['J'].values,
                                          np.array([0.416, 0.584]))
        np_test.assert_array_almost_equal(query_result['Q'].values,
                                          np.array([0.4912, 0.5088]))
        query_result = self.bayesian_inference.query(variables=['J'],
                                                     evidence={
                                                         'A': 0,
                                                         'R': 1
                                                     })
        query_result = self.bayesian_inference.query(variables=['J'],
                                                     evidence={
                                                         'A': 0,
                                                         'R': 1
                                                     })
        np_test.assert_array_almost_equal(query_result['J'].values,
                                          np.array([0.60, 0.40]))
        query_result = self.bayesian_inference.query(variables=['J', 'Q'],
                                                     evidence={
                                                         'A': 0,
                                                         'R': 0,
                                                         'G': 0,
                                                         'L': 1
                                                     })
        query_result = self.bayesian_inference.query(variables=['J', 'Q'],
                                                     evidence={
                                                         'A': 0,
                                                         'R': 0,
                                                         'G': 0,
                                                         'L': 1
                                                     })
        np_test.assert_array_almost_equal(query_result['J'].values,
                                          np.array([0.818182, 0.181818]))
        np_test.assert_array_almost_equal(query_result['Q'].values,
                                          np.array([0.772727, 0.227273]))

    def test_max_marginal(self):
        """Max-marginal over all variables (probability of the MAP state)."""
        np_test.assert_almost_equal(self.bayesian_inference.max_marginal(),
                                    0.1659, decimal=4)

    def test_max_marginal_var(self):
        """Max-marginal restricted to G."""
        np_test.assert_almost_equal(self.bayesian_inference.max_marginal(['G']),
                                    0.5714, decimal=4)

    def test_max_marginal_var1(self):
        """Max-marginal restricted to G and R."""
        np_test.assert_almost_equal(self.bayesian_inference.max_marginal(
            ['G', 'R']), 0.4055, decimal=4)

    def test_max_marginal_var2(self):
        """Max-marginal restricted to G, R and A."""
        np_test.assert_almost_equal(self.bayesian_inference.max_marginal(
            ['G', 'R', 'A']), 0.3260, decimal=4)

    def test_map_query(self):
        """MAP assignment over all variables with no evidence."""
        map_query = self.bayesian_inference.map_query()
        self.assertDictEqual(map_query, {
            'A': 1,
            'R': 1,
            'J': 1,
            'Q': 1,
            'G': 0,
            'L': 0
        })

    def test_map_query_with_evidence(self):
        """MAP assignment for A, R, L given evidence on J, Q and G."""
        map_query = self.bayesian_inference.map_query(['A', 'R', 'L'], {
            'J': 0,
            'Q': 1,
            'G': 0
        })
        self.assertDictEqual(map_query, {'A': 1, 'R': 0, 'L': 0})

    def test_induced_graph(self):
        """Induced (fill-in) graph for the given elimination order."""
        induced_graph = self.bayesian_inference.induced_graph(
            ['G', 'Q', 'A', 'J', 'L', 'R'])
        result_edges = sorted([sorted(x) for x in induced_graph.edges()])
        self.assertEqual([['A', 'J'], ['A', 'R'], ['G', 'J'], ['G', 'L'],
                          ['J', 'L'], ['J', 'Q'], ['J', 'R'], ['L', 'R']],
                         result_edges)

    def test_induced_width(self):
        """Induced width (max clique size - 1) for the given elimination order."""
        result_width = self.bayesian_inference.induced_width(
            ['G', 'Q', 'A', 'J', 'L', 'R'])
        self.assertEqual(2, result_width)

    def tearDown(self):
        del self.bayesian_inference
        del self.bayesian_model
class TestBayesianModelCPD(unittest.TestCase):
    """Tests for active-trail (d-separation) queries and CPD attachment on
    the classic student network d -> g <- i, g -> l, i -> s.
    """

    def setUp(self):
        self.G = BayesianModel([('d', 'g'), ('i', 'g'), ('g', 'l'),
                                ('i', 's')])

    def test_active_trail_nodes(self):
        """Nodes reachable via active trails with no observations."""
        self.assertEqual(sorted(self.G.active_trail_nodes('d')),
                         ['d', 'g', 'l'])
        self.assertEqual(sorted(self.G.active_trail_nodes('i')),
                         ['g', 'i', 'l', 's'])

    def test_active_trail_nodes_args(self):
        """Observing nodes blocks chains/forks and opens colliders."""
        self.assertEqual(sorted(self.G.active_trail_nodes('d', observed='g')),
                         ['d', 'i', 's'])
        self.assertEqual(sorted(self.G.active_trail_nodes('l', observed='g')),
                         ['l'])
        self.assertEqual(sorted(self.G.active_trail_nodes('s',
                                                          observed=['i', 'l'])),
                         ['s'])
        self.assertEqual(sorted(self.G.active_trail_nodes('s',
                                                          observed=['d', 'l'])),
                         ['g', 'i', 's'])

    def test_is_active_trail_triplets(self):
        """Pairwise d-connection checks over the basic triplet patterns."""
        self.assertTrue(self.G.is_active_trail('d', 'l'))
        self.assertTrue(self.G.is_active_trail('g', 's'))
        # d and i only connect through the collider g (or its descendant l).
        self.assertFalse(self.G.is_active_trail('d', 'i'))
        self.assertTrue(self.G.is_active_trail('d', 'i', observed='g'))
        self.assertFalse(self.G.is_active_trail('d', 'l', observed='g'))
        self.assertFalse(self.G.is_active_trail('i', 'l', observed='g'))
        self.assertTrue(self.G.is_active_trail('d', 'i', observed='l'))
        self.assertFalse(self.G.is_active_trail('g', 's', observed='i'))

    def test_is_active_trail(self):
        """Longer trails through the network."""
        self.assertFalse(self.G.is_active_trail('d', 's'))
        self.assertTrue(self.G.is_active_trail('s', 'l'))
        self.assertTrue(self.G.is_active_trail('d', 's', observed='g'))
        self.assertFalse(self.G.is_active_trail('s', 'l', observed='g'))

    def test_is_active_trail_args(self):
        """Same checks with the observed argument passed positionally."""
        self.assertFalse(self.G.is_active_trail('s', 'l', 'i'))
        self.assertFalse(self.G.is_active_trail('s', 'l', 'g'))
        self.assertTrue(self.G.is_active_trail('d', 's', 'l'))
        self.assertFalse(self.G.is_active_trail('d', 's', ['i', 'l']))

    def test_get_cpds(self):
        """get_cpds('d') returns the CPD whose variable is 'd'."""
        cpd_d = TabularCPD('d', 2, np.random.rand(2, 1))
        cpd_i = TabularCPD('i', 2, np.random.rand(2, 1))
        cpd_g = TabularCPD('g', 2, np.random.rand(2, 4), ['d', 'i'], [2, 2])
        cpd_l = TabularCPD('l', 2, np.random.rand(2, 2), ['g'], 2)
        cpd_s = TabularCPD('s', 2, np.random.rand(2, 2), ['i'], 2)
        self.G.add_cpds(cpd_d, cpd_i, cpd_g, cpd_l, cpd_s)
        self.assertEqual(self.G.get_cpds('d').variable, 'd')

    def test_get_cpds1(self):
        # Node name 'AB' is a superstring of 'A'; lookup must not confuse them.
        self.model = BayesianModel([('A', 'AB')])
        cpd_a = TabularCPD('A', 2, np.random.rand(2, 1))
        cpd_ab = TabularCPD('AB', 2, np.random.rand(2, 2), evidence=['A'],
                            evidence_card=[2])
        self.model.add_cpds(cpd_a, cpd_ab)
        self.assertEqual(self.model.get_cpds('A').variable, 'A')
        self.assertEqual(self.model.get_cpds('AB').variable, 'AB')

    def test_add_single_cpd(self):
        """Adding one CPD makes get_cpds() return exactly that CPD."""
        from pgmpy.factors import TabularCPD
        cpd_s = TabularCPD('s', 2, np.random.rand(2, 2), ['i'], 2)
        self.G.add_cpds(cpd_s)
        self.assertListEqual(self.G.get_cpds(), [cpd_s])

    def test_add_multiple_cpds(self):
        """Adding several CPDs keeps each retrievable by its variable name."""
        from pgmpy.factors import TabularCPD
        cpd_d = TabularCPD('d', 2, np.random.rand(2, 1))
        cpd_i = TabularCPD('i', 2, np.random.rand(2, 1))
        cpd_g = TabularCPD('g', 2, np.random.rand(2, 4), ['d', 'i'], [2, 2])
        cpd_l = TabularCPD('l', 2, np.random.rand(2, 2), ['g'], 2)
        cpd_s = TabularCPD('s', 2, np.random.rand(2, 2), ['i'], 2)
        self.G.add_cpds(cpd_d, cpd_i, cpd_g, cpd_l, cpd_s)
        self.assertEqual(self.G.get_cpds('d'), cpd_d)
        self.assertEqual(self.G.get_cpds('i'), cpd_i)
        self.assertEqual(self.G.get_cpds('g'), cpd_g)
        self.assertEqual(self.G.get_cpds('l'), cpd_l)
        self.assertEqual(self.G.get_cpds('s'), cpd_s)

    def tearDown(self):
        del self.G
class TestBeliefPropagation(unittest.TestCase):
    """Tests for BeliefPropagation (DiscreteFactor API variant): sum/max
    calibration on a junction tree, and query / map_query on a Bayesian
    network.
    """

    def setUp(self):
        # Chain junction tree (A,B)-(B,C)-(C,D) with one factor per clique.
        self.junction_tree = JunctionTree([(('A', 'B'), ('B', 'C')),
                                           (('B', 'C'), ('C', 'D'))])
        phi1 = DiscreteFactor(['A', 'B'], [2, 3], range(6))
        phi2 = DiscreteFactor(['B', 'C'], [3, 2], range(6))
        phi3 = DiscreteFactor(['C', 'D'], [2, 2], range(4))
        self.junction_tree.add_factors(phi1, phi2, phi3)
        # Small Bayesian network used by the query/map_query tests below.
        self.bayesian_model = BayesianModel([('A', 'J'), ('R', 'J'),
                                             ('J', 'Q'), ('J', 'L'),
                                             ('G', 'L')])
        cpd_a = TabularCPD('A', 2, values=[[0.2], [0.8]])
        cpd_r = TabularCPD('R', 2, values=[[0.4], [0.6]])
        cpd_j = TabularCPD('J', 2,
                           values=[[0.9, 0.6, 0.7, 0.1],
                                   [0.1, 0.4, 0.3, 0.9]],
                           evidence=['A', 'R'], evidence_card=[2, 2])
        cpd_q = TabularCPD('Q', 2, values=[[0.9, 0.2], [0.1, 0.8]],
                           evidence=['J'], evidence_card=[2])
        cpd_l = TabularCPD('L', 2,
                           values=[[0.9, 0.45, 0.8, 0.1],
                                   [0.1, 0.55, 0.2, 0.9]],
                           evidence=['J', 'G'], evidence_card=[2, 2])
        cpd_g = TabularCPD('G', 2, values=[[0.6], [0.4]])
        self.bayesian_model.add_cpds(cpd_a, cpd_g, cpd_j, cpd_l, cpd_q, cpd_r)

    def test_calibrate_clique_belief(self):
        """Clique beliefs after sum-product calibration match hand-computed values."""
        belief_propagation = BeliefPropagation(self.junction_tree)
        belief_propagation.calibrate()
        clique_belief = belief_propagation.get_clique_beliefs()
        phi1 = DiscreteFactor(['A', 'B'], [2, 3], range(6))
        phi2 = DiscreteFactor(['B', 'C'], [3, 2], range(6))
        phi3 = DiscreteFactor(['C', 'D'], [2, 2], range(4))
        # Expected belief of a clique = its factor times the incoming messages.
        b_A_B = phi1 * (phi3.marginalize(['D'], inplace=False) *
                        phi2).marginalize(['C'], inplace=False)
        b_B_C = phi2 * (phi1.marginalize(['A'], inplace=False) *
                        phi3.marginalize(['D'], inplace=False))
        b_C_D = phi3 * (phi1.marginalize(['A'], inplace=False) *
                        phi2).marginalize(['B'], inplace=False)
        np_test.assert_array_almost_equal(clique_belief[('A', 'B')].values,
                                          b_A_B.values)
        np_test.assert_array_almost_equal(clique_belief[('B', 'C')].values,
                                          b_B_C.values)
        np_test.assert_array_almost_equal(clique_belief[('C', 'D')].values,
                                          b_C_D.values)

    def test_calibrate_sepset_belief(self):
        """Sepset beliefs after sum-product calibration match hand-computed values."""
        belief_propagation = BeliefPropagation(self.junction_tree)
        belief_propagation.calibrate()
        sepset_belief = belief_propagation.get_sepset_beliefs()
        phi1 = DiscreteFactor(['A', 'B'], [2, 3], range(6))
        phi2 = DiscreteFactor(['B', 'C'], [3, 2], range(6))
        phi3 = DiscreteFactor(['C', 'D'], [2, 2], range(4))
        # Sepset belief = calibrated clique belief marginalized onto the sepset.
        b_B = (phi1 *
               (phi3.marginalize(['D'], inplace=False) * phi2).marginalize(
                   ['C'], inplace=False)).marginalize(['A'], inplace=False)
        b_C = (phi2 *
               (phi1.marginalize(['A'], inplace=False) *
                phi3.marginalize(['D'], inplace=False))).marginalize(
                    ['B'], inplace=False)
        np_test.assert_array_almost_equal(
            sepset_belief[frozenset((('A', 'B'), ('B', 'C')))].values,
            b_B.values)
        np_test.assert_array_almost_equal(
            sepset_belief[frozenset((('B', 'C'), ('C', 'D')))].values,
            b_C.values)

    def test_max_calibrate_clique_belief(self):
        """Clique beliefs after max-product calibration (maximize replaces marginalize)."""
        belief_propagation = BeliefPropagation(self.junction_tree)
        belief_propagation.max_calibrate()
        clique_belief = belief_propagation.get_clique_beliefs()
        phi1 = DiscreteFactor(['A', 'B'], [2, 3], range(6))
        phi2 = DiscreteFactor(['B', 'C'], [3, 2], range(6))
        phi3 = DiscreteFactor(['C', 'D'], [2, 2], range(4))
        b_A_B = phi1 * (phi3.maximize(['D'], inplace=False) * phi2).maximize(
            ['C'], inplace=False)
        b_B_C = phi2 * (phi1.maximize(['A'], inplace=False) *
                        phi3.maximize(['D'], inplace=False))
        b_C_D = phi3 * (phi1.maximize(['A'], inplace=False) * phi2).maximize(
            ['B'], inplace=False)
        np_test.assert_array_almost_equal(clique_belief[('A', 'B')].values,
                                          b_A_B.values)
        np_test.assert_array_almost_equal(clique_belief[('B', 'C')].values,
                                          b_B_C.values)
        np_test.assert_array_almost_equal(clique_belief[('C', 'D')].values,
                                          b_C_D.values)

    def test_max_calibrate_sepset_belief(self):
        """Sepset beliefs after max-product calibration match hand-computed values."""
        belief_propagation = BeliefPropagation(self.junction_tree)
        belief_propagation.max_calibrate()
        sepset_belief = belief_propagation.get_sepset_beliefs()
        phi1 = DiscreteFactor(['A', 'B'], [2, 3], range(6))
        phi2 = DiscreteFactor(['B', 'C'], [3, 2], range(6))
        phi3 = DiscreteFactor(['C', 'D'], [2, 2], range(4))
        b_B = (phi1 *
               (phi3.maximize(['D'], inplace=False) * phi2).maximize(
                   ['C'], inplace=False)).maximize(['A'], inplace=False)
        b_C = (phi2 *
               (phi1.maximize(['A'], inplace=False) *
                phi3.maximize(['D'], inplace=False))).maximize(
                    ['B'], inplace=False)
        np_test.assert_array_almost_equal(
            sepset_belief[frozenset((('A', 'B'), ('B', 'C')))].values,
            b_B.values)
        np_test.assert_array_almost_equal(
            sepset_belief[frozenset((('B', 'C'), ('C', 'D')))].values,
            b_C.values)

    # All the values that are used for comparison in all the tests below were
    # found using SAMIAM (assuming that it is correct ;))
    def test_query_single_variable(self):
        """Marginal of J with no evidence."""
        belief_propagation = BeliefPropagation(self.bayesian_model)
        query_result = belief_propagation.query(['J'])
        np_test.assert_array_almost_equal(query_result['J'].values,
                                          np.array([0.416, 0.584]))

    def test_query_multiple_variable(self):
        """Joint query over Q and J returns both marginals."""
        belief_propagation = BeliefPropagation(self.bayesian_model)
        query_result = belief_propagation.query(['Q', 'J'])
        np_test.assert_array_almost_equal(query_result['J'].values,
                                          np.array([0.416, 0.584]))
        np_test.assert_array_almost_equal(query_result['Q'].values,
                                          np.array([0.4912, 0.5088]))

    def test_query_single_variable_with_evidence(self):
        """Posterior of J given A=0, R=1."""
        belief_propagation = BeliefPropagation(self.bayesian_model)
        query_result = belief_propagation.query(variables=['J'],
                                                evidence={
                                                    'A': 0,
                                                    'R': 1
                                                })
        np_test.assert_array_almost_equal(query_result['J'].values,
                                          np.array([0.60, 0.40]))

    def test_query_multiple_variable_with_evidence(self):
        """Posteriors of J and Q given evidence on A, R, G and L."""
        belief_propagation = BeliefPropagation(self.bayesian_model)
        query_result = belief_propagation.query(variables=['J', 'Q'],
                                                evidence={
                                                    'A': 0,
                                                    'R': 0,
                                                    'G': 0,
                                                    'L': 1
                                                })
        np_test.assert_array_almost_equal(query_result['J'].values,
                                          np.array([0.818182, 0.181818]))
        np_test.assert_array_almost_equal(query_result['Q'].values,
                                          np.array([0.772727, 0.227273]))

    def test_map_query(self):
        """MAP assignment over all variables with no evidence."""
        belief_propagation = BeliefPropagation(self.bayesian_model)
        map_query = belief_propagation.map_query()
        self.assertDictEqual(map_query, {
            'A': 1,
            'R': 1,
            'J': 1,
            'Q': 1,
            'G': 0,
            'L': 0
        })

    def test_map_query_with_evidence(self):
        """MAP assignment for A, R, L given evidence on J, Q and G."""
        belief_propagation = BeliefPropagation(self.bayesian_model)
        map_query = belief_propagation.map_query(['A', 'R', 'L'], {
            'J': 0,
            'Q': 1,
            'G': 0
        })
        self.assertDictEqual(map_query, {'A': 1, 'R': 0, 'L': 0})

    def tearDown(self):
        del self.junction_tree
        del self.bayesian_model
class TestDirectedGraphCPDOperations(unittest.TestCase):
    """Exercises adding, removing and looking up TabularCPDs on a
    BayesianModel, both by CPD object and by variable name."""

    def setUp(self):
        self.graph = BayesianModel()

    def _fresh_cpds(self):
        """Return the (diff, intel, grade) CPD triple used by most tests."""
        diff = TabularCPD('diff', 2, np.random.rand(2, 1))
        intel = TabularCPD('intel', 2, np.random.rand(2, 1))
        grade = TabularCPD('grade', 2, np.random.rand(2, 4),
                           ['diff', 'intel'], [2, 2])
        return diff, intel, grade

    def _wire_edges(self):
        """Attach the diff -> grade and intel -> grade edges to the graph."""
        self.graph.add_edges_from([('diff', 'grade'), ('intel', 'grade')])

    def test_add_single_cpd(self):
        lone_cpd = TabularCPD('grade', 2, np.random.rand(2, 4),
                              ['diff', 'intel'], [2, 2])
        self._wire_edges()
        self.graph.add_cpds(lone_cpd)
        self.assertListEqual(self.graph.get_cpds(), [lone_cpd])

    def test_add_multiple_cpds(self):
        diff, intel, grade = self._fresh_cpds()
        self._wire_edges()
        self.graph.add_cpds(diff, intel, grade)
        self.assertListEqual(self.graph.get_cpds(), [diff, intel, grade])

    def test_remove_single_cpd(self):
        diff, intel, grade = self._fresh_cpds()
        self._wire_edges()
        self.graph.add_cpds(diff, intel, grade)
        self.graph.remove_cpds(diff)
        self.assertListEqual(self.graph.get_cpds(), [intel, grade])

    def test_remove_multiple_cpds(self):
        diff, intel, grade = self._fresh_cpds()
        self._wire_edges()
        self.graph.add_cpds(diff, intel, grade)
        self.graph.remove_cpds(diff, grade)
        self.assertListEqual(self.graph.get_cpds(), [intel])

    def test_remove_single_cpd_string(self):
        # Removal may also be requested by variable name instead of object.
        diff, intel, grade = self._fresh_cpds()
        self._wire_edges()
        self.graph.add_cpds(diff, intel, grade)
        self.graph.remove_cpds('diff')
        self.assertListEqual(self.graph.get_cpds(), [intel, grade])

    def test_remove_multiple_cpds_string(self):
        diff, intel, grade = self._fresh_cpds()
        self._wire_edges()
        self.graph.add_cpds(diff, intel, grade)
        self.graph.remove_cpds('diff', 'grade')
        self.assertListEqual(self.graph.get_cpds(), [intel])

    def test_get_cpd_for_node(self):
        diff, intel, grade = self._fresh_cpds()
        self._wire_edges()
        self.graph.add_cpds(diff, intel, grade)
        self.assertEqual(self.graph.get_cpds('diff'), diff)
        self.assertEqual(self.graph.get_cpds('intel'), intel)
        self.assertEqual(self.graph.get_cpds('grade'), grade)

    def test_get_cpd_raises_error(self):
        # 'sat' is not a node of the graph, so the lookup must fail.
        diff, intel, grade = self._fresh_cpds()
        self._wire_edges()
        self.graph.add_cpds(diff, intel, grade)
        self.assertRaises(ValueError, self.graph.get_cpds, 'sat')

    def tearDown(self):
        del self.graph
class TestBayesianModelMethods(unittest.TestCase):
    """Structural queries on BayesianModel: moralization, (local)
    independencies, I-map / I-equivalence checks, copying, node removal."""

    def setUp(self):
        # G: bare structure, no CPDs attached.
        self.G = BayesianModel([('a', 'd'), ('b', 'd'), ('d', 'e'), ('b', 'c')])
        # G1: classic diff/intel -> grade student model with CPDs.
        self.G1 = BayesianModel([('diff', 'grade'), ('intel', 'grade')])
        diff_cpd = TabularCPD('diff', 2, values=[[0.2], [0.8]])
        intel_cpd = TabularCPD('intel', 3, values=[[0.5], [0.3], [0.2]])
        grade_cpd = TabularCPD('grade', 3,
                               values=[[0.1, 0.1, 0.1, 0.1, 0.1, 0.1],
                                       [0.1, 0.1, 0.1, 0.1, 0.1, 0.1],
                                       [0.8, 0.8, 0.8, 0.8, 0.8, 0.8]],
                               evidence=['diff', 'intel'],
                               evidence_card=[2, 3])
        self.G1.add_cpds(diff_cpd, intel_cpd, grade_cpd)

    def test_moral_graph(self):
        # Moralization marries co-parents (a-b) and drops edge directions.
        moral_graph = self.G.moralize()
        self.assertListEqual(sorted(moral_graph.nodes()), ['a', 'b', 'c', 'd', 'e'])
        for edge in moral_graph.edges():
            # Undirected edges may come back in either orientation.
            self.assertTrue(edge in [('a', 'b'), ('a', 'd'), ('b', 'c'), ('d', 'b'), ('e', 'd')] or
                            (edge[1], edge[0]) in [('a', 'b'), ('a', 'd'), ('b', 'c'), ('d', 'b'), ('e', 'd')])

    def test_moral_graph_with_edge_present_over_parents(self):
        # Parents a and b are already connected; moralization must not
        # duplicate that edge.
        G = BayesianModel([('a', 'd'), ('d', 'e'), ('b', 'd'), ('b', 'c'), ('a', 'b')])
        moral_graph = G.moralize()
        self.assertListEqual(sorted(moral_graph.nodes()), ['a', 'b', 'c', 'd', 'e'])
        for edge in moral_graph.edges():
            self.assertTrue(edge in [('a', 'b'), ('c', 'b'), ('d', 'a'), ('d', 'b'), ('d', 'e')] or
                            (edge[1], edge[0]) in [('a', 'b'), ('c', 'b'), ('d', 'a'), ('d', 'b'), ('d', 'e')])

    def test_local_independencies(self):
        # Each node is independent of its non-descendants given its parents.
        self.assertEqual(self.G.local_independencies('a'), Independencies(['a', ['b', 'c']]))
        self.assertEqual(self.G.local_independencies('c'), Independencies(['c', ['a', 'd', 'e'], 'b']))
        self.assertEqual(self.G.local_independencies('d'), Independencies(['d', 'c', ['b', 'a']]))
        self.assertEqual(self.G.local_independencies('e'), Independencies(['e', ['c', 'b', 'a'], 'd']))
        self.assertEqual(self.G.local_independencies('b'), Independencies(['b', 'a']))
        # 'grade' has no non-descendant non-parents, so no independencies.
        self.assertEqual(self.G1.local_independencies('grade'), Independencies())

    def test_get_independencies(self):
        # Chain and fork imply X _|_ Z | Y; a collider implies X _|_ Z.
        chain = BayesianModel([('X', 'Y'), ('Y', 'Z')])
        self.assertEqual(chain.get_independencies(), Independencies(('X', 'Z', 'Y'), ('Z', 'X', 'Y')))
        fork = BayesianModel([('Y', 'X'), ('Y', 'Z')])
        self.assertEqual(fork.get_independencies(), Independencies(('X', 'Z', 'Y'), ('Z', 'X', 'Y')))
        collider = BayesianModel([('X', 'Y'), ('Z', 'Y')])
        self.assertEqual(collider.get_independencies(), Independencies(('X', 'Z'), ('Z', 'X')))

    def test_is_imap(self):
        # Joint distribution factorizing exactly as G1 -> G1 is an I-map.
        val = [0.01, 0.01, 0.08, 0.006, 0.006, 0.048, 0.004, 0.004, 0.032,
               0.04, 0.04, 0.32, 0.024, 0.024, 0.192, 0.016, 0.016, 0.128]
        JPD = JointProbabilityDistribution(['diff', 'intel', 'grade'], [2, 3, 3], val)
        fac = DiscreteFactor(['diff', 'intel', 'grade'], [2, 3, 3], val)
        self.assertTrue(self.G1.is_imap(JPD))
        # A plain factor is not a distribution; must be rejected by type.
        self.assertRaises(TypeError, self.G1.is_imap, fac)

    def test_get_immoralities(self):
        # Immoralities: unmarried parent pairs sharing a child.
        G = BayesianModel([('x', 'y'), ('z', 'y'), ('x', 'z'), ('w', 'y')])
        self.assertEqual(G.get_immoralities(), {('w', 'x'), ('w', 'z')})
        G1 = BayesianModel([('x', 'y'), ('z', 'y'), ('z', 'x'), ('w', 'y')])
        self.assertEqual(G1.get_immoralities(), {('w', 'x'), ('w', 'z')})
        G2 = BayesianModel([('x', 'y'), ('z', 'y'), ('x', 'z'), ('w', 'y'), ('w', 'x')])
        self.assertEqual(G2.get_immoralities(), {('w', 'z')})

    def test_is_iequivalent(self):
        # I-equivalence requires same skeleton and same immoralities.
        G = BayesianModel([('x', 'y'), ('z', 'y'), ('x', 'z'), ('w', 'y')])
        self.assertRaises(TypeError, G.is_iequivalent, MarkovModel())
        G1 = BayesianModel([('V', 'W'), ('W', 'X'), ('X', 'Y'), ('Z', 'Y')])
        G2 = BayesianModel([('W', 'V'), ('X', 'W'), ('X', 'Y'), ('Z', 'Y')])
        self.assertTrue(G1.is_iequivalent(G2))
        G3 = BayesianModel([('W', 'V'), ('W', 'X'), ('Y', 'X'), ('Z', 'Y')])
        self.assertFalse(G3.is_iequivalent(G2))

    def test_copy(self):
        model_copy = self.G1.copy()
        self.assertEqual(sorted(self.G1.nodes()), sorted(model_copy.nodes()))
        self.assertEqual(sorted(self.G1.edges()), sorted(model_copy.edges()))
        # Copy must be deep: CPD objects are distinct instances.
        self.assertNotEqual(id(self.G1.get_cpds('diff')),
                            id(model_copy.get_cpds('diff')))
        # Mutating the original must not leak into the copy.
        self.G1.remove_cpds('diff')
        diff_cpd = TabularCPD('diff', 2, values=[[0.3], [0.7]])
        self.G1.add_cpds(diff_cpd)
        self.assertNotEqual(self.G1.get_cpds('diff'), model_copy.get_cpds('diff'))
        self.G1.remove_node('intel')
        self.assertNotEqual(sorted(self.G1.nodes()), sorted(model_copy.nodes()))
        self.assertNotEqual(sorted(self.G1.edges()), sorted(model_copy.edges()))

    def test_remove_node(self):
        # Removing a node also drops its CPD.
        self.G1.remove_node('diff')
        self.assertEqual(sorted(self.G1.nodes()), sorted(['grade', 'intel']))
        self.assertRaises(ValueError, self.G1.get_cpds, 'diff')

    def test_remove_nodes_from(self):
        self.G1.remove_nodes_from(['diff', 'grade'])
        self.assertEqual(sorted(self.G1.nodes()), sorted(['intel']))
        self.assertRaises(ValueError, self.G1.get_cpds, 'diff')
        self.assertRaises(ValueError, self.G1.get_cpds, 'grade')

    def tearDown(self):
        del self.G
        del self.G1
class TestUAIWriter(unittest.TestCase):
    """Serialize a BayesianModel and a MarkovModel to UAI text format and
    compare against golden strings.

    Fix: the assertions previously read
    ``self.assertEqual(str(self.bayeswriter.__str__()), str(self.expected_bayes_file))``
    — calling ``__str__`` explicitly and then wrapping both sides in a
    redundant ``str(...)``. ``str(writer)`` already invokes ``__str__`` and
    the expected value is already a string, so both wrappers are dropped.
    """

    def setUp(self):
        self.maxDiff = None  # show full diffs for the long golden strings
        # --- Bayesian network: the classic dog-problem model ---
        edges = [['family-out', 'dog-out'], ['bowel-problem', 'dog-out'],
                 ['family-out', 'light-on'], ['dog-out', 'hear-bark']]
        cpds = {'bowel-problem': np.array([[0.01], [0.99]]),
                'dog-out': np.array([[0.99, 0.01, 0.97, 0.03],
                                     [0.9, 0.1, 0.3, 0.7]]),
                'family-out': np.array([[0.15], [0.85]]),
                'hear-bark': np.array([[0.7, 0.3], [0.01, 0.99]]),
                'light-on': np.array([[0.6, 0.4], [0.05, 0.95]])}
        states = {'bowel-problem': ['true', 'false'],
                  'dog-out': ['true', 'false'],
                  'family-out': ['true', 'false'],
                  'hear-bark': ['true', 'false'],
                  'light-on': ['true', 'false']}
        parents = {'bowel-problem': [],
                   'dog-out': ['bowel-problem', 'family-out'],
                   'family-out': [],
                   'hear-bark': ['dog-out'],
                   'light-on': ['family-out']}
        self.bayesmodel = BayesianModel(edges)
        tabular_cpds = []
        for var, values in cpds.items():
            cpd = TabularCPD(var, len(states[var]), values,
                             evidence=parents[var],
                             evidence_card=[len(states[evidence_var])
                                            for evidence_var in parents[var]])
            tabular_cpds.append(cpd)
        self.bayesmodel.add_cpds(*tabular_cpds)
        self.bayeswriter = UAIWriter(self.bayesmodel)
        # --- Markov network: a 3-variable triangle with two factors ---
        edges = {('var_0', 'var_1'), ('var_0', 'var_2'), ('var_1', 'var_2')}
        self.markovmodel = MarkovModel(edges)
        tables = [(['var_0', 'var_1'],
                   ['4.000', '2.400', '1.000', '0.000']),
                  (['var_0', 'var_1', 'var_2'],
                   ['2.2500', '3.2500', '3.7500', '0.0000', '0.0000',
                    '10.0000', '1.8750', '4.0000', '3.3330', '2.0000',
                    '2.0000', '3.4000'])]
        domain = {'var_1': '2', 'var_2': '3', 'var_0': '2'}
        factors = []
        for table in tables:
            variables = table[0]
            cardinality = [int(domain[var]) for var in variables]
            values = list(map(float, table[1]))
            factor = DiscreteFactor(variables, cardinality, values)
            factors.append(factor)
        self.markovmodel.add_factors(*factors)
        self.markovwriter = UAIWriter(self.markovmodel)

    def test_bayes_model(self):
        self.expected_bayes_file = """BAYES 5 2 2 2 2 2 5 1 0 3 2 0 1 1 2 2 1 3 2 2 4 2 0.01 0.99 8 0.99 0.01 0.97 0.03 0.9 0.1 0.3 0.7 2 0.15 0.85 4 0.7
0.3 0.01 0.99 4 0.6 0.4 0.05 0.95"""
        self.assertEqual(str(self.bayeswriter), self.expected_bayes_file)

    def test_markov_model(self):
        self.expected_markov_file = """MARKOV 3 2 2 3 2 2 0 1 3 0 1 2 4 4.0 2.4 1.0 0.0 12 2.25 3.25 3.75 0.0 0.0 10.0 1.875 4.0 3.333 2.0 2.0 3.4"""
        self.assertEqual(str(self.markovwriter), self.expected_markov_file)
class TestBIFWriter(unittest.TestCase):
    """Serialize the dog-problem BayesianModel to BIF format and compare
    against a golden string.

    Fix: the original wrote ``for node, properties in properties.items()``,
    rebinding the name of the dict being iterated as the loop variable.
    That happens to work (the iterator is created before the rebinding) but
    it shadows the mapping and is a bug waiting to happen; the loop
    variable is now ``props``.
    """

    def setUp(self):
        edges = [['family-out', 'dog-out'], ['bowel-problem', 'dog-out'],
                 ['family-out', 'light-on'], ['dog-out', 'hear-bark']]
        cpds = {'bowel-problem': np.array([[0.01], [0.99]]),
                'dog-out': np.array([[0.99, 0.01, 0.97, 0.03],
                                     [0.9, 0.1, 0.3, 0.7]]),
                'family-out': np.array([[0.15], [0.85]]),
                'hear-bark': np.array([[0.7, 0.3], [0.01, 0.99]]),
                'light-on': np.array([[0.6, 0.4], [0.05, 0.95]])}
        states = {'bowel-problem': ['true', 'false'],
                  'dog-out': ['true', 'false'],
                  'family-out': ['true', 'false'],
                  'hear-bark': ['true', 'false'],
                  'light-on': ['true', 'false']}
        parents = {'bowel-problem': [],
                   'dog-out': ['family-out', 'bowel-problem'],
                   'family-out': [],
                   'hear-bark': ['dog-out'],
                   'light-on': ['family-out']}
        properties = {'bowel-problem': ['position = (335, 99)'],
                      'dog-out': ['position = (300, 195)'],
                      'family-out': ['position = (257, 99)'],
                      'hear-bark': ['position = (296, 268)'],
                      'light-on': ['position = (218, 195)']}
        self.model = BayesianModel(edges)
        tabular_cpds = []
        for var in sorted(cpds.keys()):
            values = cpds[var]
            cpd = TabularCPD(var, len(states[var]), values,
                             evidence=parents[var],
                             evidence_card=[len(states[evidence_var])
                                            for evidence_var in parents[var]])
            tabular_cpds.append(cpd)
        self.model.add_cpds(*tabular_cpds)
        # Attach layout properties as node attributes (old networkx API).
        for node, props in properties.items():
            for prop in props:
                prop_name, prop_value = map(lambda t: t.strip(), prop.split('='))
                self.model.node[node][prop_name] = prop_value
        self.writer = BIFWriter(model=self.model)

    def test_str(self):
        self.expected_string = """network unknown { } variable bowel-problem { type discrete [ 2 ] { bowel-problem_0, bowel-problem_1 }; property position = (335, 99) ; } variable dog-out { type discrete [ 2 ] { dog-out_0, dog-out_1 }; property position = (300, 195) ; } variable family-out { type discrete [ 2 ] { family-out_0, family-out_1 }; property position = (257, 99) ; } variable hear-bark { type discrete [ 2 ] { hear-bark_0, hear-bark_1 }; property position = (296,
268) ; } variable light-on { type discrete [ 2 ] { light-on_0, light-on_1 }; property position = (218, 195) ; } probability ( bowel-problem ) { table 0.01, 0.99 ; } probability ( dog-out | bowel-problem, family-out ) { table 0.99, 0.01, 0.97, 0.03, 0.9, 0.1, 0.3, 0.7 ; } probability ( family-out ) { table 0.15, 0.85 ; } probability ( hear-bark | dog-out ) { table 0.7, 0.3, 0.01, 0.99 ; } probability ( light-on | family-out ) { table 0.6, 0.4, 0.05, 0.95 ; } """
        self.maxDiff = None
        self.assertEqual(self.writer.__str__(), self.expected_string)
class TestBayesianModelSampling(unittest.TestCase):
    """Forward, rejection and likelihood-weighted sampling on a small
    Bayesian network.

    Fix: all ``assertEquals`` calls replaced with ``assertEqual`` —
    ``assertEquals`` is a deprecated alias that was removed in
    Python 3.12.
    """

    def setUp(self):
        self.bayesian_model = BayesianModel([('A', 'J'), ('R', 'J'), ('J', 'Q'),
                                             ('J', 'L'), ('G', 'L')])
        cpd_a = TabularCPD('A', 2, [[0.2], [0.8]])
        cpd_r = TabularCPD('R', 2, [[0.4], [0.6]])
        cpd_j = TabularCPD('J', 2, [[0.9, 0.6, 0.7, 0.1],
                                    [0.1, 0.4, 0.3, 0.9]], ['R', 'A'], [2, 2])
        cpd_q = TabularCPD('Q', 2, [[0.9, 0.2], [0.1, 0.8]], ['J'], [2])
        cpd_l = TabularCPD('L', 2, [[0.9, 0.45, 0.8, 0.1],
                                    [0.1, 0.55, 0.2, 0.9]], ['G', 'J'], [2, 2])
        cpd_g = TabularCPD('G', 2, [[0.6], [0.4]])
        self.bayesian_model.add_cpds(cpd_a, cpd_g, cpd_j, cpd_l, cpd_q, cpd_r)
        self.sampling_inference = BayesianModelSampling(self.bayesian_model)
        self.markov_model = MarkovModel()

    def test_init(self):
        # Sampling is only defined for Bayesian models.
        with self.assertRaises(TypeError):
            BayesianModelSampling(self.markov_model)

    def test_forward_sample(self):
        sample = self.sampling_inference.forward_sample(25)
        self.assertEqual(len(sample), 25)
        self.assertEqual(len(sample.columns), 6)
        self.assertIn('A', sample.columns)
        self.assertIn('J', sample.columns)
        self.assertIn('R', sample.columns)
        self.assertIn('Q', sample.columns)
        self.assertIn('G', sample.columns)
        self.assertIn('L', sample.columns)
        # Every sampled value must be a valid state of its variable.
        self.assertTrue(set(sample.A).issubset({State('A', 0), State('A', 1)}))
        self.assertTrue(set(sample.J).issubset({State('J', 0), State('J', 1)}))
        self.assertTrue(set(sample.R).issubset({State('R', 0), State('R', 1)}))
        self.assertTrue(set(sample.Q).issubset({State('Q', 0), State('Q', 1)}))
        self.assertTrue(set(sample.G).issubset({State('G', 0), State('G', 1)}))
        self.assertTrue(set(sample.L).issubset({State('L', 0), State('L', 1)}))

    def test_rejection_sample_basic(self):
        # Evidence variables must come back fixed at their observed states.
        sample = self.sampling_inference.rejection_sample(
            [State('A', 1), State('J', 1), State('R', 1)], 25)
        self.assertEqual(len(sample), 25)
        self.assertEqual(len(sample.columns), 6)
        self.assertIn('A', sample.columns)
        self.assertIn('J', sample.columns)
        self.assertIn('R', sample.columns)
        self.assertIn('Q', sample.columns)
        self.assertIn('G', sample.columns)
        self.assertIn('L', sample.columns)
        self.assertTrue(set(sample.A).issubset({State('A', 1)}))
        self.assertTrue(set(sample.J).issubset({State('J', 1)}))
        self.assertTrue(set(sample.R).issubset({State('R', 1)}))
        self.assertTrue(set(sample.Q).issubset({State('Q', 0), State('Q', 1)}))
        self.assertTrue(set(sample.G).issubset({State('G', 0), State('G', 1)}))
        self.assertTrue(set(sample.L).issubset({State('L', 0), State('L', 1)}))

    def test_likelihood_weighted_sample(self):
        # Weighted sampling adds a '_weight' column (hence 7 columns).
        sample = self.sampling_inference.likelihood_weighted_sample(
            [State('A', 0), State('J', 1), State('R', 0)], 25)
        self.assertEqual(len(sample), 25)
        self.assertEqual(len(sample.columns), 7)
        self.assertIn('A', sample.columns)
        self.assertIn('J', sample.columns)
        self.assertIn('R', sample.columns)
        self.assertIn('Q', sample.columns)
        self.assertIn('G', sample.columns)
        self.assertIn('L', sample.columns)
        self.assertIn('_weight', sample.columns)
        self.assertTrue(set(sample.A).issubset({State('A', 0), State('A', 1)}))
        self.assertTrue(set(sample.J).issubset({State('J', 0), State('J', 1)}))
        self.assertTrue(set(sample.R).issubset({State('R', 0), State('R', 1)}))
        self.assertTrue(set(sample.Q).issubset({State('Q', 0), State('Q', 1)}))
        self.assertTrue(set(sample.G).issubset({State('G', 0), State('G', 1)}))
        self.assertTrue(set(sample.L).issubset({State('L', 0), State('L', 1)}))

    def tearDown(self):
        del self.sampling_inference
        del self.bayesian_model
        del self.markov_model
class BaseModel(object):
    """
    Holds a probabilistic graphical model: its DAG, its parameters (CPDs)
    and an inference engine over it.

    Fixes applied in review:
      * ``init_graph`` and ``reset`` used the mutable default argument
        ``nodes=[]``; replaced with ``nodes=None`` plus normalization so
        the same list is not shared across calls.
      * Docstrings translated from Spanish to English.

    Args:
        config_file_path (str): path to the JSON file describing the DAG
            and its conditional probability tables.
        data (dict): alternatively, when no config file is available, a
            dictionary with the elements needed to initialise the object.

    to-do: for now this only works with binary variables.
    """

    def __init__(self, config_file_path=None, data=None):
        self.config_file_path = config_file_path
        self.digraph = None
        self.pgmodel = None
        self.infer_system = None
        self.ebunch = None
        self.nodes = None
        self.variables_dict = dict()
        # A config file, when given, overrides any `data` argument.
        if config_file_path:
            with open(config_file_path) as json_file:
                data = json.load(json_file)
        if data.get('digraph'):
            self.ebunch = data['digraph']
            self.pgmodel = BayesianModel(self.ebunch)
            self.nodes = data.get('nodes', [])
            if self.nodes:
                # Isolated nodes are not implied by the edge list.
                self.pgmodel.add_nodes_from(self.nodes)
            self.init_graph(ebunch=self.ebunch, nodes=self.nodes)
        if data.get('cpdtables'):
            self.init_model(self.ebunch, data['cpdtables'])
            for table in self.pgmodel.get_cpds():
                logging.info(table)
        self.target = data['target']
        self.nature_variables = data['nature_variables']
        self.intervention_variables = data['interventions']

    def init_graph(self, ebunch, nodes=None, plot=True, graph_id='figures/dag'):
        """Build the DAG as a networkx DiGraph from an edge list.

        Args:
            ebunch (list): list of edges of the graph.
            nodes (list): extra (possibly isolated) nodes to add.
            plot (boolean): whether to save an image of the graph using
                matplotlib.
            graph_id (str): name used to identify the saved graph image.
        """
        nodes = [] if nodes is None else nodes  # avoid mutable default
        self.digraph = nx.DiGraph(ebunch)
        for node in nodes:
            self.digraph.add_node(node)
        if plot:
            self.save_digraph_as_img(graph_id)

    def reset(self, pgmodel, ebunch, nodes=None):
        """Replace the model and graph, and refresh the inference system
        accordingly.

        Used to build a dynamic model where only the variables are kept.
        """
        nodes = [] if nodes is None else nodes  # avoid mutable default
        self.init_graph(ebunch, nodes=nodes, plot=False)
        for variable in pgmodel.nodes():
            # to-do: binary variables only, for now.
            self.variables_dict[variable] = [0, 1]
        self.ebunch = ebunch
        self.nodes = nodes
        self.pgmodel = pgmodel
        self.update_infer_system()

    def show_graph(self):
        """Display the model's causal graph with matplotlib."""
        pos = nx.circular_layout(self.digraph)
        nx.draw(self.digraph, with_labels=True, pos=pos)
        plt.show()
        plt.clf()

    def init_model(self, ebunch, cpdtables, plot=False, pgm_id='pgm'):
        """Build the PGM (currently a Bayesian model) with pgmpy from the
        edge list and the CPD tables.

        Args:
            ebunch (list): list of edges of the graph.
            cpdtables (list): array of dicts, each holding the data needed
                to build one conditional probability table.
            plot (boolean): whether to save an image of the graph using
                matplotlib.
            pgm_id (str): name used to identify the saved graph image.
        """
        for cpdtable in cpdtables:
            self.variables_dict[cpdtable['variable']] = list(
                range(cpdtable['variable_card']))
            table = TabularCPD(variable=cpdtable['variable'],
                               variable_card=cpdtable['variable_card'],
                               values=cpdtable['values'],
                               evidence_card=cpdtable.get('evidence_card'),
                               evidence=cpdtable.get('evidence'))
            if cpdtable.get('evidence'):
                # Canonical parent order keeps value layout predictable.
                table.reorder_parents(sorted(cpdtable.get('evidence')))
            self.pgmodel.add_cpds(table)
        if not self.pgmodel.check_model():
            raise ValueError("Error with CPDTs")
        self.update_infer_system()
        if plot:
            self.save_pgm_as_img(pgm_id)

    def update_infer_system(self):
        """Refresh the inference system to match the current PGM.
        Uses VariableElimination."""
        self.infer_system = VariableElimination(self.pgmodel)

    def get_variable_values(self, variable):
        """Return the list of values the given variable can take."""
        return self.variables_dict.get(variable)

    def get_target_variable(self):
        """Return the list of target variables."""
        return self.target

    def get_intervention_variables(self):
        """Return the list of intervenable variables."""
        return self.intervention_variables

    def get_nature_variables(self):
        """Return the list of variables moved by nature."""
        return self.nature_variables

    def get_ebunch(self):
        """Return the model's edge list."""
        return self.ebunch

    def get_nodes(self):
        """Return the model's isolated nodes."""
        return self.nodes

    def get_nature_var_prob(self, nature_variable):
        """Return the probabilities of the values of the given nature
        variable; None when the variable is not a nature variable.

        Args:
            nature_variable (str): name of the variable.
        """
        if nature_variable in self.nature_variables:
            return np.squeeze(
                self.pgmodel.get_cpds(nature_variable).get_values())

    def conditional_probability(self, variable, evidence):
        """Compute the probability of every value of a variable given the
        evidence, using variable elimination."""
        return self.infer_system.query([variable],
                                       evidence=evidence, show_progress=False)

    def make_inference(self, variable, evidence):
        """Run the inference engine to get the MAP value of a variable
        given the evidence.

        Args:
            variable (str): name of the variable to infer.
            evidence (dict): evidence over other variables, of the form
                {variable: value}.
        """
        return self.infer_system.map_query([variable],
                                           evidence=evidence,
                                           show_progress=False)[variable]

    def save_digraph_as_img(self, filename):
        """Helper to save the networkx DAG as an image."""
        pos = nx.circular_layout(self.digraph)
        nx.draw(self.digraph, with_labels=True, pos=pos)
        plt.savefig(filename)
        plt.show()
        plt.clf()

    def save_pgm_as_img(self, filename):
        """Helper to save the pgmpy DAG as an image."""
        nx.draw(self.digraph, with_labels=True)
        plt.show()
        plt.savefig(filename)
        plt.clf()

    def get_graph_toposort(self):
        """Return the DAG's variables in topological order."""
        return list(nx.topological_sort(self.digraph))

    def get_nodes_and_predecessors(self):
        """Return a mapping from each node to its sorted predecessors."""
        return {node: sorted(self.digraph.predecessors(node))
                for node in self.digraph.nodes}

    def get_number_of_values(self, variable):
        """Return how many values the given variable can take (0 when the
        variable is unknown).

        to-do: perhaps also return the corresponding values themselves.
        """
        return len(self.variables_dict.get(variable, []))

    def get_joint_prob_observation(self, observation):
        """Return the joint probability of an observation (a dict of
        {variable: value})."""
        prob = self.infer_system.query(variables=list(observation.keys()),
                                       joint=True, show_progress=False)
        values = prob.values
        # Index the joint table by each observed value, in the order the
        # inference engine reports its variables.
        for variable in prob.variables:
            values = values[observation[variable]]
        return values
def generateWysiwygData(samplesize=4000, filename="data/wysiwygdata4.csv"):
    '''Sample a dataset from the WYSIWYG Bayesian model of the thesis and
    write it to CSV.

    There are 6 C variables and 6 X variables. For both C and X the first
    four are discrete variables, the other two continous (sampled
    separately via `samplecontinuous`, conditioned on A and Y). The
    variable C1 causally influences Y to assure a certain level of group
    unfairness in the data.

    Args:
        samplesize (int): number of rows to sample.
        filename (str): CSV output path.
    '''
    # Structure: A influences everything; Y drives C2..C4 and X1..X4.
    wysiwygmodel = BayesianModel([('A', 'C1'), ('A', 'C2'), ('A', 'C3'),
                                  ('A', 'C4'), ('C1', 'Y'), ('Y', 'C2'),
                                  ('Y', 'C3'), ('Y', 'C4'), ('A', 'X1'),
                                  ('A', 'X2'), ('A', 'X3'), ('A', 'X4'),
                                  ('Y', 'X1'), ('Y', 'X2'), ('Y', 'X3'),
                                  ('Y', 'X4')])
    cpd_a = TabularCPD(variable='A', variable_card=2, values=[[0.5], [0.5]])
    # NOTE(review): variable_card=2 with evidence_card=[2] implies a 2x2
    # table ([[0.65, 0.4], [0.35, 0.6]]); the 4x1 layout written here
    # looks suspect — confirm against the pgmpy version in use.
    cpd_y = TabularCPD(variable='Y', variable_card=2,
                       values=[[0.65], [0.4], [0.35], [0.6]],
                       evidence=['C1'], evidence_card=[2])
    cpd_c1 = TabularCPD(variable='C1', variable_card=2,
                        values=[[0.85, 0.2], [0.15, 0.8]],
                        evidence=['A'], evidence_card=[2])
    cpd_c2 = TabularCPD(variable='C2', variable_card=4,
                        values=[[0.23, 0.27, 0.25, 0.20],
                                [0.35, 0.23, 0.24, 0.15],
                                [0.22, 0.27, 0.25, 0.25],
                                [0.20, 0.23, 0.26, 0.40]],
                        evidence=['A', 'Y'], evidence_card=[2, 2])
    cpd_c3 = TabularCPD(variable='C3', variable_card=2,
                        values=[[0.52, 0.49, 0.5, 0.45],
                                [0.48, 0.51, 0.5, 0.55]],
                        evidence=['A', 'Y'], evidence_card=[2, 2])
    cpd_c4 = TabularCPD(variable='C4', variable_card=4,
                        values=[[0.22, 0.25, 0.25, 0.37],
                                [0.23, 0.25, 0.26, 0.21],
                                [0.23, 0.25, 0.25, 0.22],
                                [0.32, 0.25, 0.24, 0.20]],
                        evidence=['A', 'Y'], evidence_card=[2, 2])
    cpd_x1 = TabularCPD(variable='X1', variable_card=2,
                        values=[[0.57, 0.48, 0.52, 0.38],
                                [0.43, 0.52, 0.48, 0.62]],
                        evidence=['A', 'Y'], evidence_card=[2, 2])
    cpd_x2 = TabularCPD(variable='X2', variable_card=4,
                        values=[[0.24, 0.28, 0.26, 0.19],
                                [0.38, 0.22, 0.24, 0.15],
                                [0.20, 0.28, 0.26, 0.23],
                                [0.18, 0.22, 0.24, 0.43]],
                        evidence=['A', 'Y'], evidence_card=[2, 2])
    cpd_x3 = TabularCPD(variable='X3', variable_card=2,
                        values=[[0.54, 0.48, 0.52, 0.4],
                                [0.46, 0.52, 0.48, 0.6]],
                        evidence=['A', 'Y'], evidence_card=[2, 2])
    cpd_x4 = TabularCPD(variable='X4', variable_card=4,
                        values=[[0.20, 0.25, 0.24, 0.40],
                                [0.21, 0.25, 0.28, 0.21],
                                [0.21, 0.25, 0.24, 0.21],
                                [0.38, 0.25, 0.24, 0.18]],
                        evidence=['A', 'Y'], evidence_card=[2, 2])
    wysiwygmodel.add_cpds(cpd_a, cpd_c1, cpd_c2, cpd_c3, cpd_c4,
                          cpd_x1, cpd_x2, cpd_x3, cpd_x4, cpd_y)
    # Sample the discrete part, then append the continuous columns
    # conditioned on the sampled (A, Y) pairs.
    datasamples = BayesianModelSampling(wysiwygmodel)
    discframe = datasamples.forward_sample(samplesize)
    AY = discframe[["A", "Y"]]
    C5 = samplecontinuous(AY, samplesize=samplesize, contatt="C5",
                          meana0=1, meana1=1.2, covy0=[1], covy1=[0.9])
    C6 = samplecontinuous(AY, samplesize=samplesize, contatt="C6",
                          meana0=2, meana1=1.8, covy0=[1], covy1=[0.95])
    X5 = samplecontinuous(AY, samplesize=samplesize, contatt="X5",
                          meana0=1.1, meana1=1.4, covy0=[1.1], covy1=[0.95])
    X6 = samplecontinuous(AY, samplesize=samplesize, contatt="X6",
                          meana0=1.9, meana1=1.5, covy0=[1], covy1=[1.1])
    discframe = pd.concat([discframe, C5, C6, X5, X6], axis=1)
    discframe.to_csv(path_or_buf=filename)
def generateWysiwygFIData(samplesize=4000, filename="data/preFIData.csv"):
    '''Sample the Bayesian network used in the FI experiment and write it
    to CSV.

    The edges between X and Y are flipped from the previous models, so X
    causally influences Y. The D variables are added to more closely
    approximate the experiments from the 'Fair Inference on Outcomes'
    paper.

    Args:
        samplesize (int): number of rows to sample.
        filename (str): CSV output path.
    '''
    wysiwygmodel = BayesianModel([('A', 'C1'), ('A', 'C2'), ('A', 'C3'),
                                  ('A', 'C4'), ('Y', 'C2'), ('Y', 'C3'),
                                  ('Y', 'C4'), ('A', 'X1'), ('A', 'X2'),
                                  ('A', 'X3'), ('A', 'X4'), ('X1', 'Y'),
                                  ('X2', 'Y'), ('X3', 'Y'), ('X4', 'Y'),
                                  ('D1', 'X1'), ('D1', 'X2'), ('D2', 'X3'),
                                  ('D3', 'X4')])
    cpd_a = TabularCPD(variable='A', variable_card=2, values=[[0.5], [0.5]])
    # D1..D3: root "decision" variables feeding the X's.
    cpd_d1 = TabularCPD(variable='D1', variable_card=2,
                        values=[[0.45], [0.55]])
    cpd_d2 = TabularCPD(variable='D2', variable_card=4,
                        values=[[0.22], [0.24], [0.28], [0.26]])
    cpd_d3 = TabularCPD(variable='D3', variable_card=2,
                        values=[[0.54], [0.46]])
    # P(Y | X1, X3, X2, X4) is computed by the sibling helper computeYDist.
    ydists = computeYDist()
    cpd_y = TabularCPD(variable='Y', variable_card=2,
                       values=[ydists[0], ydists[1]],
                       evidence=['X1', 'X3', 'X2', 'X4'],
                       evidence_card=[2, 2, 4, 4])
    cpd_c1 = TabularCPD(variable='C1', variable_card=2,
                        values=[[0.85, 0.2], [0.15, 0.8]],
                        evidence=['A'], evidence_card=[2])
    cpd_c2 = TabularCPD(variable='C2', variable_card=4,
                        values=[[0.23, 0.27, 0.25, 0.20],
                                [0.35, 0.23, 0.24, 0.15],
                                [0.22, 0.27, 0.25, 0.25],
                                [0.20, 0.23, 0.26, 0.40]],
                        evidence=['A', 'Y'], evidence_card=[2, 2])
    cpd_c3 = TabularCPD(variable='C3', variable_card=2,
                        values=[[0.52, 0.49, 0.5, 0.45],
                                [0.48, 0.51, 0.5, 0.55]],
                        evidence=['A', 'Y'], evidence_card=[2, 2])
    cpd_c4 = TabularCPD(variable='C4', variable_card=4,
                        values=[[0.22, 0.25, 0.25, 0.37],
                                [0.23, 0.25, 0.26, 0.21],
                                [0.23, 0.25, 0.25, 0.22],
                                [0.32, 0.25, 0.24, 0.20]],
                        evidence=['A', 'Y'], evidence_card=[2, 2])
    cpd_x1 = TabularCPD(
        variable='X1',
        variable_card=2,
        values=[
            [0.38, 0.40, 0.60, 0.62],  # GOOD
            [0.62, 0.60, 0.40, 0.38]
        ],
        evidence=['A', 'D1'],
        evidence_card=[2, 2])
    cpd_x2 = TabularCPD(
        variable='X2',
        variable_card=4,
        values=[
            [0.30, 0.28, 0.15, 0.14],
            [0.24, 0.26, 0.30, 0.32],  # GOOD 2
            [0.16, 0.18, 0.38, 0.40],  # GOOD 1
            [0.30, 0.28, 0.17, 0.14]
        ],
        evidence=['A', 'D1'],
        evidence_card=[2, 2])
    cpd_x3 = TabularCPD(
        variable='X3',
        variable_card=2,
        values=[[0.64, 0.62, 0.62, 0.63, 0.38, 0.35, 0.35, 0.37],
                [0.36, 0.38, 0.38, 0.37, 0.62, 0.65, 0.65, 0.63]],  # GOOD
        evidence=['A', 'D2'],
        evidence_card=[2, 4])
    cpd_x4 = TabularCPD(
        variable='X4',
        variable_card=4,
        values=[
            [0.25, 0.27, 0.07, 0.09],
            [0.36, 0.34, 0.64, 0.62],  # GOOD1
            [0.25, 0.27, 0.07, 0.09],
            [0.14, 0.12, 0.22, 0.20]
        ],  # GOOD2
        evidence=['A', 'D3'],
        evidence_card=[2, 2])
    wysiwygmodel.add_cpds(cpd_a, cpd_c1, cpd_c2, cpd_c3, cpd_c4, cpd_x1,
                          cpd_x2, cpd_x3, cpd_x4, cpd_y, cpd_d1, cpd_d2,
                          cpd_d3)
    # Sample the discrete part, then append the continuous columns
    # conditioned on the sampled (A, Y) pairs.
    datasamples = BayesianModelSampling(wysiwygmodel)
    discframe = datasamples.forward_sample(samplesize)
    AY = discframe[["A", "Y"]]
    C5 = samplecontinuous(AY, samplesize=samplesize, contatt="C5",
                          meana0=1, meana1=1.2, covy0=[1], covy1=[0.9])
    C6 = samplecontinuous(AY, samplesize=samplesize, contatt="C6",
                          meana0=2, meana1=1.8, covy0=[1], covy1=[0.95])
    X5 = samplecontinuous(AY, samplesize=samplesize, contatt="X5",
                          meana0=1.1, meana1=1.4, covy0=[1.1], covy1=[0.95])
    X6 = samplecontinuous(AY, samplesize=samplesize, contatt="X6",
                          meana0=1.9, meana1=1.5, covy0=[1], covy1=[1.1])
    discframe = pd.concat([discframe, C5, C6, X5, X6], axis=1)
    # Fix the column order before writing.
    ndf = discframe.reindex(axis=1, labels=[
        'A', 'Y', 'C1', 'C2', 'C3', 'C4', 'C5', 'C6', 'X1', 'X2', 'X3',
        'X4', 'X5', 'X6', 'D1', 'D2', 'D3'
    ])
    ndf.to_csv(path_or_buf=filename)
class TestBayesianModelCPD(unittest.TestCase):
    """Active-trail (d-separation) queries and CPD validation
    (check_model) on the d/i -> g -> l, i -> s student network."""

    def setUp(self):
        self.G = BayesianModel([('d', 'g'), ('i', 'g'), ('g', 'l'), ('i', 's')])

    def test_active_trail_nodes(self):
        self.assertEqual(sorted(self.G.active_trail_nodes('d')), ['d', 'g', 'l'])
        self.assertEqual(sorted(self.G.active_trail_nodes('i')), ['g', 'i', 'l', 's'])

    def test_active_trail_nodes_args(self):
        # Observing 'g' opens the d <-> i collider path but blocks g's
        # descendants.
        self.assertEqual(sorted(self.G.active_trail_nodes('d', observed='g')), ['d', 'i', 's'])
        self.assertEqual(sorted(self.G.active_trail_nodes('l', observed='g')), ['l'])
        self.assertEqual(sorted(self.G.active_trail_nodes('s', observed=['i', 'l'])), ['s'])
        self.assertEqual(sorted(self.G.active_trail_nodes('s', observed=['d', 'l'])), ['g', 'i', 's'])

    def test_is_active_trail_triplets(self):
        self.assertTrue(self.G.is_active_trail('d', 'l'))
        self.assertTrue(self.G.is_active_trail('g', 's'))
        # d and i are marginally independent (collider at g)...
        self.assertFalse(self.G.is_active_trail('d', 'i'))
        # ...but become dependent once g (or a descendant of g) is observed.
        self.assertTrue(self.G.is_active_trail('d', 'i', observed='g'))
        self.assertFalse(self.G.is_active_trail('d', 'l', observed='g'))
        self.assertFalse(self.G.is_active_trail('i', 'l', observed='g'))
        self.assertTrue(self.G.is_active_trail('d', 'i', observed='l'))
        self.assertFalse(self.G.is_active_trail('g', 's', observed='i'))

    def test_is_active_trail(self):
        self.assertFalse(self.G.is_active_trail('d', 's'))
        self.assertTrue(self.G.is_active_trail('s', 'l'))
        self.assertTrue(self.G.is_active_trail('d', 's', observed='g'))
        self.assertFalse(self.G.is_active_trail('s', 'l', observed='g'))

    def test_is_active_trail_args(self):
        # Third positional argument is `observed`.
        self.assertFalse(self.G.is_active_trail('s', 'l', 'i'))
        self.assertFalse(self.G.is_active_trail('s', 'l', 'g'))
        self.assertTrue(self.G.is_active_trail('d', 's', 'l'))
        self.assertFalse(self.G.is_active_trail('d', 's', ['i', 'l']))

    def test_get_cpds(self):
        cpd_d = TabularCPD('d', 2, values=np.random.rand(2, 1))
        cpd_i = TabularCPD('i', 2, values=np.random.rand(2, 1))
        cpd_g = TabularCPD('g', 2, values=np.random.rand(2, 4),
                           evidence=['d', 'i'], evidence_card=[2, 2])
        cpd_l = TabularCPD('l', 2, values=np.random.rand(2, 2),
                           evidence=['g'], evidence_card=[2])
        cpd_s = TabularCPD('s', 2, values=np.random.rand(2, 2),
                           evidence=['i'], evidence_card=[2])
        self.G.add_cpds(cpd_d, cpd_i, cpd_g, cpd_l, cpd_s)
        self.assertEqual(self.G.get_cpds('d').variable, 'd')

    def test_get_cpds1(self):
        # Lookup by node name; nodes without CPDs must raise, even after
        # the node itself is added.
        self.model = BayesianModel([('A', 'AB')])
        cpd_a = TabularCPD('A', 2, values=np.random.rand(2, 1))
        cpd_ab = TabularCPD('AB', 2, values=np.random.rand(2, 2),
                            evidence=['A'], evidence_card=[2])
        self.model.add_cpds(cpd_a, cpd_ab)
        self.assertEqual(self.model.get_cpds('A').variable, 'A')
        self.assertEqual(self.model.get_cpds('AB').variable, 'AB')
        self.assertRaises(ValueError, self.model.get_cpds, 'B')
        self.model.add_node('B')
        self.assertRaises(ValueError, self.model.get_cpds, 'B')

    def test_add_single_cpd(self):
        cpd_s = TabularCPD('s', 2, np.random.rand(2, 2), ['i'], [2])
        self.G.add_cpds(cpd_s)
        self.assertListEqual(self.G.get_cpds(), [cpd_s])

    def test_add_multiple_cpds(self):
        cpd_d = TabularCPD('d', 2, values=np.random.rand(2, 1))
        cpd_i = TabularCPD('i', 2, values=np.random.rand(2, 1))
        cpd_g = TabularCPD('g', 2, values=np.random.rand(2, 4),
                           evidence=['d', 'i'], evidence_card=[2, 2])
        cpd_l = TabularCPD('l', 2, values=np.random.rand(2, 2),
                           evidence=['g'], evidence_card=[2])
        cpd_s = TabularCPD('s', 2, values=np.random.rand(2, 2),
                           evidence=['i'], evidence_card=[2])
        self.G.add_cpds(cpd_d, cpd_i, cpd_g, cpd_l, cpd_s)
        self.assertEqual(self.G.get_cpds('d'), cpd_d)
        self.assertEqual(self.G.get_cpds('i'), cpd_i)
        self.assertEqual(self.G.get_cpds('g'), cpd_g)
        self.assertEqual(self.G.get_cpds('l'), cpd_l)
        self.assertEqual(self.G.get_cpds('s'), cpd_s)

    def test_check_model(self):
        # check_model fails while CPDs for d and i are still missing,
        # then succeeds once the model is complete.
        cpd_g = TabularCPD('g', 2, values=np.array([[0.2, 0.3, 0.4, 0.6],
                                                    [0.8, 0.7, 0.6, 0.4]]),
                           evidence=['d', 'i'], evidence_card=[2, 2])
        cpd_s = TabularCPD('s', 2, values=np.array([[0.2, 0.3],
                                                    [0.8, 0.7]]),
                           evidence=['i'], evidence_card=[2])
        cpd_l = TabularCPD('l', 2, values=np.array([[0.2, 0.3],
                                                    [0.8, 0.7]]),
                           evidence=['g'], evidence_card=[2])
        self.G.add_cpds(cpd_g, cpd_s, cpd_l)
        self.assertRaises(ValueError, self.G.check_model)
        cpd_d = TabularCPD('d', 2, values=[[0.8, 0.2]])
        cpd_i = TabularCPD('i', 2, values=[[0.7, 0.3]])
        self.G.add_cpds(cpd_d, cpd_i)
        self.assertTrue(self.G.check_model())

    def test_check_model1(self):
        # Each CPD below declares evidence inconsistent with the graph's
        # parent sets, so check_model must raise every time.
        cpd_g = TabularCPD('g', 2, values=np.array([[0.2, 0.3],
                                                    [0.8, 0.7]]),
                           evidence=['i'], evidence_card=[2])
        self.G.add_cpds(cpd_g)
        self.assertRaises(ValueError, self.G.check_model)
        self.G.remove_cpds(cpd_g)
        cpd_g = TabularCPD('g', 2, values=np.array([[0.2, 0.3, 0.4, 0.6],
                                                    [0.8, 0.7, 0.6, 0.4]]),
                           evidence=['d', 's'], evidence_card=[2, 2])
        self.G.add_cpds(cpd_g)
        self.assertRaises(ValueError, self.G.check_model)
        self.G.remove_cpds(cpd_g)
        cpd_g = TabularCPD('g', 2, values=np.array([[0.2, 0.3],
                                                    [0.8, 0.7]]),
                           evidence=['l'], evidence_card=[2])
        self.G.add_cpds(cpd_g)
        self.assertRaises(ValueError, self.G.check_model)
        self.G.remove_cpds(cpd_g)
        cpd_l = TabularCPD('l', 2, values=np.array([[0.2, 0.3],
                                                    [0.8, 0.7]]),
                           evidence=['d'], evidence_card=[2])
        self.G.add_cpds(cpd_l)
        self.assertRaises(ValueError, self.G.check_model)
        self.G.remove_cpds(cpd_l)
        cpd_l = TabularCPD('l', 2, values=np.array([[0.2, 0.3, 0.4, 0.6],
                                                    [0.8, 0.7, 0.6, 0.4]]),
                           evidence=['d', 'i'], evidence_card=[2, 2])
        self.G.add_cpds(cpd_l)
        self.assertRaises(ValueError, self.G.check_model)
        self.G.remove_cpds(cpd_l)
        cpd_l = TabularCPD('l', 2,
                           values=np.array([[0.2, 0.3, 0.4, 0.6, 0.2, 0.3, 0.4, 0.6],
                                            [0.8, 0.7, 0.6, 0.4, 0.8, 0.7, 0.6, 0.4]]),
                           evidence=['g', 'd', 'i'], evidence_card=[2, 2, 2])
        self.G.add_cpds(cpd_l)
        self.assertRaises(ValueError, self.G.check_model)
        self.G.remove_cpds(cpd_l)

    def test_check_model2(self):
        # Each CPD below has columns that do not sum to 1, so check_model
        # must raise every time.
        cpd_s = TabularCPD('s', 2, values=np.array([[0.5, 0.3],
                                                    [0.8, 0.7]]),
                           evidence=['i'], evidence_card=[2])
        self.G.add_cpds(cpd_s)
        self.assertRaises(ValueError, self.G.check_model)
        self.G.remove_cpds(cpd_s)
        cpd_g = TabularCPD('g', 2, values=np.array([[0.2, 0.3, 0.4, 0.6],
                                                    [0.3, 0.7, 0.6, 0.4]]),
                           evidence=['d', 'i'], evidence_card=[2, 2])
        self.G.add_cpds(cpd_g)
        self.assertRaises(ValueError, self.G.check_model)
        self.G.remove_cpds(cpd_g)
        cpd_l = TabularCPD('l', 2, values=np.array([[0.2, 0.3],
                                                    [0.1, 0.7]]),
                           evidence=['g'], evidence_card=[2])
        self.G.add_cpds(cpd_l)
        self.assertRaises(ValueError, self.G.check_model)
        self.G.remove_cpds(cpd_l)

    def tearDown(self):
        del self.G
def network_construction():
    """Build, validate and persist the gas-turbine fault-diagnosis network.

    Takes no input; returns the constructed BayesianModel.  If the message
    "check the Bayesian network again" is printed, the model parameters
    failed validation and should be reviewed.

    Symbol description (fault modes):
        CS: compressor surge;  CF: compressor fouling;
        CC: compressor wear/corrosion;  CI: compressor inlet icing;
        TF: turbine fouling;  TC: turbine wear/corrosion;
        TD: turbine blade mechanical damage;  BF: combustor fault;
        BP: combustion pulsation;  HW: high wheel-space temperature;
        HB: large blade-path temperature spread.

    Symbol description (symptoms):
        v: humming noise;  s: rotor-speed fluctuation;
        f1: compressor pressure fluctuation;
        dp: compressor inlet differential pressure;  r: pressure ratio;
        m1: compressor inlet flow;  m2: turbine outlet flow;
        t2: compressor outlet temperature;  p2: compressor outlet pressure;
        t4: turbine exhaust temperature;
        f2: combustor pressure fluctuation;  d: exhaust-temperature dispersion;
        ce: compressor efficiency;  te: turbine efficiency;
        fi: air-filter element icing;
        htw: wheel-space temperature above the operating limit;
        htb: blade-path (BPT) temperature deviation beyond the alarm limit.
    """
    # Edges run from fault modes (and the two efficiency nodes ce/te) to
    # the symptoms they can produce.
    fault_model = BayesianModel([('CS', 'v'), ('CS', 's'), ('CS', 'f1'),
                                 ('CF', 'dp'), ('CF', 'm1'), ('CF', 'r'),
                                 ('CF', 'ce'), ('CC', 'm1'), ('CC', 'ce'),
                                 ('ce', 'p2'), ('ce', 't2'), ('TF', 'te'),
                                 ('TF', 'm2'), ('TC', 'te'), ('TC', 'm2'),
                                 ('te', 'p2'), ('te', 't4'), ('BF', 'd'),
                                 ('BP', 'f2'), ('BP', 'd'), ('CI', 'fi'),
                                 ('TD', 'te'), ('HB', 'htb'), ('HW', 'htw')])
    # Defining the parameters (conditional probabilities).
    # Prior probabilities of the 11 fault modes.
    # NOTE(review): priors are written as a single row — the convention of
    # older pgmpy TabularCPD versions; confirm against the pgmpy version used.
    cs_cpd = TabularCPD(variable='CS', variable_card=2,
                        values=[[0.05, 0.95]])  # compressor surge
    cf_cpd = TabularCPD(variable='CF', variable_card=2,
                        values=[[0.2, 0.8]])    # compressor blade fouling
    cc_cpd = TabularCPD(variable='CC', variable_card=2,
                        values=[[0.1, 0.9]])    # compressor blade wear/corrosion
    ci_cpd = TabularCPD(variable='CI', variable_card=2,
                        values=[[0.03, 0.97]])  # compressor inlet icing
    tf_cpd = TabularCPD(variable='TF', variable_card=2,
                        values=[[0.1, 0.9]])    # turbine blade fouling
    tc_cpd = TabularCPD(variable='TC', variable_card=2,
                        values=[[0.1, 0.9]])    # turbine blade wear/corrosion
    td_cpd = TabularCPD(variable='TD', variable_card=2,
                        values=[[0.05, 0.95]])  # turbine blade mechanical damage
    bf_cpd = TabularCPD(variable='BF', variable_card=2,
                        values=[[0.1, 0.9]])    # combustor fault
    bp_cpd = TabularCPD(variable='BP', variable_card=2,
                        values=[[0.1, 0.9]])    # combustion pulsation
    hw_cpd = TabularCPD(variable='HW', variable_card=2,
                        values=[[0.1, 0.9]])    # high wheel-space temperature
    hb_cpd = TabularCPD(variable='HB', variable_card=2,
                        values=[[0.1, 0.9]])    # large blade-path temperature spread
    # Conditional probabilities of the 14 symptoms (2 of them are
    # efficiency indicators), assigned on the Noisy-OR principle:
    # parents are assumed mutually independent; leak probability 0.01;
    # 0.9 = strong link; 0.8 = link; 0.7 = probable link;
    # 0.6 = uncertain whether the link holds.
    ce_cpd = TabularCPD(variable='ce', variable_card=2,
                        evidence=['CF', 'CC'], evidence_card=[2, 2],
                        values=[[0.99, 0.1, 0.1, 0.0099],
                                [0.01, 0.9, 0.9, 0.9901]])  # efficiency-anomaly node
    te_cpd = TabularCPD(
        variable='te', variable_card=2,
        evidence=['TF', 'TC', 'TD'], evidence_card=[2, 2, 2],
        values=[[0.99, 0.1, 0.1, 0.0099, 0.1, 0.0099, 0.0099, 0.00099],
                [0.01, 0.9, 0.9, 0.9901, 0.9, 0.9901, 0.9901, 0.99901]])  # efficiency-anomaly node
    v_cpd = TabularCPD(variable='v', variable_card=2,
                       evidence=['CS'], evidence_card=[2],
                       values=[[0.99, 0.1], [0.01, 0.9]])
    s_cpd = TabularCPD(variable='s', variable_card=2,
                       evidence=['CS'], evidence_card=[2],
                       values=[[0.99, 0.1], [0.01, 0.9]])
    f1_cpd = TabularCPD(variable='f1', variable_card=2,
                        evidence=['CS'], evidence_card=[2],
                        values=[[0.99, 0.1], [0.01, 0.9]])
    dp_cpd = TabularCPD(variable='dp', variable_card=2,
                        evidence=['CF'], evidence_card=[2],
                        values=[[0.99, 0.1], [0.01, 0.9]])
    m1_cpd = TabularCPD(variable='m1', variable_card=2,
                        evidence=['CF', 'CC'], evidence_card=[2, 2],
                        values=[[0.99, 0.1, 0.2, 0.0198],
                                [0.01, 0.9, 0.8, 0.9802]])
    r_cpd = TabularCPD(variable='r', variable_card=2,
                       evidence=['CF'], evidence_card=[2],
                       values=[[0.99, 0.1], [0.01, 0.9]])
    t2_cpd = TabularCPD(variable='t2', variable_card=2,
                        evidence=['ce'], evidence_card=[2],
                        values=[[0.99, 0.1], [0.01, 0.9]])
    p2_cpd = TabularCPD(variable='p2', variable_card=2,
                        evidence=['ce', 'te'], evidence_card=[2, 2],
                        values=[[0.99, 0.3, 0.2, 0.0594],
                                [0.01, 0.7, 0.8, 0.9406]])
    t4_cpd = TabularCPD(variable='t4', variable_card=2,
                        evidence=['te'], evidence_card=[2],
                        values=[[0.99, 0.1], [0.01, 0.9]])
    m2_cpd = TabularCPD(variable='m2', variable_card=2,
                        evidence=['TF', 'TC'], evidence_card=[2, 2],
                        values=[[0.99, 0.2, 0.1, 0.0198],
                                [0.01, 0.8, 0.9, 0.9802]])
    f2_cpd = TabularCPD(variable='f2', variable_card=2,
                        evidence=['BP'], evidence_card=[2],
                        values=[[0.99, 0.1], [0.01, 0.9]])
    d_cpd = TabularCPD(variable='d', variable_card=2,
                       evidence=['BF', 'BP'], evidence_card=[2, 2],
                       values=[[0.99, 0.2, 0.1, 0.0198],
                               [0.01, 0.8, 0.9, 0.9802]])
    fi_cpd = TabularCPD(variable='fi', variable_card=2,
                        evidence=['CI'], evidence_card=[2],
                        values=[[0.99, 0.1], [0.01, 0.9]])
    htb_cpd = TabularCPD(variable='htb', variable_card=2,
                         evidence=['HB'], evidence_card=[2],
                         values=[[0.99, 0.1], [0.01, 0.9]])
    htw_cpd = TabularCPD(variable='htw', variable_card=2,
                         evidence=['HW'], evidence_card=[2],
                         values=[[0.99, 0.1], [0.01, 0.9]])
    # Associating the parameters with the model structure.
    fault_model.add_cpds(cs_cpd, cf_cpd, cc_cpd, ci_cpd, tf_cpd, tc_cpd,
                         td_cpd, bf_cpd, bp_cpd, hb_cpd, hw_cpd, ce_cpd,
                         te_cpd, v_cpd, s_cpd, f1_cpd, dp_cpd, m1_cpd,
                         r_cpd, t2_cpd, p2_cpd, t4_cpd, m2_cpd, f2_cpd,
                         d_cpd, fi_cpd, htb_cpd, htw_cpd)
    # Checking if the cpds are valid for the model.
    try:
        fault_model.check_model()
    except ValueError:
        print('check the Bayesian network again')
    else:
        joblib.dump(fault_model, 'model/fault_model.pkl')  # persist the model
    return fault_model
class TestDirectedGraphCPDOperations(unittest.TestCase):
    """Tests for adding, removing and retrieving CPDs on a BayesianModel.

    The same diff -> grade <- intel fixture was previously copy-pasted
    into every test; it is factored into `_make_cpds` / `_add_edges` so
    each test states only what it actually exercises.
    """

    def setUp(self):
        self.graph = BayesianModel()

    def _make_cpds(self):
        """Return the (diff, intel, grade) CPDs used by every test.

        `grade` is conditioned on both `diff` and `intel`.  The values are
        random because only CPD bookkeeping, not CPD validity, is under
        test here.
        """
        cpd1 = TabularCPD('diff', 2, values=np.random.rand(2, 1))
        cpd2 = TabularCPD('intel', 2, values=np.random.rand(2, 1))
        cpd3 = TabularCPD('grade', 2, values=np.random.rand(2, 4),
                          evidence=['diff', 'intel'], evidence_card=[2, 2])
        return cpd1, cpd2, cpd3

    def _add_edges(self):
        """Create the diff -> grade <- intel structure on the test graph."""
        self.graph.add_edges_from([('diff', 'grade'), ('intel', 'grade')])

    def test_add_single_cpd(self):
        _, _, cpd = self._make_cpds()
        self._add_edges()
        self.graph.add_cpds(cpd)
        self.assertListEqual(self.graph.get_cpds(), [cpd])

    def test_add_multiple_cpds(self):
        cpd1, cpd2, cpd3 = self._make_cpds()
        self._add_edges()
        self.graph.add_cpds(cpd1, cpd2, cpd3)
        self.assertListEqual(self.graph.get_cpds(), [cpd1, cpd2, cpd3])

    def test_remove_single_cpd(self):
        cpd1, cpd2, cpd3 = self._make_cpds()
        self._add_edges()
        self.graph.add_cpds(cpd1, cpd2, cpd3)
        self.graph.remove_cpds(cpd1)
        self.assertListEqual(self.graph.get_cpds(), [cpd2, cpd3])

    def test_remove_multiple_cpds(self):
        cpd1, cpd2, cpd3 = self._make_cpds()
        self._add_edges()
        self.graph.add_cpds(cpd1, cpd2, cpd3)
        self.graph.remove_cpds(cpd1, cpd3)
        self.assertListEqual(self.graph.get_cpds(), [cpd2])

    def test_remove_single_cpd_string(self):
        # remove_cpds also accepts the variable name instead of the object.
        cpd1, cpd2, cpd3 = self._make_cpds()
        self._add_edges()
        self.graph.add_cpds(cpd1, cpd2, cpd3)
        self.graph.remove_cpds('diff')
        self.assertListEqual(self.graph.get_cpds(), [cpd2, cpd3])

    def test_remove_multiple_cpds_string(self):
        cpd1, cpd2, cpd3 = self._make_cpds()
        self._add_edges()
        self.graph.add_cpds(cpd1, cpd2, cpd3)
        self.graph.remove_cpds('diff', 'grade')
        self.assertListEqual(self.graph.get_cpds(), [cpd2])

    def test_get_cpd_for_node(self):
        cpd1, cpd2, cpd3 = self._make_cpds()
        self._add_edges()
        self.graph.add_cpds(cpd1, cpd2, cpd3)
        self.assertEqual(self.graph.get_cpds('diff'), cpd1)
        self.assertEqual(self.graph.get_cpds('intel'), cpd2)
        self.assertEqual(self.graph.get_cpds('grade'), cpd3)

    def test_get_cpd_raises_error(self):
        cpd1, cpd2, cpd3 = self._make_cpds()
        self._add_edges()
        self.graph.add_cpds(cpd1, cpd2, cpd3)
        # 'sat' is not a node of the graph.
        self.assertRaises(ValueError, self.graph.get_cpds, 'sat')

    def tearDown(self):
        del self.graph
class Inference(object):
    """
    Base class for all inference algorithms.

    Converts BayesianModel and MarkovModel to a uniform representation so
    that inference algorithms can be applied. Also it checks if all the
    associated CPDs / Factors are consistent with the model.

    Initialize inference for a model.

    Parameters
    ----------
    model: pgmpy.models.BayesianModel or pgmpy.models.MarkovModel or pgmpy.models.NoisyOrModel
        model for which to initialize the inference object.

    Examples
    --------
    >>> from pgmpy.inference import Inference
    >>> from pgmpy.models import BayesianModel
    >>> from pgmpy.factors import TabularCPD
    >>> student = BayesianModel([('diff', 'grade'), ('intel', 'grade')])
    >>> diff_cpd = TabularCPD('diff', 2, [[0.2, 0.8]])
    >>> intel_cpd = TabularCPD('intel', 2, [[0.3, 0.7]])
    >>> grade_cpd = TabularCPD('grade', 3, [[0.1, 0.1, 0.1, 0.1],
    ...                                     [0.1, 0.1, 0.1, 0.1],
    ...                                     [0.8, 0.8, 0.8, 0.8]],
    ...                        evidence=['diff', 'intel'], evidence_card=[2, 2])
    >>> student.add_cpds(diff_cpd, intel_cpd, grade_cpd)
    >>> model = Inference(student)

    >>> from pgmpy.models import MarkovModel
    >>> from pgmpy.factors import Factor
    >>> import numpy as np
    >>> student = MarkovModel([('Alice', 'Bob'), ('Bob', 'Charles'),
    ...                        ('Charles', 'Debbie'), ('Debbie', 'Alice')])
    >>> factor_a_b = Factor(['Alice', 'Bob'], cardinality=[2, 2], value=np.random.rand(4))
    >>> factor_b_c = Factor(['Bob', 'Charles'], cardinality=[2, 2], value=np.random.rand(4))
    >>> factor_c_d = Factor(['Charles', 'Debbie'], cardinality=[2, 2], value=np.random.rand(4))
    >>> factor_d_a = Factor(['Debbie', 'Alice'], cardinality=[2, 2], value=np.random.rand(4))
    >>> student.add_factors(factor_a_b, factor_b_c, factor_c_d, factor_d_a)
    >>> model = Inference(student)
    """

    # NOTE(review): StateNameInit is defined elsewhere; judging from the
    # tests it appears to accept an optional `state_names` kwarg on behalf
    # of __init__ — confirm against its definition.
    @StateNameInit()
    def __init__(self, model):
        self.model = model
        # Validate all attached CPDs / factors before building the index.
        model.check_model()

        if isinstance(model, JunctionTree):
            # Junction-tree nodes are cliques, i.e. tuples of variables.
            self.variables = set(chain(*model.nodes()))
        else:
            self.variables = model.nodes()

        self.cardinality = {}
        # Maps each variable to every factor whose scope contains it.
        self.factors = defaultdict(list)

        if isinstance(model, BayesianModel):
            for node in model.nodes():
                cpd = model.get_cpds(node)
                cpd_as_factor = cpd.to_factor()
                self.cardinality[node] = cpd.variable_card
                # The factor is indexed under every variable in its scope.
                for var in cpd.variables:
                    self.factors[var].append(cpd_as_factor)
        elif isinstance(model, (MarkovModel, FactorGraph, JunctionTree)):
            self.cardinality = model.get_cardinality()
            for factor in model.get_factors():
                for var in factor.variables:
                    self.factors[var].append(factor)
        elif isinstance(model, DynamicBayesianNetwork):
            # Network for time slice 0.
            self.start_bayesian_model = BayesianModel(model.get_intra_edges(0))
            self.start_bayesian_model.add_cpds(*model.get_cpds(time_slice=0))
            # CPDs of the slice-1 interface nodes carry the inter-slice
            # dependence.
            cpd_inter = [
                model.get_cpds(node) for node in model.get_interface_nodes(1)
            ]
            self.interface_nodes = model.get_interface_nodes(0)
            # "1.5-slice" network: inter-slice edges plus slice-1 intra edges.
            self.one_and_half_model = BayesianModel(model.get_inter_edges() +
                                                    model.get_intra_edges(1))
            self.one_and_half_model.add_cpds(*(model.get_cpds(time_slice=1) +
                                               cpd_inter))
class Inference(object):
    """
    Common base for the inference algorithms.

    Normalises a graphical model (BayesianModel, MarkovModel, FactorGraph,
    JunctionTree or DynamicBayesianNetwork) into one uniform representation
    — the set of variables, their cardinalities, and a per-variable list of
    factors — on which the concrete algorithms operate.  The model's
    ``check_model`` is invoked up front so every attached CPD / factor is
    validated before any inference runs.

    Parameters
    ----------
    model: pgmpy.models.BayesianModel or pgmpy.models.MarkovModel or pgmpy.models.NoisyOrModel
        model for which to initialize the inference object.

    Examples
    --------
    >>> from pgmpy.inference import Inference
    >>> from pgmpy.models import BayesianModel
    >>> from pgmpy.factors import TabularCPD
    >>> student = BayesianModel([('diff', 'grade'), ('intel', 'grade')])
    >>> diff_cpd = TabularCPD('diff', 2, [[0.2, 0.8]])
    >>> intel_cpd = TabularCPD('intel', 2, [[0.3, 0.7]])
    >>> grade_cpd = TabularCPD('grade', 3, [[0.1, 0.1, 0.1, 0.1],
    ...                                     [0.1, 0.1, 0.1, 0.1],
    ...                                     [0.8, 0.8, 0.8, 0.8]],
    ...                        evidence=['diff', 'intel'], evidence_card=[2, 2])
    >>> student.add_cpds(diff_cpd, intel_cpd, grade_cpd)
    >>> model = Inference(student)

    >>> from pgmpy.models import MarkovModel
    >>> from pgmpy.factors import Factor
    >>> import numpy as np
    >>> student = MarkovModel([('Alice', 'Bob'), ('Bob', 'Charles'),
    ...                        ('Charles', 'Debbie'), ('Debbie', 'Alice')])
    >>> factor_a_b = Factor(['Alice', 'Bob'], cardinality=[2, 2], value=np.random.rand(4))
    >>> factor_b_c = Factor(['Bob', 'Charles'], cardinality=[2, 2], value=np.random.rand(4))
    >>> factor_c_d = Factor(['Charles', 'Debbie'], cardinality=[2, 2], value=np.random.rand(4))
    >>> factor_d_a = Factor(['Debbie', 'Alice'], cardinality=[2, 2], value=np.random.rand(4))
    >>> student.add_factors(factor_a_b, factor_b_c, factor_c_d, factor_d_a)
    >>> model = Inference(student)
    """

    def __init__(self, model):
        self.model = model
        # Fail fast on an inconsistent model.
        model.check_model()

        if isinstance(model, JunctionTree):
            # A junction tree's nodes are cliques (tuples of variables),
            # so flatten them into the plain variable set.
            self.variables = set(chain(*model.nodes()))
        else:
            self.variables = model.nodes()

        self.cardinality = {}
        # variable -> every factor whose scope mentions that variable.
        self.factors = defaultdict(list)

        if isinstance(model, BayesianModel):
            for node in model.nodes():
                node_cpd = model.get_cpds(node)
                self.cardinality[node] = node_cpd.variable_card
                node_factor = node_cpd.to_factor()
                for scope_var in node_cpd.variables:
                    self.factors[scope_var].append(node_factor)
        elif isinstance(model, (MarkovModel, FactorGraph, JunctionTree)):
            self.cardinality = model.get_cardinality()
            for factor in model.get_factors():
                for scope_var in factor.variables:
                    self.factors[scope_var].append(factor)
        elif isinstance(model, DynamicBayesianNetwork):
            # Network covering time slice 0 only.
            self.start_bayesian_model = BayesianModel(model.get_intra_edges(0))
            self.start_bayesian_model.add_cpds(*model.get_cpds(time_slice=0))
            # Slice-1 interface CPDs encode the inter-slice dependence.
            cpd_inter = [model.get_cpds(node)
                         for node in model.get_interface_nodes(1)]
            self.interface_nodes = model.get_interface_nodes(0)
            # The "1.5-slice" network: inter-slice edges plus the intra
            # edges of slice 1.
            self.one_and_half_model = BayesianModel(
                model.get_inter_edges() + model.get_intra_edges(1))
            self.one_and_half_model.add_cpds(
                *(model.get_cpds(time_slice=1) + cpd_inter))
evidence_card=[2, 2]) cpd_l = TabularCPD(variable='L', variable_card=2, values=[[0.1, 0.4, 0.99], [0.9, 0.6, 0.01]], evidence=['G'], evidence_card=[3]) cpd_s = TabularCPD(variable='S', variable_card=2, values=[[0.95, 0.2], [0.05, 0.8]], evidence=['I'], evidence_card=[2]) # Associating the CPDs with the network model.add_cpds(cpd_d, cpd_i, cpd_g, cpd_l, cpd_s) # check_model checks for the network structure and CPDs and verifies that the CPDs are correctly # defined and sum to 1. model.check_model() # In[21]: # We can now call some methods on the BayesianModel object. model.get_cpds() # In[22]: print(model.get_cpds('G')) # In[23]:
('traffic_jam', 'long_queues'), ('traffic_jam', 'late_for_school'), ('getting_up_late', 'late_for_school')]) cpd_rain = TabularCPD('rain', 2, [[0.4], [0.6]]) cpd_accident = TabularCPD('accident', 2, [[0.2], [0.8]]) cpd_traffic_jam = TabularCPD('traffic_jam', 2, [[0.9, 0.6, 0.7, 0.1], [0.1, 0.4, 0.3, 0.9]], evidence=['rain', 'accident'], evidence_card=[2, 2]) cpd_getting_up_late = TabularCPD('getting_up_late', 2, [[0.6], [0.4]]) cpd_late_for_school = TabularCPD('late_for_school', 2, [[0.9, 0.45, 0.8, 0.1], [0.1, 0.55, 0.2, 0.9]], evidence = ['getting_up_late', 'traffic_jam'], evidence_card=[2, 2]) cpd_long_queues = TabularCPD('long_queues', 2, [[0.9, 0.2], [0.1, 0.8]], evidence=['traffic_jam'], evidence_card=[2]) model.add_cpds(cpd_rain, cpd_accident, cpd_traffic_jam, cpd_getting_up_late, cpd_late_for_school, cpd_long_queues) cbp_inference = CBP(model) cbp_inference.map_query(variables=['traffic_jam', 'late_for_school']) cbp_inference.map_query(variables=['traffic_jam'], evidence={'accident': 1, 'long_queues': 0})
def generateWysiwygFIDataOld(samplesize=4000, filename="data/preFIData.csv"):
    '''
    Old version of the Bayesian model for the Fair Inference (FI)
    experiment.  Here Y still influences X to make modelling Y simpler;
    that makes it unsuitable for FI, and this model is unused in the
    experiments in the final thesis (kept for reference).

    Draws `samplesize` forward samples from the network, appends four
    continuous attributes sampled conditionally on (A, Y) via the helper
    `samplecontinuous` (defined elsewhere), and writes the combined frame
    to `filename` as CSV.
    '''
    # A: protected attribute; Y: outcome; C*: covariates; X*: features;
    # D*: independent noise parents of the X features.
    wysiwygmodel = BayesianModel([('A', 'C1'), ('A', 'C2'), ('A', 'C3'),
                                  ('A', 'C4'), ('C1', 'Y'), ('Y', 'C2'),
                                  ('Y', 'C3'), ('Y', 'C4'), ('A', 'X1'),
                                  ('A', 'X2'), ('A', 'X3'), ('A', 'X4'),
                                  ('Y', 'X1'), ('Y', 'X2'), ('Y', 'X3'),
                                  ('Y', 'X4'), ('D1', 'X1'), ('D1', 'X2'),
                                  ('D2', 'X3'), ('D3', 'X4')])
    cpd_a = TabularCPD(variable='A', variable_card=2, values=[[0.5], [0.5]])
    cpd_d1 = TabularCPD(variable='D1', variable_card=2,
                        values=[[0.45], [0.55]])
    cpd_d2 = TabularCPD(variable='D2', variable_card=4,
                        values=[[0.22], [0.24], [0.28], [0.26]])
    cpd_d3 = TabularCPD(variable='D3', variable_card=2,
                        values=[[0.54], [0.46]])
    # NOTE(review): values here are 4x1 for a 2-state variable with one
    # binary parent; a (2, 2) table would be expected — this may rely on
    # implicit reshaping in the pgmpy version used.  Confirm.
    cpd_y = TabularCPD(variable='Y', variable_card=2,
                       values=[[0.7], [0.3], [0.3], [0.7]],
                       evidence=['C1'], evidence_card=[2])
    cpd_c1 = TabularCPD(variable='C1', variable_card=2,
                        values=[[0.85, 0.2], [0.15, 0.8]],
                        evidence=['A'], evidence_card=[2])
    cpd_c2 = TabularCPD(variable='C2', variable_card=4,
                        values=[[0.23, 0.27, 0.25, 0.20],
                                [0.35, 0.23, 0.24, 0.15],
                                [0.22, 0.27, 0.25, 0.25],
                                [0.20, 0.23, 0.26, 0.40]],
                        evidence=['A', 'Y'], evidence_card=[2, 2])
    cpd_c3 = TabularCPD(variable='C3', variable_card=2,
                        values=[[0.52, 0.49, 0.5, 0.45],
                                [0.48, 0.51, 0.5, 0.55]],
                        evidence=['A', 'Y'], evidence_card=[2, 2])
    cpd_c4 = TabularCPD(variable='C4', variable_card=4,
                        values=[[0.22, 0.25, 0.25, 0.37],
                                [0.23, 0.25, 0.26, 0.21],
                                [0.23, 0.25, 0.25, 0.22],
                                [0.32, 0.25, 0.24, 0.20]],
                        evidence=['A', 'Y'], evidence_card=[2, 2])
    # "#GOOD" marks the row(s) treated as the favourable feature value.
    cpd_x1 = TabularCPD(
        variable='X1',
        variable_card=2,
        values=[
            [0.38, 0.40, 0.42, 0.44, 0.57, 0.59, 0.60, 0.62],  #GOOD
            [0.62, 0.60, 0.58, 0.56, 0.43, 0.41, 0.40, 0.38]
        ],
        evidence=['A', 'Y', 'D1'],
        evidence_card=[2, 2, 2])
    cpd_x2 = TabularCPD(
        variable='X2',
        variable_card=4,
        values=[
            [0.30, 0.28, 0.27, 0.25, 0.17, 0.16, 0.15, 0.14],
            [0.24, 0.26, 0.26, 0.27, 0.29, 0.31, 0.30, 0.32],  #GOOD 2
            [0.16, 0.18, 0.20, 0.22, 0.35, 0.37, 0.38, 0.40],  #GOOD 1
            [0.30, 0.28, 0.27, 0.26, 0.19, 0.16, 0.17, 0.14]
        ],
        evidence=['A', 'Y', 'D1'],
        evidence_card=[2, 2, 2])
    cpd_x3 = TabularCPD(
        variable='X3',
        variable_card=2,
        values=[[
            0.64, 0.62, 0.62, 0.63, 0.60, 0.58, 0.58, 0.59, 0.40, 0.39,
            0.39, 0.38, 0.38, 0.35, 0.35, 0.37
        ],
                [
                    0.36, 0.38, 0.38, 0.37, 0.40, 0.42, 0.42, 0.41, 0.60,
                    0.61, 0.61, 0.62, 0.62, 0.65, 0.65, 0.63
                ]],  #GOOD
        evidence=['A', 'Y', 'D2'],
        evidence_card=[2, 2, 4])
    cpd_x4 = TabularCPD(
        variable='X4',
        variable_card=4,
        values=[
            [0.25, 0.27, 0.21, 0.23, 0.10, 0.12, 0.07, 0.09],
            [0.36, 0.34, 0.42, 0.40, 0.60, 0.58, 0.64, 0.62],  #GOOD1
            [0.25, 0.27, 0.21, 0.23, 0.10, 0.12, 0.07, 0.09],
            [0.14, 0.12, 0.16, 0.14, 0.20, 0.18, 0.22, 0.20]
        ],  #GOOD2
        evidence=['A', 'Y', 'D3'],
        evidence_card=[2, 2, 2])
    wysiwygmodel.add_cpds(cpd_a, cpd_c1, cpd_c2, cpd_c3, cpd_c4, cpd_x1,
                          cpd_x2, cpd_x3, cpd_x4, cpd_y, cpd_d1, cpd_d2,
                          cpd_d3)
    # Forward-sample the discrete part of the data set.
    datasamples = BayesianModelSampling(wysiwygmodel)
    discframe = datasamples.forward_sample(samplesize)
    # Continuous attributes are sampled conditionally on (A, Y) only.
    AY = discframe[["A", "Y"]]
    C5 = samplecontinuous(AY, samplesize=samplesize, contatt="C5",
                          meana0=1, meana1=1.2, covy0=[1], covy1=[0.9])
    C6 = samplecontinuous(AY, samplesize=samplesize, contatt="C6",
                          meana0=2, meana1=1.8, covy0=[1], covy1=[0.95])
    X5 = samplecontinuous(AY, samplesize=samplesize, contatt="X5",
                          meana0=1.1, meana1=1.4, covy0=[1.1], covy1=[0.95])
    X6 = samplecontinuous(AY, samplesize=samplesize, contatt="X6",
                          meana0=1.9, meana1=1.5, covy0=[1], covy1=[1.1])
    discframe = pd.concat([discframe, C5, C6, X5, X6], axis=1)
    # Fix a canonical column order before writing out.
    ndf = discframe.reindex(axis=1, labels=[
        'A', 'Y', 'C1', 'C2', 'C3', 'C4', 'C5', 'C6', 'X1', 'X2', 'X3',
        'X4', 'X5', 'X6', 'D1', 'D2', 'D3'
    ])
    ndf.to_csv(path_or_buf=filename)