def make_cpmf_PrXcZ(self, X, Z, PrZ=None):
    """Build the conditional PMF of ``X`` given ``Z`` from AD-tree PMFs.

    The joint PMF over ``[X] + Z`` is requested from ``self.AD_tree`` with
    the variables in sorted order, and each joint probability is divided by
    the probability of the corresponding ``Z`` values.

    Args:
        X: The conditioned variable.
        Z: An iterable of conditioning variables. Any iterable is accepted;
            it is materialised into a list once.
        PrZ: Optional PMF over ``Z``. When ``None`` it is requested from
            ``self.AD_tree``.

    Returns:
        A tuple ``(PrXcZ, PrXZ)``: the conditional PMF and the joint PMF it
        was derived from.
    """
    # Materialise Z once so that tuples and one-shot iterables work with the
    # list concatenation, the membership tests and the repeated iteration
    # below.
    Z = list(Z)

    if PrZ is None:
        # Hand over a copy so the callee cannot reorder our local list.
        PrZ = self.AD_tree.make_pmf(list(Z))

    unsorted_variables = [X] + Z
    joint_variables = sorted(unsorted_variables)

    # Map each variable to its first position in the sorted joint list.
    # A single pass with setdefault keeps the "first occurrence" semantics
    # of list.index() without rescanning the list for every variable.
    index = {}
    for position, var in enumerate(joint_variables):
        index.setdefault(var, position)

    # Positions inside a joint key do not depend on the key itself, so they
    # are resolved once instead of once per joint key.
    z_positions = [index[zvar] for zvar in Z]
    # If X itself occurs in Z this list is empty and the lookup inside the
    # loop raises IndexError, exactly as the per-key lookup used to.
    x_positions = [index[var] for var in unsorted_variables if var not in Z]
    single_conditioning_variable = len(z_positions) == 1

    PrXZ = self.AD_tree.make_pmf(joint_variables)
    PrXcZ = CPMF(None, None)

    for joint_key, joint_p in PrXZ.items():
        zkey = tuple(joint_key[position] for position in z_positions)
        varkey = joint_key[x_positions[0]]

        # A single conditioning variable is keyed by its bare value rather
        # than by a 1-tuple.
        if single_conditioning_variable:
            zkey = zkey[0]

        try:
            pmf = PrXcZ.conditional_probabilities[zkey]
        except KeyError:
            pmf = PMF(None)
            PrXcZ.conditional_probabilities[zkey] = pmf

        try:
            pmf.probabilities[varkey] = joint_p / PrZ.p(zkey)
        except ZeroDivisionError:
            # PrZ reports zero probability for this zkey: the conditional
            # probability is undefined, so the entry is deliberately left
            # out of the PMF.
            pass

    return (PrXcZ, PrXZ)
def calculate_pmf_for_cmi(
    X: Variable,
    Y: Variable,
    Z: Union[Variable, JointVariables],
) -> tuple[CPMF, CPMF, CPMF, PMF]:
    """Construct the distributions used to compute I(X; Y | Z).

    Returns:
        A 4-tuple, in this order: the CPMF of the joint ``(X, Y)`` given
        ``Z``, the CPMF of ``X`` given ``Z``, the CPMF of ``Y`` given ``Z``,
        and the PMF of ``Z``.
    """
    joint_xy = JointVariables(X, Y)
    return (
        CPMF(joint_xy, Z),
        CPMF(X, Z),
        CPMF(Y, Z),
        PMF(Z),
    )
def conditional_mutual_information(
    PrXYcZ: CPMF,
    PrXcZ: CPMF,
    PrYcZ: CPMF,
    PrZ: PMF,
    base: Union[float, str] = 2,
) -> float:
    """Compute the conditional mutual information I(X; Y | Z).

    The value is the sum, over every ``z`` in ``PrZ`` and every ``x`` / ``y``
    listed by ``PrXcZ.given(z)`` / ``PrYcZ.given(z)``, of::

        p(z) * p(x, y | z) * log(p(x, y | z) / (p(x | z) * p(y | z)))

    Terms in which any of the three conditional probabilities is zero are
    skipped.

    Args:
        PrXYcZ: Conditional PMF of the joint (X, Y) given Z.
        PrXcZ: Conditional PMF of X given Z.
        PrYcZ: Conditional PMF of Y given Z.
        PrZ: PMF of Z.
        base: Logarithm base, forwarded to ``create_logarithm_function``.

    Returns:
        The absolute value of the accumulated sum.
    """
    log = create_logarithm_function(base)
    total = 0.0

    for z, p_z in PrZ.items():
        for x, p_x_given_z in PrXcZ.given(z).items():
            for y, p_y_given_z in PrYcZ.given(z).items():
                p_xy_given_z = PrXYcZ.given(z).p(x, y)

                # A zero factor would mean log(0) or a division by zero;
                # such terms contribute nothing and are skipped.
                if p_xy_given_z == 0 or p_x_given_z == 0 or p_y_given_z == 0:
                    continue

                ratio = p_xy_given_z / (p_x_given_z * p_y_given_z)
                total += p_z * p_xy_given_z * log(ratio)

    return abs(total)
def assert_cpmf_adtree_vs_dm(dm, adtree, cd_vars, cn_vars):
    """Assert that ``adtree.make_cpmf`` agrees with a directly built CPMF.

    Args:
        dm: Object providing ``get_variables('X', ids)``.
        adtree: Object providing ``make_cpmf(conditioned, conditioning)``.
        cd_vars: ID (int) or collection of IDs of the conditioned variables.
        cn_vars: ID (int) or collection of IDs of the conditioning variables.
    """
    # A bare int is shorthand for a one-element list of IDs.
    conditioned_ids = [cd_vars] if isinstance(cd_vars, int) else cd_vars
    conditioning_ids = [cn_vars] if isinstance(cn_vars, int) else cn_vars

    conditioned = dm.get_variables('X', conditioned_ids)
    conditioning = dm.get_variables('X', conditioning_ids)

    reference = CPMF(conditioned, conditioning)
    from_adtree = adtree.make_cpmf(conditioned, conditioning)
    assert reference == from_adtree
def test_conditional_pmf__from_bayesian_network():
    """Check PMFs/CPMFs of a sampled 'survey' network against its BIF tables.

    A dataset is sampled from ``survey.bif``; the distributions computed from
    the samples must almost equal the probability distributions stored on the
    corresponding nodes of the Bayesian network.
    """
    # Seed 42 was observed to need about 2e6 samples for this test to pass,
    # whereas seed 1984 passes with only 4e4. The cause is unknown (possibly
    # a bias in the random generator).
    configuration = {
        'sourcepath': testutil.bif_folder / 'survey.bif',
        'sample_count': int(4e4),
        'random_seed': 1984,
        'values_as_indices': False,
        'objectives': ['R', 'TRN'],
    }

    bayesian_network = BayesianNetwork.from_bif_file(
        configuration['sourcepath'], use_cache=False)
    bayesian_network.finalize()

    source = SampledBayesianNetworkDatasetSource(configuration)
    source.reset_random_seed = True
    datasetmatrix = source.create_dataset_matrix('test_sbnds')

    assert ['AGE', 'EDU', 'OCC', 'SEX'] == datasetmatrix.column_labels_X
    assert ['R', 'TRN'] == datasetmatrix.column_labels_Y

    def column(partition, label):
        # Wrap one labelled dataset column as a Variable.
        return Variable(datasetmatrix.get_column_by_label(partition, label))

    def expected(label):
        # Reference distribution stored on the network node.
        return bayesian_network.variable_nodes[label].probdist

    # Root nodes: unconditional distributions.
    AGE = column('X', 'AGE')
    PrAge = PMF(AGE)
    SEX = column('X', 'SEX')
    PrSex = PMF(SEX)
    assert_PMF_AlmostEquals_BNProbDist(expected('AGE'), PrAge)
    assert_PMF_AlmostEquals_BNProbDist(expected('SEX'), PrSex)

    # Remaining nodes: distributions conditioned on the given variables.
    EDU = column('X', 'EDU')
    PrEdu = CPMF(EDU, given=JointVariables(AGE, SEX))
    assert_CPMF_AlmostEquals_BNProbDist(expected('EDU'), PrEdu)

    OCC = column('X', 'OCC')
    PrOcc = CPMF(OCC, given=EDU)
    assert_CPMF_AlmostEquals_BNProbDist(expected('OCC'), PrOcc)

    R = column('Y', 'R')
    PrR = CPMF(R, given=EDU)
    assert_CPMF_AlmostEquals_BNProbDist(expected('R'), PrR)

    TRN = column('Y', 'TRN')
    PrTRN = CPMF(TRN, given=JointVariables(OCC, R))
    assert_CPMF_AlmostEquals_BNProbDist(expected('TRN'), PrTRN)
def test_conditional_pmf__multiple_values():
    """Check a CPMF of joint variables conditioned on joint variables.

    First, individual conditional probabilities of (colors, is_pet) given
    (sizes, animals) are compared with hand-counted values. Then the
    conditionals, weighted by the probability of their condition, are
    checked to sum to 1.
    """
    def labelled(values, ID, name):
        # Build a Variable and attach its ID and name.
        variable = Variable(values)
        variable.ID = ID
        variable.name = name
        return variable

    sizes = labelled(
        ['small', 'small', 'large', 'small', 'normal', 'small'], 1, 'sizes')
    colors = labelled(
        ['gray', 'yellow', 'brown', 'silver', 'white', 'gray'], 2, 'colors')
    animals = labelled(
        ['cat', 'dog', 'cat', 'snake', 'dog', 'cat'], 3, 'animals')
    is_pet = labelled(
        ['yes', 'yes', 'yes', 'maybe', 'yes', 'yes'], 4, 'is_pet')

    Pr = CPMF(JointVariables(colors, is_pet), JointVariables(sizes, animals))

    # (condition, event, expected probability)
    expectations = [
        (('small', 'cat'), ('gray', 'yes'), 2 / 2),
        (('small', 'cat'), ('yellow', 'yes'), 0 / 1),
        (('small', 'cat'), ('brown', 'maybe'), 0 / 1),
        (('small', 'dog'), ('yellow', 'yes'), 1 / 1),
        (('small', 'dog'), ('yellow', 'maybe'), 0 / 1),
        (('small', 'dog'), ('silver', 'maybe'), 0 / 1),
        (('large', 'cat'), ('brown', 'yes'), 1 / 1),
        (('large', 'cat'), ('yellow', 'yes'), 0 / 1),
        (('small', 'snake'), ('silver', 'maybe'), 1 / 1),
        (('small', 'snake'), ('silver', 'no'), 0 / 1),
        (('normal', 'dog'), ('white', 'yes'), 1 / 1),
        (('normal', 'dog'), ('silver', 'yes'), 0 / 1),
        (('normal', 'dog'), ('yellow', 'maybe'), 0 / 1),
    ]
    for condition, event, probability in expectations:
        assert Pr.given(*condition).p(*event) == probability

    SA = JointVariables(sizes, animals)
    PrAll = CPMF(JointVariables(colors, is_pet), SA)
    PrSA = PMF(SA)
    PrCcSA = CPMF(colors, SA)
    PrIPcSA = CPMF(is_pet, SA)

    # Accumulate each conditional probability weighted by the probability
    # of its condition; every total is expected to come out as 1.
    total_all = 0.0
    total_colors = 0.0
    total_is_pet = 0.0
    for sa, _ in PrSA.items():
        for c, p_color in PrCcSA.given(sa).items():
            total_colors += p_color * PrSA.p(sa)
            for ip, p_is_pet in PrIPcSA.given(sa).items():
                p_joint = PrAll.given(sa).p(c, ip)
                total_all += p_joint * PrSA.p(sa)
                total_is_pet += p_is_pet * PrSA.p(sa)

    assert almostEqual(1, total_all)
    assert almostEqual(1, total_colors)
    assert almostEqual(1, total_is_pet)
def test_conditional_pmf__binary():
    """Check CPMFs over small binary variables against hand-counted values."""
    V0 = Variable([0, 1, 0, 1, 0, 1, 0, 1])
    V1 = Variable([0, 0, 1, 1, 0, 0, 1, 1])
    V2 = Variable([0, 0, 0, 0, 1, 1, 1, 1])
    V78 = Variable([0, 0, 0, 0, 0, 0, 1, 1])

    def check(cpmf, expectations):
        # Each expectation is (condition values, value, expected probability).
        for condition, value, probability in expectations:
            assert cpmf.given(*condition).p(value) == probability

    check(CPMF(V0, V78), [
        ((0,), 0, 3 / 6),
        ((0,), 1, 3 / 6),
        ((1,), 0, 1 / 2),
        ((1,), 1, 1 / 2),
    ])

    check(CPMF(V2, V78), [
        ((0,), 0, 4 / 6),
        ((0,), 1, 2 / 6),
        ((1,), 0, 0 / 2),
        ((1,), 1, 2 / 2),
    ])

    check(CPMF(V78, V1), [
        ((0,), 0, 4 / 4),
        ((0,), 1, 0 / 4),
        ((1,), 0, 2 / 4),
        ((1,), 1, 2 / 4),
    ])

    # The condition (V2=0, V78=1) never occurs in the data, so both
    # probabilities under it are expected to be 0.
    check(CPMF(V1, JointVariables(V2, V78)), [
        ((0, 0), 0, 2 / 4),
        ((0, 0), 1, 2 / 4),
        ((0, 1), 0, 0 / 1),
        ((0, 1), 1, 0 / 1),
        ((1, 0), 0, 2 / 2),
        ((1, 0), 1, 0 / 2),
        ((1, 1), 0, 0 / 2),
        ((1, 1), 1, 2 / 2),
    ])