def test_joint_variables_pmf():
    """Check JointVariables bookkeeping and the PMF built on top of it.

    Three six-sample variables are combined into one joint variable. The test
    verifies the reported IDs, that the member variables are the very same
    objects in the order they were passed, the list of joint values, a few
    joint probabilities, and the instances of a single-variable joint.
    """
    animal_var = Variable(['cat', 'dog', 'cat', 'mouse', 'dog', 'cat'])
    animal_var.ID = 3
    animal_var.name = 'animals'

    color_var = Variable(['gray', 'yellow', 'brown', 'silver', 'white', 'gray'])
    color_var.ID = 2
    color_var.name = 'colors'

    size_var = Variable(['small', 'small', 'large', 'small', 'normal', 'small'])
    size_var.ID = 1
    size_var.name = 'sizes'

    joint = JointVariables(size_var, color_var, animal_var)
    joint.update_values()

    assert [1, 2, 3] == joint.variableIDs
    for position, member in enumerate((size_var, color_var, animal_var)):
        assert joint.variables[position] is member

    # Each distinct (size, color, animal) combination, listed once, sorted.
    distinct_combinations = [
        ('large', 'brown', 'cat'),
        ('normal', 'white', 'dog'),
        ('small', 'gray', 'cat'),
        ('small', 'silver', 'mouse'),
        ('small', 'yellow', 'dog'),
    ]
    assert joint.values == distinct_combinations

    joint_pmf = PMF(joint)
    # (queried combination, occurrences out of the six samples)
    expected_probabilities = (
        (('small', 'gray', 'cat'), 2 / 6),
        (('small', 'silver', 'mouse'), 1 / 6),
        (('small', 'silver', 'dog'), 0),  # combination never sampled
    )
    for combination, probability in expected_probabilities:
        assert joint_pmf.p(*combination) == probability

    # A joint of a single variable must hand back that variable's samples.
    lone_joint = JointVariables(animal_var)
    assert ['cat', 'dog', 'cat', 'mouse', 'dog', 'cat'] == lone_joint.instances()
def test_conditional_pmf__multiple_values():
    """Exercise a CPMF whose two sides are both joint variables.

    The first part queries Pr(colors, is_pet | sizes, animals) for a table of
    hand-counted cases. The second part weights three conditional
    distributions by Pr(sizes, animals) and asserts, through ``almostEqual``,
    that each weighted total comes out as 1.
    """
    sizes = Variable(['small', 'small', 'large', 'small', 'normal', 'small'])
    sizes.ID = 1
    sizes.name = 'sizes'

    colors = Variable(['gray', 'yellow', 'brown', 'silver', 'white', 'gray'])
    colors.ID = 2
    colors.name = 'colors'

    animals = Variable(['cat', 'dog', 'cat', 'snake', 'dog', 'cat'])
    animals.ID = 3
    animals.name = 'animals'

    is_pet = Variable(['yes', 'yes', 'yes', 'maybe', 'yes', 'yes'])
    is_pet.ID = 4
    is_pet.name = 'is_pet'

    Pr = CPMF(JointVariables(colors, is_pet), JointVariables(sizes, animals))

    # (given sizes/animals, queried colors/is_pet, matching / conditioning count)
    expected_conditionals = (
        (('small', 'cat'), ('gray', 'yes'), 2 / 2),
        (('small', 'cat'), ('yellow', 'yes'), 0 / 1),
        (('small', 'cat'), ('brown', 'maybe'), 0 / 1),
        (('small', 'dog'), ('yellow', 'yes'), 1 / 1),
        (('small', 'dog'), ('yellow', 'maybe'), 0 / 1),
        (('small', 'dog'), ('silver', 'maybe'), 0 / 1),
        (('large', 'cat'), ('brown', 'yes'), 1 / 1),
        (('large', 'cat'), ('yellow', 'yes'), 0 / 1),
        (('small', 'snake'), ('silver', 'maybe'), 1 / 1),
        (('small', 'snake'), ('silver', 'no'), 0 / 1),
        (('normal', 'dog'), ('white', 'yes'), 1 / 1),
        (('normal', 'dog'), ('silver', 'yes'), 0 / 1),
        (('normal', 'dog'), ('yellow', 'maybe'), 0 / 1),
    )
    for condition, outcome, expected in expected_conditionals:
        assert Pr.given(*condition).p(*outcome) == expected

    size_animal = JointVariables(sizes, animals)
    pr_joint_given_sa = CPMF(JointVariables(colors, is_pet), size_animal)
    pr_sa = PMF(size_animal)
    pr_color_given_sa = CPMF(colors, size_animal)
    pr_pet_given_sa = CPMF(is_pet, size_animal)

    total_joint = 0.0
    total_color = 0.0
    total_pet = 0.0

    # The probability yielded alongside each key is not used; the weight is
    # looked up again through pr_sa.p(sa).
    for sa, _p_sa in pr_sa.items():
        for color, p_color in pr_color_given_sa.given(sa).items():
            total_color += p_color * pr_sa.p(sa)
            # NOTE(review): the pet accumulator sits in the innermost loop, so
            # it is also repeated once per colour -- presumably harmless here
            # because each (size, animal) pair occurs with a single colour in
            # this data; confirm that this is intended.
            for pet, p_pet in pr_pet_given_sa.given(sa).items():
                p_joint = pr_joint_given_sa.given(sa).p(color, pet)
                total_joint += p_joint * pr_sa.p(sa)
                total_pet += p_pet * pr_sa.p(sa)

    assert almostEqual(1, total_joint)
    assert almostEqual(1, total_color)
    assert almostEqual(1, total_pet)
def test_make_cpmf_PrXcZ_variant_1() -> None:
    """Build a joint PMF over two binary variables and check IDs and values.

    The IDs are assigned through ``IDs(1000, 1111)`` and read back with
    ``IDs()``; the four joint probabilities are counted by hand out of the
    eight samples.
    """
    first = Variable([0, 1, 1, 1, 0, 1, 0, 1])
    second = Variable([0, 0, 1, 1, 0, 1, 1, 1])

    joint_pmf = PMF(JointVariables(first, second))
    joint_pmf.IDs(1000, 1111)
    assert joint_pmf.IDs() == (1000, 1111)

    # (joint value passed as a single tuple, occurrences / 8 samples)
    expected_probabilities = (
        ((0, 0), 2 / 8),
        ((0, 1), 1 / 8),
        ((1, 0), 1 / 8),
        ((1, 1), 4 / 8),
    )
    for joint_value, probability in expected_probabilities:
        assert joint_pmf.p(joint_value) == probability
def test_pmf_summing_over_variable():
    """Marginalise a four-variable joint PMF one variable at a time.

    Starting from the joint over V0..V3, the test calls ``sum_over`` for V2,
    then V1, then V0. After each step it checks that the probabilities still
    total 1, the expected values of the reduced distribution, and the IDs
    that remain.
    """
    V0 = Variable([0, 1, 1, 1, 0, 1, 0, 1])
    V1 = Variable([0, 0, 1, 1, 0, 1, 1, 1])
    V2 = Variable([0, 0, 0, 0, 1, 0, 1, 1])
    V3 = Variable([0, 0, 0, 0, 0, 0, 1, 1])

    V0.ID = 1000
    V1.ID = 1111
    V2.ID = 1222
    V3.ID = 1333

    # Full joint over (V0, V1, V2, V3).
    joint = PMF(JointVariables(V0, V1, V2, V3))
    assert joint.IDs() == (1000, 1111, 1222, 1333)
    full_joint_cases = (
        ((0, 0, 0, 0), 1 / 8),
        ((1, 0, 0, 0), 1 / 8),
        ((1, 1, 0, 0), 3 / 8),
        ((0, 0, 1, 0), 1 / 8),
        ((0, 1, 1, 1), 1 / 8),
        ((1, 1, 1, 1), 1 / 8),
    )
    for joint_value, probability in full_joint_cases:
        assert joint.p(joint_value) == probability

    # V2 summed out: values are now (V0, V1, V3).
    without_V2 = joint.sum_over(V2.ID)
    assert sum(without_V2.probabilities.values()) == 1
    without_V2_cases = (
        ((0, 0, 0), 2 / 8),
        ((1, 0, 0), 1 / 8),
        ((1, 1, 0), 3 / 8),
        ((0, 1, 1), 1 / 8),
        ((1, 1, 1), 1 / 8),
    )
    for joint_value, probability in without_V2_cases:
        assert without_V2.p(joint_value) == probability
    assert without_V2.IDs() == (V0.ID, V1.ID, V3.ID)

    # V1 summed out as well: values are now (V0, V3).
    without_V1 = without_V2.sum_over(V1.ID)
    assert sum(without_V1.probabilities.values()) == 1
    without_V1_cases = (
        ((0, 0), 2 / 8),
        ((1, 0), 4 / 8),
        ((0, 1), 1 / 8),
        ((1, 1), 1 / 8),
    )
    for joint_value, probability in without_V1_cases:
        assert without_V1.p(joint_value) == probability
    assert without_V1.IDs() == (V0.ID, V3.ID)

    # Only V3 remains; values are queried as bare scalars from here on.
    only_V3 = without_V1.sum_over(V0.ID)
    assert sum(only_V3.probabilities.values()) == 1
    print(only_V3.probabilities)
    assert only_V3.p(0) == 6 / 8
    assert only_V3.p(1) == 2 / 8
    assert only_V3.IDs() == (V3.ID,)
def test_single_variable_pmf():
    """Check the PMF of one variable built from a 16-sample numpy array.

    Covers the variable's value list after ``update_values``, the PMF's raw
    counts and probabilities, that the probabilities total 1, three point
    lookups, and the expected value computed from ``items()``.
    """
    samples = numpy.array([3, 5, 1, 1, 4, 3, 7, 0, 2, 1, 0, 5, 4, 7, 2, 4])
    variable = Variable(samples)
    variable.ID = 1
    variable.name = 'test_variable_1'
    variable.update_values()
    assert [0, 1, 2, 3, 4, 5, 7] == variable.values

    pmf = PMF(variable)

    occurrences = {0: 2, 1: 3, 2: 2, 3: 2, 4: 3, 5: 2, 7: 2}
    assert pmf.value_counts == occurrences

    # Same keys as the counts, each divided by the 16 samples.
    expected_probabilities = {
        value: count / 16 for value, count in occurrences.items()
    }
    assert pmf.probabilities == expected_probabilities

    assert 1 == sum(pmf.values())
    for value in (3, 2, 5):
        assert 2 / 16 == pmf.p(value)

    # Expected value: sum of probability * value over the distribution.
    expectation = 0
    for value, probability in pmf.items():
        expectation += probability * value
    assert 3.0625 == expectation
def mutual_information(
    PrXY: PMF,
    PrX: PMF,
    PrY: PMF,
    base=2,
) -> float:
    """Accumulate the mutual information of X and Y from their PMFs.

    For every pair ``(x, y)`` drawn from ``PrX.items()`` and ``PrY.items()``
    the term ``p(x, y) * log(p(x, y) / (p(x) * p(y)))`` is added to the
    result, where ``p(x, y)`` comes from ``PrXY.p(x, y)``.

    Args:
        PrXY: Joint distribution, queried as ``PrXY.p(x, y)``.
        PrX: Marginal distribution of X, iterated through ``items()``.
        PrY: Marginal distribution of Y, iterated through ``items()``.
        base: Passed to ``create_logarithm_function`` (presumably the
            logarithm base, so 2 would give a result in bits -- confirm
            against that helper).

    Returns:
        The accumulated sum; ``0.0`` when no pair contributes.
    """
    log = create_logarithm_function(base)

    total = 0.0
    for x, p_x in PrX.items():
        for y, p_y in PrY.items():
            p_joint = PrXY.p(x, y)
            # A zero in any of the three probabilities would put a zero inside
            # the logarithm or its denominator, so such pairs add nothing.
            if p_joint != 0 and p_x != 0 and p_y != 0:
                total += p_joint * log(p_joint / (p_x * p_y))
    return total