Пример #1
0
def test_joint_variables_pmf():
    """Join three variables and check the joint's IDs, members, values and PMF.

    Also checks that a joint built from a single variable reports that
    variable's instances unchanged.
    """
    # The IDs are assigned in the reverse of the creation order (3, 2, 1).
    animals = Variable(['cat', 'dog', 'cat', 'mouse', 'dog', 'cat'])
    animals.ID, animals.name = 3, 'animals'

    colors = Variable(['gray', 'yellow', 'brown', 'silver', 'white', 'gray'])
    colors.ID, colors.name = 2, 'colors'

    sizes = Variable(['small', 'small', 'large', 'small', 'normal', 'small'])
    sizes.ID, sizes.name = 1, 'sizes'

    fauna = JointVariables(sizes, colors, animals)
    fauna.update_values()
    assert [1, 2, 3] == fauna.variableIDs

    # The joint must hold the very same Variable objects, in argument order.
    for position, member in enumerate((sizes, colors, animals)):
        assert fauna.variables[position] is member

    # Each distinct (size, color, animal) triple of the six samples, once.
    expected_values = [
        ('large', 'brown', 'cat'),
        ('normal', 'white', 'dog'),
        ('small', 'gray', 'cat'),
        ('small', 'silver', 'mouse'),
        ('small', 'yellow', 'dog'),
    ]
    assert fauna.values == expected_values

    joint_pmf = PMF(fauna)
    expected_probabilities = (
        (('small', 'gray', 'cat'), 2 / 6),      # seen in two of six samples
        (('small', 'silver', 'mouse'), 1 / 6),  # seen in one of six samples
        (('small', 'silver', 'dog'), 0),        # never seen
    )
    for outcome, probability in expected_probabilities:
        assert joint_pmf.p(*outcome) == probability

    lone_joint = JointVariables(animals)
    assert ['cat', 'dog', 'cat', 'mouse', 'dog', 'cat'] == lone_joint.instances()
Пример #2
0
def test_conditional_pmf__multiple_values():
    """Check a CPMF over (colors, is_pet) conditioned on (sizes, animals).

    The first part compares individual conditional probabilities against
    hand-counted values. The second part checks that weighting each
    conditional distribution by the probability of its condition and summing
    gives a total of 1, for the joint (colors, is_pet) as well as for colors
    and is_pet separately.
    """
    sizes = Variable(['small', 'small', 'large', 'small', 'normal', 'small'])
    sizes.ID = 1
    sizes.name = 'sizes'

    colors = Variable(['gray', 'yellow', 'brown', 'silver', 'white', 'gray'])
    colors.ID = 2
    colors.name = 'colors'

    animals = Variable(['cat', 'dog', 'cat', 'snake', 'dog', 'cat'])
    animals.ID = 3
    animals.name = 'animals'

    is_pet = Variable(['yes', 'yes', 'yes', 'maybe', 'yes', 'yes'])
    is_pet.ID = 4
    is_pet.name = 'is_pet'

    Pr = CPMF(JointVariables(colors, is_pet), JointVariables(sizes, animals))

    # Two samples are ('small', 'cat'); both are ('gray', 'yes').
    assert Pr.given('small', 'cat').p('gray', 'yes') == 2 / 2
    assert Pr.given('small', 'cat').p('yellow', 'yes') == 0 / 1
    assert Pr.given('small', 'cat').p('brown', 'maybe') == 0 / 1

    assert Pr.given('small', 'dog').p('yellow', 'yes') == 1 / 1
    assert Pr.given('small', 'dog').p('yellow', 'maybe') == 0 / 1
    assert Pr.given('small', 'dog').p('silver', 'maybe') == 0 / 1

    assert Pr.given('large', 'cat').p('brown', 'yes') == 1 / 1
    assert Pr.given('large', 'cat').p('yellow', 'yes') == 0 / 1

    assert Pr.given('small', 'snake').p('silver', 'maybe') == 1 / 1
    # 'no' never occurs among the is_pet samples at all.
    assert Pr.given('small', 'snake').p('silver', 'no') == 0 / 1

    assert Pr.given('normal', 'dog').p('white', 'yes') == 1 / 1
    assert Pr.given('normal', 'dog').p('silver', 'yes') == 0 / 1
    assert Pr.given('normal', 'dog').p('yellow', 'maybe') == 0 / 1

    SA = JointVariables(sizes, animals)
    PrAll = CPMF(JointVariables(colors, is_pet), SA)
    PrSA = PMF(SA)
    PrCcSA = CPMF(colors, SA)
    PrIPcSA = CPMF(is_pet, SA)

    test_p_all = 0.0
    test_p_c = 0.0
    test_p_ip = 0.0

    for (sa, _) in PrSA.items():
        p_sa = PrSA.p(sa)

        for (c, pcsa) in PrCcSA.given(sa).items():
            test_p_c += pcsa * p_sa
            for (ip, _) in PrIPcSA.given(sa).items():
                test_p_all += PrAll.given(sa).p(c, ip) * p_sa

        # Accumulate the is_pet total once per condition, not once per color:
        # nested inside the color loop it would be over-counted as soon as a
        # condition is seen with more than one color.
        for (_, pipsa) in PrIPcSA.given(sa).items():
            test_p_ip += pipsa * p_sa

    assert almostEqual(1, test_p_all)
    assert almostEqual(1, test_p_c)
    assert almostEqual(1, test_p_ip)
Пример #3
0
def test_make_cpmf_PrXcZ_variant_1() -> None:
    """Check ID round-tripping and the probabilities of a two-variable PMF."""
    first = Variable([0, 1, 1, 1, 0, 1, 0, 1])
    second = Variable([0, 0, 1, 1, 0, 1, 1, 1])

    joint_pmf = PMF(JointVariables(first, second))

    # IDs given to IDs(...) must be reported back by a bare IDs() call.
    joint_pmf.IDs(1000, 1111)
    assert joint_pmf.IDs() == (1000, 1111)

    # How often each (first, second) pair occurs among the 8 paired samples.
    occurrences = {
        (0, 0): 2,
        (0, 1): 1,
        (1, 0): 1,
        (1, 1): 4,
    }
    for outcome, count in occurrences.items():
        assert joint_pmf.p(outcome) == count / 8
Пример #4
0
def test_pmf_summing_over_variable():
    """Sum variables out of a four-variable PMF one at a time.

    After each ``sum_over`` call the test checks that the probabilities still
    total 1, that selected outcomes have the expected probability, and that
    the summed-out variable's ID is no longer reported by ``IDs()``.
    """

    def assert_eighths(pmf, expected):
        # ``expected`` pairs each outcome with its probability in eighths.
        for outcome, eighths in expected:
            assert pmf.p(outcome) == eighths / 8

    V0 = Variable([0, 1, 1, 1, 0, 1, 0, 1])
    V1 = Variable([0, 0, 1, 1, 0, 1, 1, 1])
    V2 = Variable([0, 0, 0, 0, 1, 0, 1, 1])
    V3 = Variable([0, 0, 0, 0, 0, 0, 1, 1])

    for variable, identifier in ((V0, 1000), (V1, 1111), (V2, 1222), (V3, 1333)):
        variable.ID = identifier

    Pr = PMF(JointVariables(V0, V1, V2, V3))
    assert Pr.IDs() == (1000, 1111, 1222, 1333)
    assert_eighths(Pr, [
        ((0, 0, 0, 0), 1),
        ((1, 0, 0, 0), 1),
        ((1, 1, 0, 0), 3),
        ((0, 0, 1, 0), 1),
        ((0, 1, 1, 1), 1),
        ((1, 1, 1, 1), 1),
    ])

    # Sum out V2: the remaining outcomes are (V0, V1, V3) triples.
    Pr = Pr.sum_over(V2.ID)
    assert sum(Pr.probabilities.values()) == 1
    assert_eighths(Pr, [
        ((0, 0, 0), 2),
        ((1, 0, 0), 1),
        ((1, 1, 0), 3),
        ((0, 1, 1), 1),
        ((1, 1, 1), 1),
    ])
    assert Pr.IDs() == (V0.ID, V1.ID, V3.ID)

    # Sum out V1: the remaining outcomes are (V0, V3) pairs.
    Pr = Pr.sum_over(V1.ID)
    assert sum(Pr.probabilities.values()) == 1
    assert_eighths(Pr, [
        ((0, 0), 2),
        ((1, 0), 4),
        ((0, 1), 1),
        ((1, 1), 1),
    ])
    assert Pr.IDs() == (V0.ID, V3.ID)

    # Sum out V0: only V3 is left and outcomes are queried as bare values.
    Pr = Pr.sum_over(V0.ID)
    assert sum(Pr.probabilities.values()) == 1

    print(Pr.probabilities)

    assert_eighths(Pr, [(0, 6), (1, 2)])
    assert Pr.IDs() == (V3.ID,)
Пример #5
0
def test_single_variable_pmf():
    """Check values, counts, probabilities and expectation of a one-variable PMF."""
    samples = numpy.array([3, 5, 1, 1, 4, 3, 7, 0, 2, 1, 0, 5, 4, 7, 2, 4])
    variable = Variable(samples)
    variable.ID, variable.name = 1, 'test_variable_1'

    variable.update_values()
    assert [0, 1, 2, 3, 4, 5, 7] == variable.values

    pmf = PMF(variable)

    # Occurrences of each distinct value among the 16 samples.
    expected_counts = {0: 2, 1: 3, 2: 2, 3: 2, 4: 3, 5: 2, 7: 2}
    assert pmf.value_counts == expected_counts

    expected_probabilities = {
        value: count / 16 for value, count in expected_counts.items()
    }
    assert pmf.probabilities == expected_probabilities

    assert 1 == sum(pmf.values())

    for value in (3, 2, 5):
        assert 2 / 16 == pmf.p(value)

    # Expected value: each value weighted by its probability (49 / 16).
    expectation = 0
    for value, probability in pmf.items():
        expectation += probability * value

    assert 3.0625 == expectation
Пример #6
0
def mutual_information(
    PrXY: PMF,
    PrX: PMF,
    PrY: PMF,
    base=2,
) -> float:
    """Accumulate ``pxy * log(pxy / (px * py))`` over all (x, y) pairs.

    The pairs are formed from the items of ``PrX`` and ``PrY``; the joint
    probability of each pair is looked up with ``PrXY.p(x, y)``. Pairs for
    which the joint or either marginal probability is zero contribute nothing.

    Args:
        PrXY: Joint distribution, queried through ``p(x, y)``.
        PrX: Distribution whose ``items()`` yield ``(x, px)`` pairs.
        PrY: Distribution whose ``items()`` yield ``(y, py)`` pairs.
        base: Passed to ``create_logarithm_function`` to obtain the logarithm
            used in each term (presumably its base; confirm against that
            helper).

    Returns:
        The accumulated sum, starting from ``0.0``.
    """
    log = create_logarithm_function(base)

    total = 0.0
    for (x, px) in PrX.items():
        for (y, py) in PrY.items():
            pxy = PrXY.p(x, y)
            # A zero probability would make the term's log or division
            # undefined, so such pairs are skipped.
            if pxy != 0 and px != 0 and py != 0:
                total += pxy * log(pxy / (px * py))
    return total