Пример #1
0
    def make_cpmf_PrXcZ(self, X, Z, PrZ=None):
        """Build the conditional distribution Pr(X | Z) from AD-tree PMFs.

        The joint PMF over ``sorted([X] + Z)`` is requested from
        ``self.AD_tree`` and every joint probability is divided by the
        probability of its Z-part, as reported by ``PrZ``.

        Args:
            X: The conditioned variable.
            Z: List of conditioning variables (it is concatenated to ``[X]``,
                so it must be a list). Must be orderable together with ``X``.
            PrZ: Optional PMF over ``Z``; when ``None`` it is obtained from
                ``self.AD_tree.make_pmf(list(Z))``.

        Returns:
            A tuple ``(PrXcZ, PrXZ)``: the conditional PMF that was built and
            the joint PMF it was derived from.
        """
        if PrZ is None:
            PrZ = self.AD_tree.make_pmf(list(Z))

        unsorted_variables = [X] + Z
        joint_variables = sorted(unsorted_variables)

        # Position of every variable inside a joint key. Should a variable
        # occur more than once, its first position is the one recorded.
        index = {}
        for position, variable in enumerate(joint_variables):
            index.setdefault(variable, position)

        PrXZ = self.AD_tree.make_pmf(joint_variables)

        PrXcZ = CPMF(None, None)

        # These positions are the same for every joint key, so they are
        # resolved once instead of once per row.
        z_positions = [index[zvar] for zvar in Z]
        x_positions = [
            index[var] for var in unsorted_variables
            if var not in Z
        ]

        for joint_key, joint_p in PrXZ.items():
            zkey = tuple(joint_key[position] for position in z_positions)
            varkey = joint_key[x_positions[0]]

            # A single conditioning variable is keyed by its bare value
            # rather than by a 1-tuple.
            if len(zkey) == 1:
                (zkey,) = zkey

            try:
                pmf = PrXcZ.conditional_probabilities[zkey]
            except KeyError:
                pmf = PMF(None)
                PrXcZ.conditional_probabilities[zkey] = pmf

            try:
                conditional_p = joint_p / PrZ.p(zkey)
            except ZeroDivisionError:
                # Pr(Z = zkey) is zero: no entry is stored for this key.
                continue
            pmf.probabilities[varkey] = conditional_p

        return (PrXcZ, PrXZ)
Пример #2
0
def calculate_pmf_for_cmi(
    X: Variable,
    Y: Variable,
    Z: Union[Variable, JointVariables],
) -> tuple[CPMF, CPMF, CPMF, PMF]:
    """Build the four distributions used to compute I(X; Y | Z).

    Args:
        X: First variable.
        Y: Second variable.
        Z: Conditioning variable, or a joint of several variables.

    Returns:
        The tuple ``(Pr(X, Y | Z), Pr(X | Z), Pr(Y | Z), Pr(Z))``, in that
        order.
    """
    XY = JointVariables(X, Y)
    return (CPMF(XY, Z), CPMF(X, Z), CPMF(Y, Z), PMF(Z))
Пример #3
0
def conditional_mutual_information(
    PrXYcZ: CPMF,
    PrXcZ: CPMF,
    PrYcZ: CPMF,
    PrZ: PMF,
    base: Union[float, str] = 2,
) -> float:
    """Compute the conditional mutual information I(X; Y | Z).

    The value is accumulated as::

        sum_z p(z) * sum_x sum_y p(x, y | z) * log(p(x, y | z) / (p(x | z) * p(y | z)))

    Terms in which any of ``p(x, y | z)``, ``p(x | z)`` or ``p(y | z)`` is
    zero are skipped.

    Args:
        PrXYcZ: Conditional distribution Pr(X, Y | Z).
        PrXcZ: Conditional distribution Pr(X | Z).
        PrYcZ: Conditional distribution Pr(Y | Z).
        PrZ: Distribution Pr(Z).
        base: Logarithm base, forwarded to ``create_logarithm_function``.

    Returns:
        The absolute value of the accumulated sum.
    """
    logarithm = create_logarithm_function(base)
    cMI = 0.0
    for z, pz in PrZ.items():
        # The conditioned distributions depend on z only, so they are looked
        # up once per z rather than once per (x, y) pair.
        PrXY_given_z = PrXYcZ.given(z)
        PrX_given_z = PrXcZ.given(z)
        PrY_given_z = PrYcZ.given(z)

        for x, pxcz in PrX_given_z.items():
            if pxcz == 0:
                continue
            for y, pycz in PrY_given_z.items():
                if pycz == 0:
                    continue
                pxycz = PrXY_given_z.p(x, y)
                if pxycz == 0:
                    continue
                cMI += pz * pxycz * logarithm(pxycz / (pxcz * pycz))

    # NOTE(review): abs() presumably hides tiny negative results caused by
    # floating-point rounding; confirm that this is the intent.
    return abs(cMI)
Пример #4
0
def assert_cpmf_adtree_vs_dm(dm, adtree, cd_vars, cn_vars):
    """Assert that the AD-tree CPMF equals the CPMF built from the dataset.

    Args:
        dm: Object providing ``get_variables('X', ids)``.
        adtree: Object providing ``make_cpmf(conditioned, conditioning)``.
        cd_vars: ID (``int``) or collection of IDs of the conditioned
            variables.
        cn_vars: ID (``int``) or collection of IDs of the conditioning
            variables.

    Raises:
        AssertionError: If the two CPMFs do not compare equal.
    """
    # A bare integer is shorthand for a one-element list of IDs.
    conditioned_ids = [cd_vars] if isinstance(cd_vars, int) else cd_vars
    conditioning_ids = [cn_vars] if isinstance(cn_vars, int) else cn_vars

    conditioned = dm.get_variables('X', conditioned_ids)
    conditioning = dm.get_variables('X', conditioning_ids)

    expected_cpmf = CPMF(conditioned, conditioning)
    calculated_cpmf = adtree.make_cpmf(conditioned, conditioning)

    assert expected_cpmf == calculated_cpmf
Пример #5
0
def test_conditional_pmf__from_bayesian_network():
    """Check PMFs/CPMFs estimated from samples against the source network.

    A dataset is sampled from the ``survey.bif`` Bayesian network; the
    distributions computed from the sampled columns must almost equal the
    probability distributions stored on the corresponding network nodes.
    """
    sourcepath = testutil.bif_folder / 'survey.bif'
    configuration = {
        'sourcepath': sourcepath,
        'sample_count': int(4e4),
        # Using a random seed of 42 somehow requires 2e6 samples to pass,
        # but with the seed 1984 it is sufficient to generate only 4e4.
        # Maybe the random generator is biased somehow?
        'random_seed': 1984,
        'values_as_indices': False,
        'objectives': ['R', 'TRN'],
    }

    bayesian_network = BayesianNetwork.from_bif_file(sourcepath, use_cache=False)
    bayesian_network.finalize()

    sbnds = SampledBayesianNetworkDatasetSource(configuration)
    sbnds.reset_random_seed = True
    datasetmatrix = sbnds.create_dataset_matrix('test_sbnds')

    assert ['AGE', 'EDU', 'OCC', 'SEX'] == datasetmatrix.column_labels_X
    assert ['R', 'TRN'] == datasetmatrix.column_labels_Y

    def column_variable(partition, label):
        # Wrap one labelled column of the sampled dataset in a Variable.
        return Variable(datasetmatrix.get_column_by_label(partition, label))

    def network_probdist(node_name):
        # Reference distribution stored on the named network node.
        return bayesian_network.variable_nodes[node_name].probdist

    # Root nodes: plain (unconditional) distributions.
    AGE = column_variable('X', 'AGE')
    PrAge = PMF(AGE)

    SEX = column_variable('X', 'SEX')
    PrSex = PMF(SEX)

    assert_PMF_AlmostEquals_BNProbDist(network_probdist('AGE'), PrAge)
    assert_PMF_AlmostEquals_BNProbDist(network_probdist('SEX'), PrSex)

    # Remaining nodes: distributions conditioned on the given variables.
    EDU = column_variable('X', 'EDU')
    PrEdu = CPMF(EDU, given=JointVariables(AGE, SEX))
    assert_CPMF_AlmostEquals_BNProbDist(network_probdist('EDU'), PrEdu)

    OCC = column_variable('X', 'OCC')
    PrOcc = CPMF(OCC, given=EDU)
    assert_CPMF_AlmostEquals_BNProbDist(network_probdist('OCC'), PrOcc)

    R = column_variable('Y', 'R')
    PrR = CPMF(R, given=EDU)
    assert_CPMF_AlmostEquals_BNProbDist(network_probdist('R'), PrR)

    TRN = column_variable('Y', 'TRN')
    PrTRN = CPMF(TRN, given=JointVariables(OCC, R))
    assert_CPMF_AlmostEquals_BNProbDist(network_probdist('TRN'), PrTRN)
Пример #6
0
def test_conditional_pmf__multiple_values():
    """Check a CPMF over joint variables with multi-valued string data.

    The first part pins individual conditional probabilities of
    Pr(colors, is_pet | sizes, animals). The second part verifies that the
    joint conditional and both of its single-variable conditionals each sum
    to 1 once weighted by Pr(sizes, animals).
    """
    sizes = Variable(['small', 'small', 'large', 'small', 'normal', 'small'])
    sizes.ID = 1
    sizes.name = 'sizes'

    colors = Variable(['gray', 'yellow', 'brown', 'silver', 'white', 'gray'])
    colors.ID = 2
    colors.name = 'colors'

    animals = Variable(['cat', 'dog', 'cat', 'snake', 'dog', 'cat'])
    animals.ID = 3
    animals.name = 'animals'

    is_pet = Variable(['yes', 'yes', 'yes', 'maybe', 'yes', 'yes'])
    is_pet.ID = 4
    is_pet.name = 'is_pet'

    Pr = CPMF(JointVariables(colors, is_pet), JointVariables(sizes, animals))

    assert Pr.given('small', 'cat').p('gray', 'yes') == 2 / 2
    assert Pr.given('small', 'cat').p('yellow', 'yes') == 0 / 1
    assert Pr.given('small', 'cat').p('brown', 'maybe') == 0 / 1

    assert Pr.given('small', 'dog').p('yellow', 'yes') == 1 / 1
    assert Pr.given('small', 'dog').p('yellow', 'maybe') == 0 / 1
    assert Pr.given('small', 'dog').p('silver', 'maybe') == 0 / 1

    assert Pr.given('large', 'cat').p('brown', 'yes') == 1 / 1
    assert Pr.given('large', 'cat').p('yellow', 'yes') == 0 / 1

    assert Pr.given('small', 'snake').p('silver', 'maybe') == 1 / 1
    assert Pr.given('small', 'snake').p('silver', 'no') == 0 / 1

    assert Pr.given('normal', 'dog').p('white', 'yes') == 1 / 1
    assert Pr.given('normal', 'dog').p('silver', 'yes') == 0 / 1
    assert Pr.given('normal', 'dog').p('yellow', 'maybe') == 0 / 1

    SA = JointVariables(sizes, animals)
    PrAll = CPMF(JointVariables(colors, is_pet), SA)
    PrSA = PMF(SA)
    PrCcSA = CPMF(colors, SA)
    PrIPcSA = CPMF(is_pet, SA)

    test_p_all = 0.0
    test_p_c = 0.0
    test_p_ip = 0.0

    for (sa, psa) in PrSA.items():
        colors_given_sa = PrCcSA.given(sa)
        is_pet_given_sa = PrIPcSA.given(sa)
        all_given_sa = PrAll.given(sa)

        for (c, pcsa) in colors_given_sa.items():
            test_p_c += pcsa * psa
            # The joint conditional is summed over every (color, is_pet)
            # pair, hence the nested iteration.
            for (ip, _) in is_pet_given_sa.items():
                test_p_all += all_given_sa.p(c, ip) * psa

        # The is_pet marginal is accumulated in its own loop: adding it
        # inside the color loop would count it once per color value and
        # only sum to 1 when each condition has exactly one color.
        for (ip, pipsa) in is_pet_given_sa.items():
            test_p_ip += pipsa * psa

    assert almostEqual(1, test_p_all)
    assert almostEqual(1, test_p_c)
    assert almostEqual(1, test_p_ip)
Пример #7
0
def test_conditional_pmf__binary():
    """Check CPMFs built from small binary variables against hand counts."""
    V0 = Variable([0, 1, 0, 1, 0, 1, 0, 1])
    V1 = Variable([0, 0, 1, 1, 0, 0, 1, 1])
    V2 = Variable([0, 0, 0, 0, 1, 1, 1, 1])
    V78 = Variable([0, 0, 0, 0, 0, 0, 1, 1])

    def check(Pr, expectations):
        # Each row reads: (conditioning values, value, expected probability).
        for condition, value, expected in expectations:
            assert Pr.given(*condition).p(value) == expected

    check(CPMF(V0, V78), [
        ((0,), 0, 3 / 6),
        ((0,), 1, 3 / 6),
        ((1,), 0, 1 / 2),
        ((1,), 1, 1 / 2),
    ])

    check(CPMF(V2, V78), [
        ((0,), 0, 4 / 6),
        ((0,), 1, 2 / 6),
        ((1,), 0, 0 / 2),
        ((1,), 1, 2 / 2),
    ])

    check(CPMF(V78, V1), [
        ((0,), 0, 4 / 4),
        ((0,), 1, 0 / 4),
        ((1,), 0, 2 / 4),
        ((1,), 1, 2 / 4),
    ])

    # The condition (V2=0, V78=1) never occurs in the data, so both
    # probabilities under it are expected to be zero.
    check(CPMF(V1, JointVariables(V2, V78)), [
        ((0, 0), 0, 2 / 4),
        ((0, 0), 1, 2 / 4),
        ((0, 1), 0, 0 / 1),
        ((0, 1), 1, 0 / 1),
        ((1, 0), 0, 2 / 2),
        ((1, 0), 1, 0 / 2),
        ((1, 1), 0, 0 / 2),
        ((1, 1), 1, 2 / 2),
    ])