Пример #1
0
def test_add_cpds():
    """Check how ``add_cpds`` interacts with node types and stored CPDs."""
    arcs = [('a', 'b'), ('a', 'c'), ('a', 'd'), ('b', 'c'), ('b', 'd'),
            ('c', 'd')]
    spbn = SemiparametricBN(arcs, [('d', pbn.CKDEType())])

    # 'a' starts without a type; the test expects adding a CKDE to set it.
    assert spbn.node_type('a') == pbn.UnknownFactorType()
    spbn.add_cpds([CKDE('a', [])])
    assert spbn.node_type('a') == pbn.CKDEType()

    # 'd' was declared as CKDE, so a linear Gaussian CPD must be rejected.
    with pytest.raises(ValueError) as type_mismatch:
        spbn.add_cpds([LinearGaussianCPD('d', ['a', 'b', 'c'])])
    assert "Bayesian network expects type" in str(type_mismatch.value)

    gaussian_b = LinearGaussianCPD('b', ['a'], [2.5, 1.65], 4)
    ckde_d = CKDE('d', ['a', 'b', 'c'])
    assert gaussian_b.fitted()
    assert not ckde_d.fitted()

    spbn.add_cpds([gaussian_b, ckde_d])

    # After resetting the type of 'a', asking for its CPD is expected to fail.
    spbn.set_node_type('a', pbn.UnknownFactorType())
    with pytest.raises(ValueError) as missing_a:
        not spbn.cpd('a').fitted()
    message_a = 'CPD of variable "a" not added. Call add_cpds() or fit() to add the CPD.'
    assert message_a in str(missing_a.value)

    assert spbn.cpd('b').fitted()

    # No CPD was ever added for 'c'.
    with pytest.raises(ValueError) as missing_c:
        not spbn.cpd('c').fitted()
    message_c = 'CPD of variable "c" not added. Call add_cpds() or fit() to add the CPD.'
    assert message_c in str(missing_c.value)

    assert not spbn.cpd('d').fitted()
Пример #2
0
def dyn_other_fit_bytes():
    """Return the pickled bytes of a ``DynamicOtherBN`` after calling ``fit``.

    The static and transition networks each receive one hand-written linear
    Gaussian CPD before the dynamic network is fitted, and ``include_cpd`` is
    set to ``True`` before pickling.
    """
    variables = ["a", "b", "c", "d"]
    static_nodes = []
    for name in variables:
        for lag in range(1, 3):
            static_nodes.append(f"{name}_t_{lag}")
    transition_nodes = [f"{name}_t_0" for name in variables]

    static_types = [
        ("b_t_2", pbn.DiscreteFactorType()),
        ("b_t_1", pbn.DiscreteFactorType()),
        ("c_t_1", pbn.CKDEType()),
        ("d_t_1", pbn.LinearGaussianCPDType()),
    ]
    other_static = OtherBN(static_nodes, [("a_t_2", "d_t_1")], static_types)
    static_cpd = LinearGaussianCPD("d_t_1", ["a_t_2"], [1, 2], 2)
    other_static.add_cpds([static_cpd])

    transition_types = [
        ("b_t_0", pbn.DiscreteFactorType()),
        ("c_t_0", pbn.CKDEType()),
        ("d_t_0", pbn.LinearGaussianCPDType()),
    ]
    other_transition = ConditionalOtherBN(transition_nodes, static_nodes,
                                          [("a_t_2", "d_t_0")],
                                          transition_types)
    transition_cpd = LinearGaussianCPD("d_t_0", ["a_t_2"], [3, 4], 1.5)
    other_transition.add_cpds([transition_cpd])

    assert other_static.type() == other_transition.type()

    dyn_other = DynamicOtherBN(variables, 2, other_static, other_transition)

    # Column "b" of the continuous frame is overwritten with the discrete "B".
    training = util_test.generate_normal_data_indep(1000)
    discrete = util_test.generate_discrete_data_dependent(1000)
    training["b"] = discrete["B"]
    dyn_other.fit(training)

    dyn_other.include_cpd = True
    return pickle.dumps(dyn_other)
Пример #3
0
def test_serialization_dbn_model(dyn_gaussian_bytes, dyn_spbn_bytes,
                                 dyn_kde_bytes, dyn_discrete_bytes,
                                 dyn_genericbn_bytes, dyn_newbn_bytes,
                                 dyn_otherbn_bytes):
    """Unpickle each dynamic network and check its variables, arcs and type.

    Every argument is passed to ``pickle.loads`` and is expected to yield a
    dynamic Bayesian network.
    """
    expected_variables = {"a", "b", "c", "d"}
    static_arcs = [("a_t_2", "d_t_1")]

    def restore(payload, transition_arcs, network_type):
        # Checks shared by every fixture; returns the unpickled network.
        model = pickle.loads(payload)
        assert set(model.variables()) == expected_variables
        assert model.static_bn().arcs() == static_arcs
        assert model.transition_bn().arcs() == transition_arcs
        assert model.type() == network_type
        return model

    restore(dyn_gaussian_bytes, [("c_t_2", "b_t_0")],
            pbn.GaussianNetworkType())

    semiparametric = restore(dyn_spbn_bytes, [("c_t_2", "b_t_0")],
                             pbn.SemiparametricBNType())
    # Only b_t_0 is expected to carry an explicit node type.
    expected_types = {}
    for name in semiparametric.variables():
        expected_types[name + "_t_0"] = pbn.UnknownFactorType()
    expected_types["b_t_0"] = pbn.CKDEType()
    assert semiparametric.transition_bn().node_types() == expected_types

    restore(dyn_kde_bytes, [("c_t_2", "b_t_0")], pbn.KDENetworkType())
    restore(dyn_discrete_bytes, [("c_t_2", "b_t_0")], pbn.DiscreteBNType())
    restore(dyn_genericbn_bytes, [("a_t_2", "b_t_0")],
            MyRestrictedGaussianNetworkType())
    restore(dyn_newbn_bytes, [("a_t_2", "b_t_0")],
            MyRestrictedGaussianNetworkType())

    other = restore(dyn_otherbn_bytes, [("a_t_2", "b_t_0")],
                    NonHomogeneousType())
    assert other.extra_info == "extra"

    # Node types set on the static and transition parts must be restored too.
    assert other.static_bn().node_type("c_t_1") == pbn.DiscreteFactorType()
    assert other.static_bn().node_type("d_t_1") == pbn.CKDEType()
    assert other.transition_bn().node_type("d_t_0") == pbn.CKDEType()
Пример #4
0
def dyn_otherbn_bytes():
    """Return the pickled bytes of a ``DynamicOtherBN`` with arcs and node types set."""
    other = DynamicOtherBN(["a", "b", "c", "d"], 2)

    static_part = other.static_bn()
    static_part.add_arc("a_t_2", "d_t_1")
    static_part.set_node_type("c_t_1", pbn.DiscreteFactorType())
    static_part.set_node_type("d_t_1", pbn.CKDEType())

    transition_part = other.transition_bn()
    transition_part.add_arc("a_t_2", "b_t_0")
    transition_part.set_node_type("d_t_0", pbn.CKDEType())

    return pickle.dumps(other)
Пример #5
0
def test_holdout_local_score_null_spbn():
    """Compare holdout local scores with the reference on data containing NaNs."""
    arcs = [('a', 'b'), ('a', 'c'), ('a', 'd'), ('b', 'c'), ('b', 'd'),
            ('c', 'd')]
    spbn = pbn.SemiparametricBN(arcs, [('a', pbn.CKDEType()),
                                       ('c', pbn.CKDEType())])

    # Draw 100 row positions per column, in column order, from a fixed seed.
    np.random.seed(0)
    null_positions = {
        column: np.random.randint(0, SIZE, size=100)
        for column in ('a', 'b', 'c', 'd')
    }

    df_null = df.copy()
    for column, positions in null_positions.items():
        df_null.loc[df_null.index[positions], column] = np.nan

    hl = pbn.HoldoutLikelihood(df_null, 0.2, seed)

    def reference(node_type, variable, evidence):
        # Reference score computed from the holdout's own train/test split.
        return numpy_local_score(node_type,
                                 hl.training_data().to_pandas(),
                                 hl.test_data().to_pandas(), variable,
                                 evidence)

    ckde, gaussian = pbn.CKDEType, pbn.LinearGaussianCPDType
    # (type factory, node, parents for the score, parents for the reference)
    cases = [
        (ckde, 'a', [], []),
        (gaussian, 'b', ['a'], ['a']),
        (ckde, 'c', ['a', 'b'], ['a', 'b']),
        (gaussian, 'd', ['a', 'b', 'c'], ['a', 'b', 'c']),
        (gaussian, 'd', ['a', 'b', 'c'], ['b', 'c', 'a']),
    ]
    for make_type, node, parents, reference_parents in cases:
        assert np.isclose(hl.local_score(spbn, node, parents),
                          reference(make_type(), node, reference_parents))

    # Omitting the parents must match passing the network's own parents.
    for node in ('a', 'b', 'c', 'd'):
        assert hl.local_score(spbn, node) == hl.local_score(
            spbn, node, spbn.parents(node))
Пример #6
0
def test_serialization_bn_model(gaussian_bytes, spbn_bytes, kde_bytes,
                                discrete_bytes, genericbn_bytes, newbn_bytes,
                                otherbn_bytes):
    """Unpickle each network and check its nodes, arcs and type.

    Every argument is passed to ``pickle.loads`` and is expected to yield a
    Bayesian network with nodes ``a``-``d`` and the single arc ``a -> b``.
    """
    expected_nodes = {"a", "b", "c", "d"}

    loaded_g = pickle.loads(gaussian_bytes)
    assert set(loaded_g.nodes()) == expected_nodes
    assert loaded_g.arcs() == [("a", "b")]
    assert loaded_g.type() == pbn.GaussianNetworkType()

    loaded_s = pickle.loads(spbn_bytes)
    assert set(loaded_s.nodes()) == expected_nodes
    assert loaded_s.arcs() == [("a", "b")]
    assert loaded_s.type() == pbn.SemiparametricBNType()
    assert loaded_s.node_types() == {
        'a': pbn.UnknownFactorType(),
        'b': pbn.CKDEType(),
        'c': pbn.UnknownFactorType(),
        'd': pbn.UnknownFactorType()
    }

    loaded_k = pickle.loads(kde_bytes)
    assert set(loaded_k.nodes()) == expected_nodes
    assert loaded_k.arcs() == [("a", "b")]
    assert loaded_k.type() == pbn.KDENetworkType()

    loaded_d = pickle.loads(discrete_bytes)
    assert set(loaded_d.nodes()) == expected_nodes
    assert loaded_d.arcs() == [("a", "b")]
    assert loaded_d.type() == pbn.DiscreteBNType()

    loaded_gen = pickle.loads(genericbn_bytes)
    assert set(loaded_gen.nodes()) == expected_nodes
    assert loaded_gen.arcs() == [("a", "b")]
    assert loaded_gen.type() == MyRestrictedGaussianNetworkType()

    loaded_nn = pickle.loads(newbn_bytes)
    # Check the network just loaded; this previously re-checked loaded_g.
    assert set(loaded_nn.nodes()) == expected_nodes
    assert loaded_nn.arcs() == [("a", "b")]
    assert loaded_nn.type() == MyRestrictedGaussianNetworkType()

    loaded_o = pickle.loads(otherbn_bytes)
    # Check the network just loaded; this previously re-checked loaded_g.
    assert set(loaded_o.nodes()) == expected_nodes
    assert loaded_o.arcs() == [("a", "b")]
    assert loaded_o.type() == NonHomogeneousType()
    assert loaded_o.node_types() == {
        'a': pbn.UnknownFactorType(),
        'b': pbn.LinearGaussianCPDType(),
        'c': pbn.CKDEType(),
        'd': pbn.DiscreteFactorType()
    }
    assert loaded_o.extra_info == "extra"

    assert loaded_nn.type() != loaded_o.type()
Пример #7
0
def test_cvl_local_score_spbn():
    """Compare ``CVLikelihood`` local scores on a semiparametric network with the reference."""
    arcs = [('a', 'b'), ('a', 'c'), ('a', 'd'), ('b', 'c'), ('b', 'd'),
            ('c', 'd')]
    spbn = pbn.SemiparametricBN(arcs, [('a', pbn.CKDEType()),
                                       ('c', pbn.CKDEType())])

    cvl = pbn.CVLikelihood(df, 10, seed)

    ckde, gaussian = pbn.CKDEType, pbn.LinearGaussianCPDType

    # Scores using the types the network assigns to each node.
    # (type factory, node, parents for the score, parents for the reference)
    network_type_cases = [
        (ckde, 'a', [], []),
        (gaussian, 'b', ['a'], ['a']),
        (ckde, 'c', ['a', 'b'], ['a', 'b']),
        (gaussian, 'd', ['a', 'b', 'c'], ['a', 'b', 'c']),
        (gaussian, 'd', ['a', 'b', 'c'], ['b', 'c', 'a']),
    ]
    for make_type, node, parents, reference_parents in network_type_cases:
        assert np.isclose(
            cvl.local_score(spbn, node, parents),
            numpy_local_score(make_type(), df, node, reference_parents))

    # Omitting the parents must match passing the network's own parents.
    for node in ('a', 'b', 'c', 'd'):
        assert cvl.local_score(spbn, node) == cvl.local_score(
            spbn, node, spbn.parents(node))

    # Scores with an explicitly requested node type.
    explicit_type_cases = [
        (gaussian, 'a', [], []),
        (ckde, 'b', ['a'], ['a']),
        (gaussian, 'c', ['a', 'b'], ['a', 'b']),
        (ckde, 'd', ['a', 'b', 'c'], ['a', 'b', 'c']),
        (ckde, 'd', ['a', 'b', 'c'], ['b', 'c', 'a']),
    ]
    for make_type, node, parents, reference_parents in explicit_type_cases:
        assert np.isclose(
            cvl.local_score_node_type(spbn, make_type(), node, parents),
            numpy_local_score(make_type(), df, node, reference_parents))
Пример #8
0
def test_node_type():
    """Node types start as unknown and follow ``set_node_type``."""
    names = ['a', 'b', 'c', 'd']
    spbn = SemiparametricBN(['a', 'b', 'c', 'd'])
    assert spbn.num_nodes() == 4
    assert spbn.num_arcs() == 0
    assert spbn.nodes() == names

    for node in spbn.nodes():
        assert spbn.node_type(node) == pbn.UnknownFactorType()

    # Reassigning the type of 'b' twice; each assignment must be visible.
    for make_type in (pbn.CKDEType, pbn.LinearGaussianCPDType):
        spbn.set_node_type('b', make_type())
        assert spbn.node_type('b') == make_type()
Пример #9
0
def test_apply():
    """Apply each operator to a Gaussian and a semiparametric network."""

    def add_flip_remove(network):
        # Add a->b, flip it to b->a, then remove b->a, checking each step.
        step = pbn.AddArc("a", "b", 1)
        step.apply(network)
        assert network.num_arcs() == 1
        assert network.has_arc('a', 'b')

        step = pbn.FlipArc("a", "b", 1)
        step.apply(network)
        assert network.num_arcs() == 1
        assert not network.has_arc('a', 'b')
        assert network.has_arc('b', 'a')

        step = pbn.RemoveArc("b", "a", 1)
        step.apply(network)
        assert network.num_arcs() == 0
        assert not network.has_arc('b', 'a')

    gbn = pbn.GaussianNetwork(['a', 'b', 'c', 'd'])
    assert gbn.num_arcs() == 0
    assert not gbn.has_arc('a', 'b')
    add_flip_remove(gbn)

    # Changing a node type is expected to fail on a Gaussian network.
    change_type = pbn.ChangeNodeType("a", pbn.CKDEType(), 1)
    with pytest.raises(ValueError) as wrong_type:
        change_type.apply(gbn)
    assert "Wrong factor type" in str(wrong_type.value)

    spbn = pbn.SemiparametricBN(['a', 'b', 'c', 'd'])
    assert spbn.num_arcs() == 0

    change_type = pbn.ChangeNodeType("a", pbn.CKDEType(), 1)
    assert (spbn.node_type('a') == pbn.UnknownFactorType())
    change_type.apply(spbn)
    assert (spbn.node_type('a') == pbn.CKDEType())

    assert not spbn.has_arc('a', 'b')
    add_flip_remove(spbn)
Пример #10
0
def test_factor_type():
    """Factors of one class share a type; different classes have different types."""

    def build_family(factor_cls, make_type):
        # Three factors of one class with zero, one and two evidence variables.
        first = factor_cls("a", [])
        second = factor_cls("b", ["a"])
        third = factor_cls("c", ["b", "a"])

        assert first.type() == make_type()
        assert first.type() == second.type()
        assert first.type() == third.type()
        assert second.type() == third.type()
        return first

    gaussian = build_family(pbn.LinearGaussianCPD, pbn.LinearGaussianCPDType)
    ckde = build_family(pbn.CKDE, pbn.CKDEType)
    discrete = build_family(pbn.DiscreteFactor, pbn.DiscreteFactorType)

    assert gaussian.type() != ckde.type()
    assert gaussian.type() != discrete.type()
    assert ckde.type() != discrete.type()
def test_serialization_factor_type(lg_type_bytes, ckde_type_bytes,
                                   discrete_type_bytes, new_type_bytes,
                                   other_type_bytes):
    """Unpickled factor types equal fresh instances and stay mutually distinct.

    Every argument is passed to ``pickle.loads`` and is expected to yield a
    factor type object.
    """
    restored_lg = pickle.loads(lg_type_bytes)
    fresh_lg = pbn.LinearGaussianCPDType()
    assert fresh_lg == restored_lg

    restored_ckde = pickle.loads(ckde_type_bytes)
    fresh_ckde = pbn.CKDEType()
    assert restored_ckde == fresh_ckde

    restored_discrete = pickle.loads(discrete_type_bytes)
    fresh_discrete = pbn.DiscreteFactorType()
    assert restored_discrete == fresh_discrete

    restored_new = pickle.loads(new_type_bytes)
    fresh_new = NewType()
    assert restored_new == fresh_new

    restored_other = pickle.loads(other_type_bytes)
    fresh_other = OtherType()
    assert restored_other == fresh_other

    # Every pair of distinct factor types must compare unequal.
    fresh_types = [fresh_lg, fresh_ckde, fresh_discrete, fresh_new,
                   fresh_other]
    for position, left in enumerate(fresh_types):
        for right in fresh_types[position + 1:]:
            assert left != right
Пример #12
0
def test_opposite():
    """``opposite`` returns the inverse operator with a negated delta."""
    bn = pbn.SemiparametricBN(["a", "b"])

    # (operator class, opposite's source, opposite's target, opposite's class)
    arc_cases = [
        (pbn.AddArc, 'a', 'b', pbn.RemoveArc),
        (pbn.RemoveArc, 'a', 'b', pbn.AddArc),
        (pbn.FlipArc, 'b', 'a', pbn.FlipArc),
    ]
    for operator_cls, source, target, opposite_cls in arc_cases:
        forward = operator_cls("a", "b", 1)
        backward = forward.opposite(bn)
        assert backward.source() == source
        assert backward.target() == target
        assert backward.delta() == -1
        assert type(backward) == opposite_cls

    # The opposite of a type change restores the type the node currently has.
    bn.set_node_type("a", pbn.LinearGaussianCPDType())
    forward = pbn.ChangeNodeType("a", pbn.CKDEType(), 1)
    backward = forward.opposite(bn)
    assert backward.node() == 'a'
    assert backward.node_type() == pbn.LinearGaussianCPDType()
    assert backward.delta() == -1
    assert type(backward) == pbn.ChangeNodeType
Пример #13
0
def cond_other_partial_fit_bytes():
    """Return the pickled bytes of a ``ConditionalOtherBN`` with only the CPD of ``d`` added."""
    node_types = [
        ("c", pbn.CKDEType()),
        ("d", pbn.LinearGaussianCPDType()),
    ]
    network = ConditionalOtherBN(["c", "d"], ["a", "b"], [("a", "c")],
                                 node_types)

    cpd_d = LinearGaussianCPD("d", [], [3], 1.5)
    network.add_cpds([cpd_d])

    network.include_cpd = True
    return pickle.dumps(network)
Пример #14
0
def other_partial_fit_bytes():
    """Return the pickled bytes of an ``OtherBN`` with only the CPD of ``b`` added."""
    node_types = [
        ("b", pbn.LinearGaussianCPDType()),
        ("c", pbn.CKDEType()),
        ("d", pbn.DiscreteFactorType()),
    ]
    network = OtherBN(["a", "b", "c", "d"], [("a", "b")], node_types)

    cpd_b = LinearGaussianCPD("b", ["a"], [1, 2], 2)
    network.add_cpds([cpd_b])

    network.include_cpd = True
    return pickle.dumps(network)
Пример #15
0
def test_cpd():
    """``cpd`` raises before fitting and returns fitted CPDs of the right type after."""
    arcs = [('a', 'b'), ('a', 'c'), ('a', 'd'), ('b', 'c'), ('b', 'd'),
            ('c', 'd')]
    spbn = SemiparametricBN(arcs, [('d', pbn.CKDEType())])

    with pytest.raises(ValueError) as missing:
        spbn.cpd('a')
    assert "not added" in str(missing.value)

    spbn.fit(df)

    # Only 'd' was declared as CKDE; the rest are expected to be Gaussian.
    expected_types = [
        ('a', pbn.LinearGaussianCPDType),
        ('b', pbn.LinearGaussianCPDType),
        ('c', pbn.LinearGaussianCPDType),
        ('d', pbn.CKDEType),
    ]
    for node, make_type in expected_types:
        assert spbn.cpd(node).type() == make_type()

    for node in ('a', 'b', 'c', 'd'):
        assert spbn.cpd(node).fitted()
Пример #16
0
def test_holdout_score():
    """The holdout score of a network equals the sum of its local scores."""
    gbn = pbn.GaussianNetwork([('a', 'b'), ('a', 'c'), ('a', 'd'),
                               ('b', 'c'), ('b', 'd'), ('c', 'd')])

    hl = pbn.HoldoutLikelihood(df, 0.2, 0)

    gbn_total = hl.score(gbn)
    gbn_by_node = (hl.local_score(gbn, 'a', []) +
                   hl.local_score(gbn, 'b', ['a']) +
                   hl.local_score(gbn, 'c', ['a', 'b']) +
                   hl.local_score(gbn, 'd', ['a', 'b', 'c']))
    assert np.isclose(gbn_total, gbn_by_node)

    spbn = pbn.SemiparametricBN([('a', 'b'), ('a', 'c'), ('a', 'd'),
                                 ('b', 'c'), ('b', 'd'), ('c', 'd')],
                                [('a', pbn.CKDEType()),
                                 ('c', pbn.CKDEType())])

    # Here the parents are taken from the network instead of being passed in.
    spbn_total = hl.score(spbn)
    spbn_by_node = (hl.local_score(spbn, 'a') + hl.local_score(spbn, 'b') +
                    hl.local_score(spbn, 'c') + hl.local_score(spbn, 'd'))
    assert np.isclose(spbn_total, spbn_by_node)
Пример #17
0
def test_cvl_score():
    """The cross-validated score of a network equals the sum of its local scores."""
    gbn = pbn.GaussianNetwork([('a', 'b'), ('a', 'c'), ('a', 'd'),
                               ('b', 'c'), ('b', 'd'), ('c', 'd')])

    cv = pbn.CVLikelihood(df, 10, 0)

    gbn_total = cv.score(gbn)
    gbn_by_node = (cv.local_score(gbn, 'a', []) +
                   cv.local_score(gbn, 'b', ['a']) +
                   cv.local_score(gbn, 'c', ['a', 'b']) +
                   cv.local_score(gbn, 'd', ['a', 'b', 'c']))
    assert np.isclose(gbn_total, gbn_by_node)

    spbn = pbn.SemiparametricBN([('a', 'b'), ('a', 'c'), ('a', 'd'),
                                 ('b', 'c'), ('b', 'd'), ('c', 'd')],
                                [('a', pbn.CKDEType()),
                                 ('c', pbn.CKDEType())])

    # Here the parents are taken from the network instead of being passed in.
    spbn_total = cv.score(spbn)
    spbn_by_node = (cv.local_score(spbn, 'a') + cv.local_score(spbn, 'b') +
                    cv.local_score(spbn, 'c') + cv.local_score(spbn, 'd'))
    assert np.isclose(spbn_total, spbn_by_node)
Пример #18
0
def test_create():
    """Operators expose the values they were constructed with."""
    # Each arc operator is built on a -> b with a different delta.
    arc_operators = [(pbn.AddArc, 1), (pbn.RemoveArc, 2), (pbn.FlipArc, 3)]
    for operator_cls, delta in arc_operators:
        arc_op = operator_cls("a", "b", delta)
        assert arc_op.source() == 'a'
        assert arc_op.target() == 'b'
        assert arc_op.delta() == delta

    type_op = pbn.ChangeNodeType("a", pbn.CKDEType(), 4)
    assert type_op.node() == 'a'
    assert type_op.node_type() == pbn.CKDEType()
    assert type_op.delta() == 4
Пример #19
0
def test_holdout_local_score_spbn():
    """Compare holdout local scores on a semiparametric network with the reference."""
    arcs = [('a', 'b'), ('a', 'c'), ('a', 'd'), ('b', 'c'), ('b', 'd'),
            ('c', 'd')]
    spbn = pbn.SemiparametricBN(arcs, [('a', pbn.CKDEType()),
                                       ('c', pbn.CKDEType())])

    hl = pbn.HoldoutLikelihood(df, 0.2, seed)

    def reference(node_type, variable, evidence):
        # Reference score computed from the holdout's own train/test split.
        return numpy_local_score(node_type,
                                 hl.training_data().to_pandas(),
                                 hl.test_data().to_pandas(), variable,
                                 evidence)

    ckde, gaussian = pbn.CKDEType, pbn.LinearGaussianCPDType
    # (type factory, node, parents for the score, parents for the reference)
    cases = [
        (ckde, 'a', [], []),
        (gaussian, 'b', ['a'], ['a']),
        (ckde, 'c', ['a', 'b'], ['a', 'b']),
        (gaussian, 'd', ['a', 'b', 'c'], ['a', 'b', 'c']),
        (gaussian, 'd', ['a', 'b', 'c'], ['b', 'c', 'a']),
    ]
    for make_type, node, parents, reference_parents in cases:
        assert np.isclose(hl.local_score(spbn, node, parents),
                          reference(make_type(), node, reference_parents))

    # Omitting the parents must match passing the network's own parents.
    for node in ('a', 'b', 'c', 'd'):
        assert hl.local_score(spbn, node) == hl.local_score(
            spbn, node, spbn.parents(node))
Пример #20
0
def numpy_local_score(node_type, data, variable, evidence):
    """Compute a reference cross-validated log-likelihood with NumPy/SciPy.

    The folds come from ``pbn.CrossValidation(data, 10, seed)``. On each fold
    a model is estimated from the training rows and the log-likelihood of the
    test rows is accumulated.

    Args:
        node_type: Factor type to score. ``pbn.LinearGaussianCPDType()`` uses
            a least-squares regression, ``pbn.CKDEType()`` uses Gaussian KDEs;
            any other type contributes nothing.
        data: Dataset handed to ``pbn.CrossValidation``.
        variable: Column of the scored node, as a name (``str``) or, for any
            other type, as a position used with ``iloc``.
        evidence: List of evidence columns, given the same way as
            ``variable``.

    Returns:
        The log-likelihood summed over all folds (``0`` if ``node_type`` is
        neither of the two handled types).
    """
    select_by_name = isinstance(variable, str)

    def node_columns(fold):
        # Keep the node and its evidence, dropping rows with missing values.
        frame = fold.to_pandas()
        if select_by_name:
            node = frame.loc[:, [variable] + evidence].dropna()
            return node, node.loc[:, variable], node.loc[:, evidence]
        node = frame.iloc[:, [variable] + evidence].dropna()
        return node, node.iloc[:, 0], node.iloc[:, 1:]

    def normal_reference_bandwidth(kde):
        # Bandwidth factor passed to gaussian_kde for the joint density.
        return np.power(4 / (kde.d + 2), 1 / (kde.d + 4)) * kde.scotts_factor()

    folds = pbn.CrossValidation(data, 10, seed)
    total = 0
    for train_fold, test_fold in folds:
        train_node, train_variable, train_evidence = node_columns(train_fold)
        test_node, test_variable, test_evidence = node_columns(test_fold)

        if node_type == pbn.LinearGaussianCPDType():
            n_rows = train_variable.shape[0]
            n_evidence = train_evidence.shape[1]
            # Design matrix with a leading column of ones for the intercept.
            design = np.column_stack(
                (np.ones(n_rows), train_evidence.to_numpy()))
            coefficients, residual, _, _ = np.linalg.lstsq(
                design, train_variable.to_numpy(), rcond=None)
            variance = residual / (n_rows - n_evidence - 1)

            predicted = coefficients[0] + np.sum(
                coefficients[1:] * test_evidence, axis=1)
            total += norm.logpdf(test_variable, predicted,
                                 np.sqrt(variance)).sum()
        elif node_type == pbn.CKDEType():
            joint_kde = gaussian_kde(train_node.to_numpy().T,
                                     bw_method=normal_reference_bandwidth)
            if evidence:
                # Conditional density = joint / marginal of the evidence,
                # with the marginal reusing the joint's bandwidth factor.
                marginal_kde = gaussian_kde(
                    train_evidence.to_numpy().T,
                    bw_method=joint_kde.covariance_factor())
                total += np.sum(
                    joint_kde.logpdf(test_node.to_numpy().T) -
                    marginal_kde.logpdf(test_evidence.to_numpy().T))
            else:
                total += np.sum(joint_kde.logpdf(test_node.to_numpy().T))

    return total
Пример #21
0
def test_serialization_unfitted_factor(lg_bytes, ckde_bytes, discrete_bytes,
                                       new_bytes, newbis_bytes):
    """Unpickle unfitted factors and check variable, evidence, state and type.

    Every argument is passed to ``pickle.loads`` and is expected to yield an
    unfitted factor of ``c`` given ``a`` and ``b``.
    """

    def restore(payload):
        # Checks shared by every fixture; returns the unpickled factor.
        factor = pickle.loads(payload)
        assert factor.variable() == "c"
        assert set(factor.evidence()) == {"a", "b"}
        assert not factor.fitted()
        return factor

    restored_lg = restore(lg_bytes)
    assert restored_lg.type() == pbn.LinearGaussianCPDType()

    restored_ckde = restore(ckde_bytes)
    assert restored_ckde.type() == pbn.CKDEType()

    restored_discrete = restore(discrete_bytes)
    assert restored_discrete.type() == pbn.DiscreteFactorType()

    restored_new = restore(new_bytes)
    assert type(restored_new.type()) == NewType
    reference_new = NewFactor("a", [])
    assert restored_new.type() == reference_new.type()

    # The restored type must still build factors of the right Python class.
    from pybnesian import GaussianNetwork
    dummy_network = GaussianNetwork(["a", "b", "c", "d"])
    built = restored_new.type().new_factor(dummy_network, "a", [])
    assert type(built) == NewFactor

    restored_newbis = restore(newbis_bytes)
    assert type(restored_newbis.type()) == NewType
    reference_newbis = NewFactorBis("a", [])
    assert restored_newbis.type() == reference_newbis.type()
    built_bis = restored_newbis.type().new_factor(dummy_network, "a", [])
    assert type(built_bis) == NewFactorBis

    assert restored_lg.type() != restored_ckde.type()
    assert restored_lg.type() != restored_discrete.type()
    assert restored_lg.type() != restored_new.type()
    assert restored_ckde.type() != restored_discrete.type()
    assert restored_ckde.type() != restored_new.type()
    assert restored_discrete.type() != restored_new.type()
    assert restored_newbis.type() == restored_new.type()
Пример #22
0
def dyn_other_partial_fit_bytes():
    """Return the pickled bytes of a ``DynamicOtherBN`` with one CPD per part.

    Only the CPDs of ``d_t_1`` (static) and ``d_t_0`` (transition) are added,
    and ``include_cpd`` is set to ``True`` before pickling.
    """
    variables = ["a", "b", "c", "d"]
    static_nodes = []
    for name in variables:
        for lag in range(1, 3):
            static_nodes.append(f"{name}_t_{lag}")
    transition_nodes = [f"{name}_t_0" for name in variables]

    static_types = [
        ("b_t_1", pbn.DiscreteFactorType()),
        ("c_t_1", pbn.CKDEType()),
        ("d_t_1", pbn.LinearGaussianCPDType()),
    ]
    other_static = OtherBN(static_nodes, [("a_t_2", "d_t_1")], static_types)
    static_cpd = LinearGaussianCPD("d_t_1", ["a_t_2"], [1, 2], 2)
    other_static.add_cpds([static_cpd])

    transition_types = [
        ("b_t_0", pbn.DiscreteFactorType()),
        ("c_t_0", pbn.CKDEType()),
        ("d_t_0", pbn.LinearGaussianCPDType()),
    ]
    other_transition = ConditionalOtherBN(transition_nodes, static_nodes,
                                          [("a_t_2", "d_t_0")],
                                          transition_types)
    transition_cpd = LinearGaussianCPD("d_t_0", ["a_t_2"], [3, 4], 1.5)
    other_transition.add_cpds([transition_cpd])

    assert other_static.type() == other_transition.type()

    dyn_other = DynamicOtherBN(variables, 2, other_static, other_transition)
    dyn_other.include_cpd = True
    return pickle.dumps(dyn_other)
Пример #23
0
def cond_other_fit_bytes():
    """Return the pickled bytes of a ``ConditionalOtherBN`` with fitted CPDs.

    The CPDs of ``c`` and ``d`` are fitted on generated data and added to the
    network, and ``include_cpd`` is set to ``True`` before pickling.
    """
    other = ConditionalOtherBN(["c", "d"], ["a", "b"], [("a", "c")],
                               [("c", pbn.CKDEType()),
                                ("d", pbn.DiscreteFactorType())])

    cpd_c = CKDE("c", ["a"])
    df_continuous = util_test.generate_normal_data_indep(100)
    cpd_c.fit(df_continuous)

    df_discrete = util_test.generate_discrete_data_dependent(100)
    # The factor is fitted on the lower-case column "d".
    df_discrete.columns = df_discrete.columns.str.lower()
    # cpd_d is built once, here; an earlier unused duplicate was removed.
    cpd_d = DiscreteFactor("d", [])
    cpd_d.fit(df_discrete)

    other.add_cpds([cpd_c, cpd_d])

    other.include_cpd = True
    return pickle.dumps(other)
Пример #24
0
def other_fit_bytes():
    """Return the pickled bytes of an ``OtherBN`` with a CPD for every node.

    ``a`` and ``b`` get hand-written linear Gaussian CPDs, ``c`` and ``d`` are
    fitted on generated data, and ``include_cpd`` is set to ``True`` before
    pickling.
    """
    node_types = [
        ("b", pbn.LinearGaussianCPDType()),
        ("c", pbn.CKDEType()),
        ("d", pbn.DiscreteFactorType()),
    ]
    network = OtherBN(["a", "b", "c", "d"], [("a", "b")], node_types)

    gaussian_a = LinearGaussianCPD("a", [], [0], 0.5)
    gaussian_b = LinearGaussianCPD("b", ["a"], [1, 2], 2)

    continuous = util_test.generate_normal_data_indep(100)
    ckde_c = CKDE("c", [])
    ckde_c.fit(continuous)

    discrete = util_test.generate_discrete_data_dependent(100)
    # The factor is fitted on the lower-case column "d".
    discrete.columns = discrete.columns.str.lower()
    discrete_d = DiscreteFactor("d", [])
    discrete_d.fit(discrete)

    network.add_cpds([gaussian_a, gaussian_b, ckde_c, discrete_d])

    network.include_cpd = True
    return pickle.dumps(network)
Пример #25
0
def test_fit():
    """Check that ``fit`` creates CPDs matching the network's structure and types."""
    spbn = SemiparametricBN([('a', 'b'), ('a', 'c'), ('a', 'd'), ('b', 'c'),
                             ('b', 'd'), ('c', 'd')])

    # Before fitting no CPD exists, so the first lookup is expected to raise.
    with pytest.raises(ValueError) as ex:
        for n in spbn.nodes():
            spbn.cpd(n)
    assert "not added" in str(ex.value)

    spbn.fit(df)

    for n in spbn.nodes():
        cpd = spbn.cpd(n)
        assert cpd.type() == pbn.LinearGaussianCPDType()

        assert type(cpd) == pbn.LinearGaussianCPD
        assert cpd.variable() == n
        assert set(cpd.evidence()) == set(spbn.parents(n))

    # Fitting an already fitted network must not fail.
    spbn.fit(df)

    spbn.remove_arc('a', 'b')

    # Until the next fit, the stored CPD of 'b' should still list its old
    # evidence. evidence must be called: comparing the bound method itself
    # with a list is always unequal and would make this check vacuous.
    cpd_b = spbn.cpd('b')
    assert type(cpd_b) == pbn.LinearGaussianCPD
    assert cpd_b.evidence() != spbn.parents('b')

    spbn.fit(df)
    cpd_b = spbn.cpd('b')
    assert type(cpd_b) == pbn.LinearGaussianCPD
    assert cpd_b.evidence() == spbn.parents('b')

    spbn.set_node_type('c', pbn.CKDEType())

    # After changing the type of 'c', its CPD is expected to be unavailable.
    with pytest.raises(ValueError) as ex:
        spbn.cpd('c')
    assert "not added" in str(ex.value)

    spbn.fit(df)
    cpd_c = spbn.cpd('c')
    assert cpd_c.type() == spbn.node_type('c')
Пример #26
0
def test_mle_create():
    """``pbn.MLE`` rejects the CKDE type and accepts the linear Gaussian type."""
    # Requesting an estimator for CKDE must raise.
    with pytest.raises(ValueError) as err_info:
        pbn.MLE(pbn.CKDEType())
    message = str(err_info.value)
    assert "MLE not available" in message

    # Construction for the linear Gaussian type must simply succeed.
    pbn.MLE(pbn.LinearGaussianCPDType())
Пример #27
0
def dyn_spbn_bytes():
    """Return the pickled bytes of a small ``DynamicSemiparametricBN``.

    The network is built over the variables a, b, c, d with a second
    constructor argument of 2, gets one arc in its static network and one
    arc plus a CKDE-typed node in its transition network.
    """
    dynamic_bn = pbn.DynamicSemiparametricBN(["a", "b", "c", "d"], 2)

    static_net = dynamic_bn.static_bn()
    static_net.add_arc("a_t_2", "d_t_1")

    transition_net = dynamic_bn.transition_bn()
    transition_net.add_arc("c_t_2", "b_t_0")
    transition_net.set_node_type("b_t_0", pbn.CKDEType())

    return pickle.dumps(dynamic_bn)
Пример #28
0
def _assert_linear_gaussian_cpd(cpd, variable, evidence, beta, variance):
    """Assert that a linear Gaussian CPD carries the expected parameters."""
    assert cpd.variable() == variable
    assert cpd.evidence() == evidence
    assert list(cpd.beta) == beta
    assert cpd.variance == variance


def _assert_other_dbn_contents(dbn):
    """Assert node types and hand-set CPDs of an unpickled "other" dynamic BN."""
    static_bn = dbn.static_bn()
    transition_bn = dbn.transition_bn()

    assert static_bn.node_type("b_t_1") == pbn.DiscreteFactorType()
    assert static_bn.node_type("c_t_1") == pbn.CKDEType()
    assert static_bn.node_type("d_t_1") == pbn.LinearGaussianCPDType()

    assert transition_bn.node_type("b_t_0") == pbn.DiscreteFactorType()
    assert transition_bn.node_type("c_t_0") == pbn.CKDEType()
    assert transition_bn.node_type("d_t_0") == pbn.LinearGaussianCPDType()

    _assert_linear_gaussian_cpd(static_bn.cpd("d_t_1"), "d_t_1", ["a_t_2"],
                                [1, 2], 2)
    _assert_linear_gaussian_cpd(transition_bn.cpd("d_t_0"), "d_t_0",
                                ["a_t_2"], [3, 4], 1.5)


def test_serialization_fitted_dbn(dyn_gaussian_partial_fit_bytes,
                                  dyn_gaussian_fit_bytes,
                                  dyn_other_partial_fit_bytes,
                                  dyn_other_fit_bytes):
    """Unpickle partially and fully fitted dynamic BNs and check their state.

    Each argument is the pickled byte string of a dynamic Bayesian network.
    For every model the test checks the ``fitted()`` flag of the network and
    of its static and transition parts.  For the partially fitted Gaussian
    model and for both "other" models it also checks that the hand-set
    linear Gaussian CPDs (and, for the "other" models, the node types)
    survived the round trip.
    """
    # ####################
    # Gaussian partial fit
    # ####################
    loaded_partial = pickle.loads(dyn_gaussian_partial_fit_bytes)
    assert not loaded_partial.fitted()
    assert not loaded_partial.static_bn().fitted()
    assert not loaded_partial.transition_bn().fitted()
    _assert_linear_gaussian_cpd(loaded_partial.static_bn().cpd("d_t_1"),
                                "d_t_1", ["a_t_2"], [1, 2], 2)
    _assert_linear_gaussian_cpd(loaded_partial.transition_bn().cpd("b_t_0"),
                                "b_t_0", ["c_t_2"], [3, 4], 5)

    # ####################
    # Gaussian fit
    # ####################
    loaded_fitted = pickle.loads(dyn_gaussian_fit_bytes)
    assert loaded_fitted.fitted()
    assert loaded_fitted.static_bn().fitted()
    assert loaded_fitted.transition_bn().fitted()

    # ####################
    # Other partial fit
    # ####################
    loaded_partial = pickle.loads(dyn_other_partial_fit_bytes)
    assert not loaded_partial.fitted()
    assert not loaded_partial.static_bn().fitted()
    assert not loaded_partial.transition_bn().fitted()
    _assert_other_dbn_contents(loaded_partial)

    # ####################
    # Other fit
    # ####################
    loaded_fitted = pickle.loads(dyn_other_fit_bytes)
    assert loaded_fitted.fitted()
    assert loaded_fitted.static_bn().fitted()
    assert loaded_fitted.transition_bn().fitted()
    # Inspect the fitted model itself; checking loaded_partial again here
    # would leave the fitted network's contents unverified.
    # NOTE(review): the CPD value checks assume fit() keeps CPDs that were
    # already fitted when added - confirm against the pbn documentation.
    _assert_other_dbn_contents(loaded_fitted)
def ckde_type_bytes():
    """Return the pickled bytes of a freshly created ``pbn.CKDEType``."""
    return pickle.dumps(pbn.CKDEType())
Пример #30
0
def test_cvl_local_score_null_spbn():
    """Compare ``CVLikelihood`` local scores with a reference on data with NaNs.

    A copy of the module-level ``df`` gets missing values injected into the
    columns a, b, c and d; the scores computed by ``pbn.CVLikelihood`` on it
    are compared with ``numpy_local_score`` (a reference helper defined
    elsewhere in this file).
    """
    # Fully connected network over a..d where 'a' and 'c' are typed as CKDE.
    spbn = pbn.SemiparametricBN([('a', 'b'), ('a', 'c'), ('a', 'd'),
                                 ('b', 'c'), ('b', 'd'), ('c', 'd')],
                                [('a', pbn.CKDEType()), ('c', pbn.CKDEType())])

    # Fixed seed so the same row positions are blanked on every run.
    np.random.seed(0)
    # 100 random row positions in [0, SIZE) per column; randint samples with
    # replacement, so a column may end up with fewer than 100 distinct NaNs.
    a_null = np.random.randint(0, SIZE, size=100)
    b_null = np.random.randint(0, SIZE, size=100)
    c_null = np.random.randint(0, SIZE, size=100)
    d_null = np.random.randint(0, SIZE, size=100)

    # Work on a copy so the shared module-level df stays untouched.
    df_null = df.copy()
    df_null.loc[df_null.index[a_null], 'a'] = np.nan
    df_null.loc[df_null.index[b_null], 'b'] = np.nan
    df_null.loc[df_null.index[c_null], 'c'] = np.nan
    df_null.loc[df_null.index[d_null], 'd'] = np.nan

    # NOTE(review): 10 is presumably the number of folds and `seed` a
    # module-level constant - confirm against the CVLikelihood signature.
    cvl = pbn.CVLikelihood(df_null, 10, seed)

    # local_score with explicit evidence must match the reference. 'a' and
    # 'c' are compared as CKDE (the types set above); 'b' and 'd' are
    # compared as linear Gaussian (presumably the default type for nodes
    # without an explicit type - confirm).
    assert np.isclose(cvl.local_score(spbn, 'a', []),
                      numpy_local_score(pbn.CKDEType(), df_null, 'a', []))
    assert np.isclose(
        cvl.local_score(spbn, 'b', ['a']),
        numpy_local_score(pbn.LinearGaussianCPDType(), df_null, 'b', ['a']))
    assert np.isclose(
        cvl.local_score(spbn, 'c', ['a', 'b']),
        numpy_local_score(pbn.CKDEType(), df_null, 'c', ['a', 'b']))
    assert np.isclose(
        cvl.local_score(spbn, 'd', ['a', 'b', 'c']),
        numpy_local_score(pbn.LinearGaussianCPDType(), df_null, 'd',
                          ['a', 'b', 'c']))
    # Same check with the reference evidence permuted: the order of the
    # evidence must not change the score.
    assert np.isclose(
        cvl.local_score(spbn, 'd', ['a', 'b', 'c']),
        numpy_local_score(pbn.LinearGaussianCPDType(), df_null, 'd',
                          ['b', 'c', 'a']))

    # Omitting the evidence must give the same score as passing the node's
    # parents in the network explicitly.
    assert cvl.local_score(spbn,
                           'a') == cvl.local_score(spbn, 'a',
                                                   spbn.parents('a'))
    assert cvl.local_score(spbn,
                           'b') == cvl.local_score(spbn, 'b',
                                                   spbn.parents('b'))
    assert cvl.local_score(spbn,
                           'c') == cvl.local_score(spbn, 'c',
                                                   spbn.parents('c'))
    assert cvl.local_score(spbn,
                           'd') == cvl.local_score(spbn, 'd',
                                                   spbn.parents('d'))

    # local_score_node_type takes the node type as an argument; here each
    # node is scored with the type opposite to the one used above.
    assert np.isclose(
        cvl.local_score_node_type(spbn, pbn.LinearGaussianCPDType(), 'a', []),
        numpy_local_score(pbn.LinearGaussianCPDType(), df_null, 'a', []))
    assert np.isclose(
        cvl.local_score_node_type(spbn, pbn.CKDEType(), 'b', ['a']),
        numpy_local_score(pbn.CKDEType(), df_null, 'b', ['a']))
    assert np.isclose(
        cvl.local_score_node_type(spbn, pbn.LinearGaussianCPDType(), 'c',
                                  ['a', 'b']),
        numpy_local_score(pbn.LinearGaussianCPDType(), df_null, 'c',
                          ['a', 'b']))
    assert np.isclose(
        cvl.local_score_node_type(spbn, pbn.CKDEType(), 'd', ['a', 'b', 'c']),
        numpy_local_score(pbn.CKDEType(), df_null, 'd', ['a', 'b', 'c']))
    # Evidence order must not matter for local_score_node_type either.
    assert np.isclose(
        cvl.local_score_node_type(spbn, pbn.CKDEType(), 'd', ['a', 'b', 'c']),
        numpy_local_score(pbn.CKDEType(), df_null, 'd', ['b', 'c', 'a']))