Exemplo n.º 1
0
def test_new_specific_bn_type():
    """Exercise the custom network types, alone and with hill-climbing.

    Covers ``SpecificNetwork`` and ``ConditionalSpecificNetwork`` (both
    defined outside this function). For each, the test checks that every
    instance reports the same ``type()``, that ``can_add_arc`` accepts
    ("a", "b") but rejects ("b", "a") and ("c", "d"), and that a structure
    learned by greedy hill-climbing keeps the start model's type and only
    contains arcs whose source is "a".
    """
    plain = SpecificNetwork(["a", "b", "c", "d"])
    with_arc = SpecificNetwork(["a", "b", "c", "d"], [("a", "b")])
    plain_again = SpecificNetwork(["a", "b", "c", "d"])

    # The network type must not depend on the arcs given at construction.
    assert plain.type() == with_arc.type()
    assert plain.type() == plain_again.type()
    assert with_arc.type() == plain_again.type()

    # NOTE(review): these checks suggest the type only allows arcs leaving
    # "a" -- confirm against the SpecificNetwork definition.
    assert plain.can_add_arc("a", "b")
    assert not plain.can_add_arc("b", "a")
    assert not plain.can_add_arc("c", "d")

    assert plain.num_arcs() == plain_again.num_arcs() == 0
    assert with_arc.arcs() == [("a", "b")]

    data = util_test.generate_normal_data_indep(1000)
    bic = pbn.BIC(data)

    initial = SpecificNetwork(["a", "b", "c", "d"])

    climber = pbn.GreedyHillClimbing()
    learned = climber.estimate(pbn.ArcOperatorSet(), bic, initial)
    assert learned.type() == initial.type()
    assert all(source == "a" for source, _ in learned.arcs())

    # #######################
    # Conditional BN
    # #######################

    cond_plain = ConditionalSpecificNetwork(["a", "b"], ["c", "d"])
    cond_with_arc = ConditionalSpecificNetwork(["a", "b"], ["c", "d"],
                                               [("a", "b")])
    cond_plain_again = ConditionalSpecificNetwork(["a", "b"], ["c", "d"])

    assert cond_plain.type() == cond_with_arc.type()
    assert cond_plain.type() == cond_plain_again.type()
    assert cond_with_arc.type() == cond_plain_again.type()

    assert cond_plain.can_add_arc("a", "b")
    assert not cond_plain.can_add_arc("b", "a")
    assert not cond_plain.can_add_arc("c", "d")

    assert cond_plain.num_arcs() == cond_plain_again.num_arcs() == 0
    assert cond_with_arc.arcs() == [("a", "b")]

    cond_initial = ConditionalSpecificNetwork(["a", "c"], ["b", "d"])

    # A fresh search object is used for the conditional run.
    climber = pbn.GreedyHillClimbing()
    cond_learned = climber.estimate(pbn.ArcOperatorSet(), bic, cond_initial)
    assert cond_learned.type() == cond_initial.type()
    assert all(source == "a" for source, _ in cond_learned.arcs())
Exemplo n.º 2
0
def test_create():
    """Check ``OperatorPool`` construction with valid and empty input.

    A pool built from two operator sets must construct without raising,
    while an empty list must raise ``ValueError`` whose message contains
    "cannot be empty".
    """
    arc_ops = pbn.ArcOperatorSet()
    type_ops = pbn.ChangeNodeTypeSet()

    # Only the absence of an exception matters here; the pool is discarded.
    pbn.OperatorPool([arc_ops, type_ops])

    with pytest.raises(ValueError) as excinfo:
        pbn.OperatorPool([])
    assert "cannot be empty" in str(excinfo.value)
Exemplo n.º 3
0
def test_hc_conditional_estimate():
    """Check greedy hill-climbing on conditional Gaussian networks with BIC.

    Two start models are compared: one built directly from the columns of
    the module-level ``df`` (first two columns as interface nodes, the rest
    as regular nodes) and one built with an extra node 'e' and an extra
    interface node 'f' that are removed again before learning. Both must
    behave the same for a single iteration, for an epsilon larger than the
    best score improvement, and for a full run, where no learned arc may
    point into an interface node.
    """
    bic = pbn.BIC(df)
    columns = list(df.columns.values)

    start = pbn.ConditionalGaussianNetwork(columns[2:], columns[:2])

    # Same model, but constructed with extra nodes that are then removed.
    padded_nodes = columns[2:]
    padded_nodes.insert(1, 'e')
    padded_interface = columns[:2]
    padded_interface.insert(1, 'f')
    start_removed_nodes = pbn.ConditionalGaussianNetwork(
        padded_nodes, padded_interface)
    start_removed_nodes.remove_node('e')
    start_removed_nodes.remove_interface_node('f')

    arc_set = pbn.ArcOperatorSet()
    hc = pbn.GreedyHillClimbing()

    # One iteration must add exactly one arc.
    res = hc.estimate(arc_set, bic, start, max_iters=1, verbose=False)
    assert res.num_arcs() == 1
    added_arc = res.arcs()[0]
    op_delta = bic.score(res) - bic.score(start)

    res_removed = hc.estimate(
        arc_set, bic, start_removed_nodes, max_iters=1, verbose=False)
    assert res_removed.num_arcs() == 1
    added_arc_removed = res_removed.arcs()[0]

    # Either orientation of the same arc is accepted.
    assert added_arc in (added_arc_removed, added_arc_removed[::-1])
    removed_delta = bic.score(res_removed) - bic.score(start_removed_nodes)
    assert np.isclose(op_delta, removed_delta)

    # The global score improvement must match the local score difference
    # of the arc's target node with and without the new parent.
    source, target = added_arc[0], added_arc[1]
    local_gain = (bic.local_score(res, target, [source]) -
                  bic.local_score(res, target, []))
    assert np.isclose(op_delta, local_gain)

    source_rm, target_rm = added_arc_removed[0], added_arc_removed[1]
    local_gain_removed = (bic.local_score(res, target_rm, [source_rm]) -
                          bic.local_score(res, target_rm, []))
    assert np.isclose(op_delta, local_gain_removed)

    # With epsilon above the best improvement, no arc may be added.
    too_strict = op_delta + 0.01
    res = hc.estimate(arc_set, bic, start, epsilon=too_strict)
    assert res.num_arcs() == start.num_arcs()
    res_removed = hc.estimate(
        arc_set, bic, start_removed_nodes, epsilon=too_strict)
    assert res_removed.num_arcs() == start_removed_nodes.num_arcs()

    # Full run: no arc may end in an interface node.
    res = hc.estimate(arc_set, bic, start, verbose=False)
    assert not any(res.is_interface(arc[1]) for arc in res.arcs())
    res_removed = hc.estimate(arc_set, bic, start_removed_nodes, verbose=False)
    assert not any(
        res_removed.is_interface(arc[1]) for arc in res_removed.arcs())
Exemplo n.º 4
0
def test_nomax():
    """Check that ``find_max`` returns ``None`` when no operator is left.

    The network has only nodes 'a' and 'b', and the arc ("a", "b") is
    whitelisted in the operator set.
    NOTE(review): presumably the whitelist rules out every operation on
    that arc, leaving no candidates -- confirm against the library.
    """
    network = pbn.GaussianNetwork(['a', 'b'])
    score = pbn.BIC(df)

    operators = pbn.ArcOperatorSet(whitelist=[("a", "b")])
    operators.cache_scores(network, score)

    best = operators.find_max(network)
    assert best is None
Exemplo n.º 5
0
def test_newbn_estimate_validation():
    """Check that hill-climbing preserves a custom network subclass.

    Starting from a ``NewBN`` (defined outside this function), the learned
    model must have exactly the same Python type and must expose
    ``extra_data == "extra"``.
    """
    initial = NewBN(["a", "b", "c", "d"])
    climber = pbn.GreedyHillClimbing()
    operators = pbn.ArcOperatorSet()
    score = pbn.BIC(df)

    learned = climber.estimate(operators, score, initial)

    # Exact type match is intended here, not an isinstance check.
    initial_type = type(initial)
    learned_type = type(learned)
    assert initial_type == learned_type
    assert learned.extra_data == "extra"
Exemplo n.º 6
0
def test_check_max_score():
    """Check the delta and direction of the best arc operator.

    On a two-node Gaussian network ('c', 'd'), the best operator's delta
    must equal the BIC local-score gain of 'd' when 'c' becomes its parent.
    After blacklisting that arc, the best operator must be an ``AddArc`` of
    the same arc in the opposite direction.
    """
    network = pbn.GaussianNetwork(['c', 'd'])
    score = pbn.BIC(df)
    operators = pbn.ArcOperatorSet()

    operators.cache_scores(network, score)
    first = operators.find_max(network)

    expected_gain = (score.local_score(network, 'd', ['c']) -
                     score.local_score(network, 'd'))
    assert np.isclose(first.delta(), expected_gain)

    # With the best arc forbidden, the reversed arc is expected to win.
    operators.set_arc_blacklist([(first.source(), first.target())])
    operators.cache_scores(network, score)

    second = operators.find_max(network)

    assert first.source() == second.target()
    assert first.target() == second.source()
    assert type(first) == type(second)
    assert type(first) == pbn.AddArc
Exemplo n.º 7
0
def test_find_max():
    """Check that ``OperatorPool.find_max`` picks the best of its sets.

    The best operator of an arc set and of a node-type set are computed
    separately on a semiparametric network scored with ``CVLikelihood``.
    The pool combining both sets must return whichever has the larger
    delta, preferring the arc operator on ties.
    """
    model = pbn.SemiparametricBN(['a', 'b', 'c', 'd'])
    score = pbn.CVLikelihood(df)
    arc_ops = pbn.ArcOperatorSet()
    type_ops = pbn.ChangeNodeTypeSet()

    # Statement order is kept: node types are set between the two caches.
    arc_ops.cache_scores(model, score)
    model.set_unknown_node_types(df)
    type_ops.cache_scores(model, score)

    best_arc = arc_ops.find_max(model)
    best_type = type_ops.find_max(model)

    combined = pbn.OperatorPool([arc_ops, type_ops])
    combined.cache_scores(model, score)

    best_overall = combined.find_max(model)

    expected = best_arc if best_arc.delta() >= best_type.delta() else best_type
    assert best_overall == expected
Exemplo n.º 8
0
def test_lists():
    """Check the list setters of ``ArcOperatorSet`` and their validation.

    Valid blacklist, whitelist, max-indegree and type-whitelist settings
    must let ``cache_scores`` run. A blacklist or whitelist naming a node
    that is not in the network ('e') must make ``cache_scores`` raise
    ``IndexError`` with "not present in the graph" in the message.
    """
    network = pbn.GaussianNetwork(['a', 'b', 'c', 'd'])
    score = pbn.BIC(df)
    operators = pbn.ArcOperatorSet()

    operators.set_arc_blacklist([("b", "a")])
    operators.set_arc_whitelist([("b", "c")])
    operators.set_max_indegree(3)
    operators.set_type_whitelist([("a", pbn.LinearGaussianCPDType())])

    # All settings reference existing nodes, so this must not raise.
    operators.cache_scores(network, score)

    # First the blacklist, then the whitelist, gets an arc from unknown 'e'.
    for set_arc_list in (operators.set_arc_blacklist,
                         operators.set_arc_whitelist):
        set_arc_list([("e", "a")])

        with pytest.raises(IndexError) as excinfo:
            operators.cache_scores(network, score)
        assert "not present in the graph" in str(excinfo.value)
Exemplo n.º 9
0
def test_hc_estimate():
    """Check greedy hill-climbing on Gaussian networks scored with BIC.

    The same checks run on two start models: one built directly from the
    columns of the module-level ``df`` and one built with two extra nodes
    ('e', 'f') that are removed before learning. Covered cases: a single
    iteration, a single iteration with the first arc blacklisted, an
    epsilon larger than the best improvement, and a full run.
    """
    bic = pbn.BIC(df)
    column_names = list(df.columns.values)
    start = pbn.GaussianNetwork(column_names)

    # Check algorithm with BN with nodes removed.
    column_names.insert(1, 'e')
    column_names.insert(3, 'f')
    start_removed_nodes = pbn.GaussianNetwork(column_names)
    start_removed_nodes.remove_node('e')
    start_removed_nodes.remove_node('f')

    arc_set = pbn.ArcOperatorSet()

    hc = pbn.GreedyHillClimbing()

    # One iteration must add exactly one arc.
    res = hc.estimate(arc_set, bic, start, max_iters=1)
    assert res.num_arcs() == 1
    added_arc = res.arcs()[0]
    op_delta = bic.score(res) - bic.score(start)

    res_removed = hc.estimate(arc_set, bic, start_removed_nodes, max_iters=1)
    # Check the removed-node result itself, not `res` a second time.
    assert res_removed.num_arcs() == 1
    added_arc_removed = res_removed.arcs()[0]
    # Either orientation of the same arc is accepted.
    assert (added_arc == added_arc_removed
            or added_arc == added_arc_removed[::-1])
    assert np.isclose(op_delta,
                      bic.score(res_removed) - bic.score(start_removed_nodes))

    # BIC is score equivalent, so if we blacklist the added_arc, its reverse
    # will be added.
    res = hc.estimate(arc_set,
                      bic,
                      start,
                      max_iters=1,
                      arc_blacklist=[added_arc])
    assert res.num_arcs() == 1
    reversed_arc = res.arcs()[0][::-1]
    assert added_arc == reversed_arc

    res_removed = hc.estimate(arc_set,
                              bic,
                              start_removed_nodes,
                              max_iters=1,
                              arc_blacklist=[added_arc_removed])
    # Check the removed-node result itself, not `res` a second time.
    assert res_removed.num_arcs() == 1
    reversed_arc_removed = res_removed.arcs()[0][::-1]
    assert added_arc_removed == reversed_arc_removed

    # The global score improvement must match the local score difference
    # of the arc's target node with and without the new parent.
    assert np.isclose(
        op_delta,
        bic.local_score(res, added_arc[1], [added_arc[0]]) -
        bic.local_score(res, added_arc[1], []))
    assert np.isclose(
        op_delta,
        bic.local_score(res, added_arc_removed[1], [added_arc_removed[0]]) -
        bic.local_score(res, added_arc_removed[1], []))

    # With epsilon above the best improvement, no arc may be added.
    res = hc.estimate(arc_set, bic, start, epsilon=(op_delta + 0.01))
    assert res.num_arcs() == start.num_arcs()

    res_removed = hc.estimate(arc_set,
                              bic,
                              start_removed_nodes,
                              epsilon=(op_delta + 0.01))
    assert res_removed.num_arcs() == start_removed_nodes.num_arcs()

    # Can't compare models because the arcs could be oriented in different
    # direction, leading to a different search path. Execute the code, just
    # to check no error is given.
    hc.estimate(arc_set, bic, start, verbose=False)
    hc.estimate(arc_set, bic, start_removed_nodes, verbose=False)
Exemplo n.º 10
0
def test_hc_estimate_validation():
    """Check greedy hill-climbing on Gaussian networks with a validated score.

    Mirrors the BIC hill-climbing test but scores with
    ``ValidatedLikelihood``; score deltas are read from its ``cv_lik``
    component. The same checks run on a start model built directly from the
    columns of the module-level ``df`` and on one built with two extra
    nodes ('e', 'f') that are removed before learning.
    """
    column_names = list(df.columns.values)
    start = pbn.GaussianNetwork(column_names)

    # Same model, but constructed with extra nodes that are then removed.
    column_names.insert(1, 'e')
    column_names.insert(4, 'f')
    start_removed_nodes = pbn.GaussianNetwork(column_names)
    start_removed_nodes.remove_node('e')
    start_removed_nodes.remove_node('f')

    vl = pbn.ValidatedLikelihood(df)
    arc_set = pbn.ArcOperatorSet()

    hc = pbn.GreedyHillClimbing()

    # One iteration must add exactly one arc.
    res = hc.estimate(arc_set, vl, start, max_iters=1)
    assert res.num_arcs() == 1
    added_arc = res.arcs()[0]
    op_delta = vl.cv_lik.score(res) - vl.cv_lik.score(start)

    res_removed = hc.estimate(arc_set, vl, start_removed_nodes, max_iters=1)
    assert res_removed.num_arcs() == 1
    added_arc_removed = res_removed.arcs()[0]
    # Either orientation of the same arc is accepted.
    assert (added_arc == added_arc_removed
            or added_arc == added_arc_removed[::-1])
    assert np.isclose(
        op_delta,
        vl.cv_lik.score(res_removed) - vl.cv_lik.score(start_removed_nodes))

    # The global score improvement must match the local score difference
    # of the arc's target node with and without the new parent.
    assert np.isclose(
        op_delta,
        vl.cv_lik.local_score(res, added_arc[1], [added_arc[0]]) -
        vl.cv_lik.local_score(res, added_arc[1], []))
    assert np.isclose(
        op_delta,
        vl.cv_lik.local_score(res, added_arc_removed[1],
                              [added_arc_removed[0]]) -
        vl.cv_lik.local_score(res, added_arc_removed[1], []))

    # CV is score equivalent for GBNs, so if we blacklist the added_edge, its
    # reverse will be added.
    res = hc.estimate(arc_set,
                      vl,
                      start,
                      max_iters=1,
                      arc_blacklist=[added_arc])
    assert res.num_arcs() == 1
    reversed_arc = res.arcs()[0][::-1]
    assert added_arc == reversed_arc

    res_removed = hc.estimate(arc_set,
                              vl,
                              start_removed_nodes,
                              max_iters=1,
                              arc_blacklist=[added_arc_removed])
    assert res_removed.num_arcs() == 1
    reversed_arc_removed = res_removed.arcs()[0][::-1]
    # Compare against the arc blacklisted for THIS model: the two start
    # models may have picked opposite orientations (allowed above), so
    # comparing with `reversed_arc` could fail spuriously.
    assert added_arc_removed == reversed_arc_removed

    # With epsilon above the best improvement, no arc may be added.
    res = hc.estimate(arc_set, vl, start, epsilon=(op_delta + 0.01))
    assert res.num_arcs() == start.num_arcs()

    res_removed = hc.estimate(arc_set,
                              vl,
                              start_removed_nodes,
                              epsilon=(op_delta + 0.01))
    assert res_removed.num_arcs() == start_removed_nodes.num_arcs()

    # Can't compare models because the arcs could be oriented in different
    # direction, leading to a different search path. Execute the code, just
    # to check no error is given.
    hc.estimate(arc_set, vl, start, verbose=False)
    hc.estimate(arc_set, vl, start_removed_nodes, verbose=False)