Пример #1
0
def test_spn_construction_by_init_and_evaluation():
    """Build an SPN through the ``Spn`` constructor and check its evaluation.

    The input layer and the inner layers are produced by the
    ``build_spn_*`` helpers and handed to ``Spn`` in a single call; the
    result of ``spn.eval(I)`` is then compared with ``root_vals``.
    ``vars``, ``I`` and ``root_vals`` are defined outside this function.
    """
    # leaves first, the inner layers are derived from them
    leaves = build_spn_indicator_layer(vars)
    sums, prods = build_spn_layers(leaves)

    # the whole layer stack goes to the constructor in one go
    network = Spn(input_layer=leaves, layers=[sums, prods])

    outcome = network.eval(I)
    print('First evaluation')
    print(outcome)

    assert_log_array_almost_equal(root_vals, outcome)
Пример #2
0
def test_spn_construction_by_add_and_evaluation():
    """Build an SPN incrementally and check its evaluation.

    Starts from an empty ``Spn``, attaches the input layer with
    ``set_input_layer`` and the inner layers with ``add_layer``, then
    compares ``spn.eval(I)`` with ``root_vals``. ``vars``, ``I`` and
    ``root_vals`` are defined outside this function.
    """
    network = Spn()

    # leaves first, the inner layers are derived from them
    leaves = build_spn_indicator_layer(vars)
    sums, prods = build_spn_layers(leaves)

    # attach the layers one at a time: input, then sum, then product
    network.set_input_layer(leaves)
    for inner in (sums, prods):
        network.add_layer(inner)

    outcome = network.eval(I)
    print('First evaluation')
    print(outcome)
    assert_log_array_almost_equal(root_vals, outcome)
Пример #3
0
def test_spn_construction_by_add_and_evaluation_II():
    """Build a two-layer SPN on a smoothed input layer and check evaluation.

    The input layer comes from ``build_spn_smoothed_layer`` and a single
    inner layer from ``build_spn_layers_II``; both are attached to an
    initially empty ``Spn``. ``vars``, ``dicts``, ``alpha``, ``I`` and
    ``root_vals`` are defined outside this function.
    """
    network = Spn()

    smoothed_leaves = build_spn_smoothed_layer(vars, dicts, alpha)
    products = build_spn_layers_II(smoothed_leaves)

    # attach the input layer first, then the only inner layer
    network.set_input_layer(smoothed_leaves)
    network.add_layer(products)

    outcome = network.eval(I)
    print('First smoothed evaluation')
    print(outcome)
    assert_log_array_almost_equal(root_vals, outcome)
Пример #4
0
def test_spn_set_get_weights():
    """Round-trip the weights of a small hand-built SPN.

    Builds a root sum over two products, three sum nodes below them and
    five indicator leaves, then checks that ``set_weights`` followed by
    ``get_weights`` returns what was set -- first for ``weights_ds``
    (defined outside this function), then for the original weights.
    """
    # root: one sum node
    root = SumNode()
    top_layer = SumLayer([root])

    # two equally weighted products under the root
    products = [ProductNode(), ProductNode()]
    for product in products:
        root.add_child(product, 0.5)
    product_layer = ProductLayer(products)

    # three sum nodes; the middle one is shared by both products
    sums = [SumNode() for _ in range(3)]
    for product, pair in zip(products, (sums[0:2], sums[1:3])):
        for sum_node in pair:
            product.add_child(sum_node)
    sum_layer = SumLayer(sums)

    # five identical indicator leaves (var 0, value 1)
    leaves = [CategoricalIndicatorNode(var=0, var_val=1) for _ in range(5)]
    input_layer = CategoricalInputLayer(nodes=leaves)

    # overlapping leaf windows for the three sum nodes, all weights 0.2
    leaf_windows = (leaves[0:2], leaves[1:4], leaves[3:5])
    for sum_node, window in zip(sums, leaf_windows):
        for leaf in window:
            sum_node.add_child(leaf, 0.2)

    spn = Spn(input_layer=input_layer,
              layers=[sum_layer, product_layer, top_layer])

    print(spn)

    # remember the weights the network was built with
    initial_weights = spn.get_weights()

    # overwrite them and read them back
    spn.set_weights(weights_ds)
    replaced_weights = spn.get_weights()
    assert replaced_weights == weights_ds

    # restore the initial ones and read them back again
    spn.set_weights(initial_weights)
    restored_weights = spn.get_weights()
    assert restored_weights == initial_weights
Пример #5
0
def create_valid_toy_spn():
    """Assemble a toy SPN over the four variables 0..3 and return it.

    Layers, top to bottom: root sum, two products, four sums, four
    products, eight sums, and a ``CategoricalIndicatorLayer`` built with
    ``vars=[2, 3, 2, 2]``. Every node is created with an explicit
    ``var_scope``. Each sum node whose scope is a single variable is linked
    (weight 1.0) to every input node of that variable.

    Returns:
        The ``Spn`` built from the layers above.
    """
    # root layer
    full_scope = frozenset({0, 1, 2, 3})
    root = SumNode(var_scope=full_scope)
    root_layer = SumLayer([root])

    # top product layer: two products over the whole scope
    top_prods = [ProductNode(var_scope=full_scope) for _ in range(2)]
    top_prod_layer = ProductLayer(top_prods)

    for prod in top_prods:
        root.add_child(prod, 0.5)

    # middle sum layer
    scope_01 = frozenset({0, 1})
    scope_2 = frozenset({2})
    scope_3 = frozenset({3})
    scope_23 = frozenset({2, 3})

    mid_sums = [SumNode(var_scope=scope)
                for scope in (scope_01, scope_2, scope_3, scope_23)]
    sum_01, sum_2, sum_3, sum_23 = mid_sums

    # first product splits {0,1} | {2} | {3}, second one {0,1} | {2,3}
    for child in (sum_01, sum_2, sum_3):
        top_prods[0].add_child(child)
    for child in (sum_01, sum_23):
        top_prods[1].add_child(child)

    mid_sum_layer = SumLayer(mid_sums)

    # lower product layer: two products per composite scope
    low_prods = [ProductNode(var_scope=scope)
                 for scope in (scope_01, scope_01, scope_23, scope_23)]

    for parent, pair in ((sum_01, low_prods[:2]), (sum_23, low_prods[2:])):
        for prod in pair:
            parent.add_child(prod, 0.5)

    low_prod_layer = ProductLayer(low_prods)

    # lowest sum layer: single-variable sums, two per lower product
    scope_0 = frozenset({0})
    scope_1 = frozenset({1})

    low_sums = [SumNode(var_scope=scope)
                for scope in (scope_0, scope_1, scope_0, scope_1,
                              scope_2, scope_3, scope_2, scope_3)]

    # product k takes the sums at positions 2k and 2k + 1
    for k, prod in enumerate(low_prods):
        prod.add_child(low_sums[2 * k])
        prod.add_child(low_sums[2 * k + 1])

    low_sum_layer = SumLayer(low_sums)

    # input layer
    var_values = [2, 3, 2, 2]
    input_layer = CategoricalIndicatorLayer(vars=var_values)

    # every single-variable sum node gets the input nodes of its variable
    univariate_sums = [sum_2, sum_3] + low_sums
    for sum_node in univariate_sums:
        # the scope holds exactly one variable; unpacking fails otherwise
        (scope_var,) = sum_node.var_scope
        for leaf in input_layer.nodes():
            if leaf.var == scope_var:
                sum_node.add_child(leaf, 1.0)

    return Spn(input_layer=input_layer,
               layers=[low_sum_layer, low_prod_layer,
                       mid_sum_layer, top_prod_layer,
                       root_layer])
Пример #6
0
def test_spn_mpe_eval_and_traversal():
    """Check MPE evaluation of a small SPN against hand-computed maxima.

    The network has five input nodes, three sum nodes (each over three
    consecutive inputs), two product nodes (each over two consecutive
    sums) and two root sum nodes. After ``spn.test_mpe_eval()`` the
    ``log_val`` of every sum, product and root node is compared with the
    value obtained by replacing sums with maxima. Finally the triples
    yielded by ``spn.mpe_traversal()`` are printed.
    """

    def log_or_log_zero(value):
        # values numerically equal to zero map to the LOG_ZERO sentinel
        return LOG_ZERO if numpy.isclose(value, 0) else log(value)

    def check_log_val(expected, found):
        # LOG_ZERO is matched with IS_LOG_ZERO, anything else numerically
        if IS_LOG_ZERO(expected):
            assert IS_LOG_ZERO(found)
        else:
            assert_almost_equal(expected, found)

    # input layer
    leaves = [Node() for _ in range(5)]
    input_layer = CategoricalInputLayer(leaves)

    # three sum nodes; sum k is linked to leaves k, k + 1, k + 2
    sums = [SumNode() for _ in range(3)]
    sum_weights = [[0.3, 0.3, 0.4],
                   [0.15, 0.15, 0.7],
                   [0.4, 0.25, 0.35]]
    for k, (sum_node, weights) in enumerate(zip(sums, sum_weights)):
        for leaf, weight in zip(leaves[k:k + 3], weights):
            sum_node.add_child(leaf, weight)

    sum_layer = SumLayer(sums)

    # two product nodes; product k is linked to sums k and k + 1
    prods = [ProductNode(), ProductNode()]
    for k, prod in enumerate(prods):
        prod.add_child(sums[k])
        prod.add_child(sums[k + 1])

    prod_layer = ProductLayer(prods)

    # two roots, both over the two products
    roots = [SumNode(), SumNode()]
    root_weights = [[0.5, 0.5],
                    [0.9, 0.1]]
    for root, weights in zip(roots, root_weights):
        for prod, weight in zip(prods, weights):
            root.add_child(prod, weight)

    root_layer = SumLayer(roots)

    # create the spn
    spn = Spn(input_layer=input_layer,
              layers=[sum_layer, prod_layer, root_layer])

    print('===================')
    print(spn)
    print('===================')

    # setting the input values
    vals = [0.0, 0.5, 0.3, 1.0, 0.0]
    for leaf, val in zip(leaves, vals):
        leaf.set_val(val)

    # evaluating the spn with MPE inference
    res = spn.test_mpe_eval()
    print('spn eval\'d', res)

    # expected values for the sum layer: max over the weighted children
    maxes = [max(val * weight
                 for val, weight in zip(vals[k:k + 3], weights))
             for k, weights in enumerate(sum_weights)]
    log_maxes = [log_or_log_zero(value) for value in maxes]

    print('expected max vals {0}, {1}, {2}'.format(*log_maxes))
    print('found    max vals {0}, {1}, {2}'.format(sums[0].log_val,
                                                   sums[1].log_val,
                                                   sums[2].log_val))
    for expected, sum_node in zip(log_maxes, sums):
        check_log_val(expected, sum_node.log_val)

    # product layer: products in the linear domain, sums in the log domain
    prod_vals = [maxes[0] * maxes[1], maxes[1] * maxes[2]]
    prod_log_vals = [log_maxes[0] + log_maxes[1],
                     log_maxes[1] + log_maxes[2]]

    print('exp prod vals {0}, {1}'.format(*prod_log_vals))
    print('rea prod vals {0}, {1}'.format(prods[0].log_val,
                                          prods[1].log_val))
    for expected, prod in zip(prod_log_vals, prods):
        check_log_val(expected, prod.log_val)

    # root layer, again a max over the weighted children
    root_maxes = [max(prod_val * weight
                      for prod_val, weight in zip(prod_vals, weights))
                  for weights in root_weights]
    root_log_maxes = [log_or_log_zero(value) for value in root_maxes]

    print('exp root vals {0}, {1}'.format(*root_log_maxes))
    print('found ro vals {0}, {1}'.format(roots[0].log_val,
                                          roots[1].log_val))

    for expected, root in zip(root_log_maxes, roots):
        check_log_val(expected, root.log_val)

    # now we are traversing top down the net
    print('mpe traversing')
    for first, second, third in spn.mpe_traversal():
        print(first, second, third)
Пример #7
0
def test_spn_backprop():
    """Check ``Spn.backprop`` log-derivatives against hand-computed values.

    The network has five input nodes, three sum nodes (each over three
    consecutive inputs), two product nodes (each over two consecutive
    sums) and two root sum nodes. After ``spn.test_eval()`` and
    ``spn.backprop()`` the ``log_der`` of every node is compared, layer by
    layer from the roots down, with derivatives computed by hand in the
    linear domain and then converted to logs.

    Non-positive derivatives are mapped to ``LOG_ZERO`` with an explicit
    ``> 0.0`` guard at every level. The leaf level previously relied on
    bare ``except:`` clauses around ``log(...)``, which would also have
    hidden unrelated errors.
    """

    def log_or_zero(value):
        # log of a positive value, the LOG_ZERO sentinel otherwise
        return log(value) if value > 0.0 else LOG_ZERO

    def check_log_der(expected, found, *decimal):
        # LOG_ZERO is matched with IS_LOG_ZERO, anything else numerically;
        # ``decimal`` optionally forwards a precision to assert_almost_equal
        if IS_LOG_ZERO(expected):
            assert IS_LOG_ZERO(found)
        else:
            assert_almost_equal(expected, found, *decimal)

    # create initial layer
    node1 = Node()
    node2 = Node()
    node3 = Node()
    node4 = Node()
    node5 = Node()

    input_layer = CategoricalInputLayer([node1, node2,
                                         node3, node4,
                                         node5])

    # top layer made by 3 sum nodes
    sum1 = SumNode()
    sum2 = SumNode()
    sum3 = SumNode()

    # linking to input nodes
    weight11 = 0.3
    sum1.add_child(node1, weight11)
    weight12 = 0.3
    sum1.add_child(node2, weight12)
    weight13 = 0.4
    sum1.add_child(node3, weight13)

    weight22 = 0.15
    sum2.add_child(node2, weight22)
    weight23 = 0.15
    sum2.add_child(node3, weight23)
    weight24 = 0.7
    sum2.add_child(node4, weight24)

    weight33 = 0.4
    sum3.add_child(node3, weight33)
    weight34 = 0.25
    sum3.add_child(node4, weight34)
    weight35 = 0.35
    sum3.add_child(node5, weight35)

    sum_layer = SumLayer([sum1, sum2, sum3])

    # another layer with two product nodes
    prod1 = ProductNode()
    prod2 = ProductNode()

    prod1.add_child(sum1)
    prod1.add_child(sum2)
    prod2.add_child(sum2)
    prod2.add_child(sum3)

    prod_layer = ProductLayer([prod1, prod2])

    # root layer, double sum
    root1 = SumNode()
    root2 = SumNode()

    weightr11 = 0.5
    root1.add_child(prod1, weightr11)
    weightr12 = 0.5
    root1.add_child(prod2, weightr12)

    weightr21 = 0.9
    root2.add_child(prod1, weightr21)
    weightr22 = 0.1
    root2.add_child(prod2, weightr22)

    root_layer = SumLayer([root1, root2])

    # create the spn
    spn = Spn(input_layer=input_layer,
              layers=[sum_layer, prod_layer, root_layer])

    # setting the input values
    val1 = 0.0
    node1.set_val(val1)
    val2 = 0.5
    node2.set_val(val2)
    val3 = 0.3
    node3.set_val(val3)
    val4 = 1.0
    node4.set_val(val4)
    val5 = 0.0
    node5.set_val(val5)

    # evaluating the spn
    res = spn.test_eval()
    print('spn eval\'d', res)

    # backprop
    spn.backprop()

    # computing derivatives by hand
    # topdown: root layer, both roots are expected to have derivative 1
    root_der = 1.0
    log_root_der = log(root_der)

    print('root ders', root1.log_der)
    assert_almost_equal(log_root_der, root1.log_der)
    assert_almost_equal(log_root_der, root2.log_der)

    # product layer: each product collects the weighted derivative of
    # both roots
    prod_der1 = (root_der * weightr11 +
                 root_der * weightr21)

    prod_der2 = (root_der * weightr12 +
                 root_der * weightr22)

    log_prod_der1 = log_or_zero(prod_der1)
    log_prod_der2 = log_or_zero(prod_der2)

    print('found  prod ders', prod1.log_der, prod2.log_der)
    print('expect prod ders', log_prod_der1, log_prod_der2)

    check_log_der(log_prod_der1, prod1.log_der)
    check_log_der(log_prod_der2, prod2.log_der)

    # sum layer: the derivative of a product w.r.t. one child is the
    # value of its other child (here a weighted sum of the inputs)
    sum_der1 = (
        prod_der1 * (weight22 * val2 +
                     weight23 * val3 +
                     weight24 * val4))

    log_sum_der1 = log_or_zero(sum_der1)

    # sum2 is shared by both products, so two terms are accumulated
    sum_der2 = (prod_der1 * (weight11 * val1 +
                             weight12 * val2 +
                             weight13 * val3) +
                prod_der2 * (weight33 * val3 +
                             weight34 * val4 +
                             weight35 * val5))

    log_sum_der2 = log_or_zero(sum_der2)

    sum_der3 = (prod_der2 * (weight22 * val2 +
                             weight23 * val3 +
                             weight24 * val4))

    log_sum_der3 = log_or_zero(sum_der3)

    print('expected sum ders', log_sum_der1,
          log_sum_der2,
          log_sum_der3)
    print('found    sum ders', sum1.log_der,
          sum2.log_der,
          sum3.log_der)

    check_log_der(log_sum_der1, sum1.log_der)
    check_log_der(log_sum_der2, sum2.log_der)
    check_log_der(log_sum_der3, sum3.log_der)

    # final level, the first one: each input accumulates the weighted
    # derivative of every sum node it feeds
    log_der1 = log_or_zero(sum_der1 * weight11)
    log_der2 = log_or_zero(sum_der1 * weight12 +
                           sum_der2 * weight22)
    log_der3 = log_or_zero(sum_der1 * weight13 +
                           sum_der2 * weight23 +
                           sum_der3 * weight33)
    log_der4 = log_or_zero(sum_der2 * weight24 +
                           sum_der3 * weight34)
    log_der5 = log_or_zero(sum_der3 * weight35)

    # printing, just in case
    print('child log der', node1.log_der, node2.log_der,
          node3.log_der, node4.log_der, node5.log_der)
    print('exact log der', log_der1, log_der2, log_der3,
          log_der4, log_der5)

    # the leaf derivatives are compared with 15 decimals
    check_log_der(log_der1, node1.log_der, 15)
    check_log_der(log_der2, node2.log_der, 15)
    check_log_der(log_der3, node3.log_der, 15)
    check_log_der(log_der4, node4.log_der, 15)
    check_log_der(log_der5, node5.log_der, 15)
Пример #8
0
def test_linked_to_theano_indicator():
    """Compare a linked SPN over indicator leaves with its theano version.

    A five-layer network (indicator leaves, products, sums, products,
    root sum) is built node by node, wrapped in ``SpnLinked`` and
    converted with ``SpnFactory.linked_to_theano``. Both networks are then
    evaluated on every instance of ``I`` (defined outside this function)
    and their outputs must agree.
    """
    # creating single nodes
    root = SumNode()
    prod1, prod2, prod3 = (ProductNode() for _ in range(3))
    sum1, sum2, sum3, sum4 = (SumNode() for _ in range(4))

    # (var, var_val) of the eleven indicator leaves
    indicator_specs = [(0, 0), (0, 1),
                       (1, 0), (1, 1),
                       (2, 0), (2, 1), (2, 2),
                       (3, 0), (3, 1), (3, 2), (3, 3)]
    indicators = [CategoricalIndicatorNode(var=var, var_val=var_val)
                  for var, var_val in indicator_specs]
    (ind1, ind2, ind3, ind4, ind5, ind6,
     ind7, ind8, ind9, ind10, ind11) = indicators

    prod4, prod5, prod6, prod7 = (ProductNode() for _ in range(4))

    # linking nodes: (parent, child) for product parents,
    # (parent, child, weight) for sum parents, applied in this order
    links = [
        (root, prod1, 0.3), (root, prod2, 0.3), (root, prod3, 0.4),
        (prod1, sum1), (prod1, sum2),
        (prod2, ind7), (prod2, ind8), (prod2, ind11),
        (prod3, sum3), (prod3, sum4),
        (sum1, ind1, 0.3), (sum1, ind2, 0.3), (sum1, prod4, 0.4),
        (sum2, ind2, 0.5), (sum2, prod4, 0.2), (sum2, prod5, 0.3),
        (sum3, prod6, 0.5), (sum3, prod7, 0.5),
        (sum4, prod6, 0.5), (sum4, prod7, 0.5),
        (prod4, ind3), (prod4, ind4),
        (prod5, ind5), (prod5, ind6),
        (prod6, ind9), (prod6, ind10),
        (prod7, ind9), (prod7, ind10),
    ]
    for parent, child, *weight in links:
        parent.add_child(child, *weight)

    # building layers from nodes
    root_layer = SumLayerLinked([root])
    prod_layer = ProductLayerLinked([prod1, prod2, prod3])
    sum_layer = SumLayerLinked([sum1, sum2, sum3, sum4])
    aprod_layer = ProductLayerLinked([prod4, prod5, prod6, prod7])
    ind_layer = CategoricalIndicatorLayer(nodes=indicators)

    # creating the linked spn
    spn_linked = SpnLinked(input_layer=ind_layer,
                           layers=[aprod_layer,
                                   sum_layer,
                                   prod_layer,
                                   root_layer])

    print(spn_linked)

    # converting to theano repr
    spn_theano = SpnFactory.linked_to_theano(spn_linked)
    print(spn_theano)

    # both representations must give the same result on each instance
    for instance in I:
        print('linked')
        linked_out = spn_linked.eval(instance)
        print(linked_out)
        print('theano')
        theano_out = spn_theano.eval(instance)
        print(theano_out)
        assert_array_almost_equal(linked_out, theano_out)
Пример #9
0
def test_spn_sampling():
    """Sample from a small mixture SPN, before and after an input rewrite.

    The network is a root sum over four products, each product over four
    ``CategoricalSmoothedNode`` leaves (one per binary feature). Samples
    are drawn once from this network and once, with
    ``one_hot_encoding=True``, from the network returned by
    ``linked_categorical_input_to_indicators``. Timings and sample counts
    are printed; nothing is asserted.
    """

    from collections import Counter

    from spn.factory import linked_categorical_input_to_indicators

    #
    # building a small mixture model
    features = [2, 2, 2, 2]
    n_features = len(features)
    half = n_features // 2

    #
    # per-feature frequencies for the four leaf groups: all [0, 1],
    # all [1, 0], then the two half-and-half combinations
    freqs_a, freqs_b = (0, 1), (1, 0)
    group_patterns = [
        [freqs_a] * n_features,
        [freqs_b] * n_features,
        [freqs_b] * half + [freqs_a] * (n_features - half),
        [freqs_a] * half + [freqs_b] * (n_features - half),
    ]
    # every leaf gets its own fresh frequency list
    leaf_groups = [
        [CategoricalSmoothedNode(i, features[i], alpha=0.0,
                                 freqs=list(freqs))
         for i, freqs in enumerate(pattern)]
        for pattern in group_patterns
    ]

    input_layer = CategoricalSmoothedLayer(
        nodes=[leaf for group in leaf_groups for leaf in group])

    #
    # one product node for each group
    prod_nodes = []
    for group in leaf_groups:
        prod_node = ProductNode()
        for leaf in group:
            prod_node.add_child(leaf)
        prod_nodes.append(prod_node)

    prod_layer = ProductLayer(nodes=prod_nodes)

    #
    # one root as a mixture
    root = SumNode()
    for prod_node, mixture_weight in zip(prod_nodes, (0.5, 0.1, 0.2, 0.2)):
        root.add_child(prod_node, mixture_weight)

    root_layer = SumLayer(nodes=[root])

    spn = Spn(input_layer=input_layer, layers=[prod_layer, root_layer])

    print(spn)

    n_instances = 1000

    def sample_and_report(model, **sample_kwargs):
        # draw n_instances samples, print the timing and the counts
        start_t = perf_counter()
        drawn = model.sample(n_instances=n_instances,
                             verbose=False,
                             **sample_kwargs)
        end_t = perf_counter()
        print('Sampled in {} secs'.format(end_t - start_t))
        if n_instances < 20:
            print(drawn)

        # tuples are hashable, so they can be counted
        as_tuples = [tuple(sample) for sample in drawn]
        if n_instances < 20:
            print(as_tuples)

        print(Counter(as_tuples))

    #
    # sampling from the network with smoothed categorical leaves
    sample_and_report(spn)

    #
    # transforming into an spn with indicator nodes
    print('Into indicator nodes')
    ind_start_t = perf_counter()
    spn = linked_categorical_input_to_indicators(spn)
    ind_end_t = perf_counter()
    print('Done in ', ind_end_t - ind_start_t)

    #
    # sampling again from the transformed network
    sample_and_report(spn, one_hot_encoding=True)
Пример #10
0
def test_linked_to_theano_categorical():
    """Compare a linked SPN over smoothed leaves with its theano version.

    The input layer is a ``CategoricalSmoothedLayer`` built from seven
    frequency dictionaries; a product root, two sums, two products and two
    more sums are wired on top of its nodes. The ``SpnLinked`` network and
    the one returned by ``SpnFactory.linked_to_theano`` are evaluated on
    every instance of ``I`` (defined outside this function) and their
    outputs must agree.
    """
    var_values = [2, 2, 3, 4]
    node_freqs = [{'var': 0, 'freqs': [1, 2]},
                  {'var': 1, 'freqs': [2, 2]},
                  {'var': 0, 'freqs': [3, 2]},
                  {'var': 1, 'freqs': [0, 3]},
                  {'var': 2, 'freqs': [1, 0, 2]},
                  {'var': 3, 'freqs': [1, 2, 1, 2]},
                  {'var': 3, 'freqs': [3, 4, 0, 1]}]

    # create input layer first
    input_layer = CategoricalSmoothedLayer(vars=var_values,
                                           node_dicts=node_freqs)
    # materialise its nodes so they can be addressed by position
    leaves = list(input_layer.nodes())

    root_node = ProductNode()
    sum1, sum2 = SumNode(), SumNode()
    prod1, prod2 = ProductNode(), ProductNode()
    sum3, sum4 = SumNode(), SumNode()

    # linking: (parent, child) for product parents,
    # (parent, child, weight) for sum parents, applied in this order
    links = [
        (root_node, sum1), (root_node, sum2),
        (root_node, leaves[0]), (root_node, leaves[1]),
        (sum1, leaves[2], 0.4), (sum1, leaves[3], 0.6),
        (sum2, leaves[3], 0.2), (sum2, prod1, 0.5), (sum2, prod2, 0.3),
        (prod1, leaves[4]), (prod1, sum3), (prod1, sum4),
        (prod2, sum3), (prod2, sum4),
        (sum3, leaves[5], 0.5), (sum3, leaves[6], 0.5),
        (sum4, leaves[5], 0.4), (sum4, leaves[6], 0.6),
    ]
    for parent, child, *weight in links:
        parent.add_child(child, *weight)

    # creating layers
    root_layer = ProductLayerLinked([root_node])
    upper_sum_layer = SumLayerLinked([sum1, sum2])
    prod_layer = ProductLayerLinked([prod1, prod2])
    lower_sum_layer = SumLayerLinked([sum3, sum4])

    # create the linked spn
    spn_linked = SpnLinked(input_layer=input_layer,
                           layers=[lower_sum_layer, prod_layer,
                                   upper_sum_layer, root_layer])

    print(spn_linked)

    # converting to theano repr
    spn_theano = SpnFactory.linked_to_theano(spn_linked)
    print(spn_theano)

    # both representations must give the same result on each instance
    for instance in I:
        print('linked')
        linked_out = spn_linked.eval(instance)
        print(linked_out)
        print('theano')
        theano_out = spn_theano.eval(instance)
        print(theano_out)
        assert_array_almost_equal(linked_out, theano_out)
Пример #11
0
def test_toy_spn_numpy_linked():
    """Evaluate a hand-built three-layer SPN on a numpy input and print it.

    The network has six indicator leaves (two per variable 0..2), six sum
    nodes (two per variable), four product nodes (one sum per variable
    each) and a single root sum. The result of ``spn.eval`` is only
    printed; nothing is asserted.
    """
    # a 4x3 literal, transposed to 3x4 before evaluation
    input_vec = numpy.array([[0., 0., 0.],
                             [0., 0., 0.],
                             [0., 1., 1.],
                             [MARG_IND, MARG_IND, MARG_IND]]).T

    # indicator leaves: values 0 and 1 for each of the three variables
    leaves = [CategoricalIndicatorNode(var=var, var_val=var_val)
              for var in range(3) for var_val in range(2)]

    input_layer = CategoricalInputLayer(nodes=leaves)

    # sum nodes 2v and 2v + 1 mix the two indicators of variable v
    sum_weights = [(0.6, 0.4), (0.3, 0.7),
                   (0.1, 0.9), (0.7, 0.3),
                   (0.5, 0.5), (0.2, 0.8)]
    sum_nodes = [SumNode() for _ in range(len(sum_weights))]
    for k, (sum_node, weights) in enumerate(zip(sum_nodes, sum_weights)):
        first_leaf = 2 * (k // 2)
        for leaf, weight in zip(leaves[first_leaf:first_leaf + 2], weights):
            sum_node.add_child(leaf, weight)

    layer_1 = SumLayer(sum_nodes)

    # each product picks one sum node per variable
    prod_children = [(0, 2, 4), (1, 3, 5), (0, 2, 5), (1, 3, 4)]
    prod_nodes = [ProductNode() for _ in range(len(prod_children))]
    for prod_node, child_ids in zip(prod_nodes, prod_children):
        for child_id in child_ids:
            prod_node.add_child(sum_nodes[child_id])

    layer_2 = ProductLayer(prod_nodes)

    # the root mixes the four products
    root = SumNode()
    for prod_node, weight in zip(prod_nodes, (0.2, 0.4, 0.15, 0.25)):
        root.add_child(prod_node, weight)

    layer_3 = SumLayer([root])

    spn = Spn(input_layer=input_layer, layers=[layer_1, layer_2, layer_3])

    outcome = spn.eval(input_vec)
    print('First evaluation')
    print(outcome)
Пример #12
0
def test_mini_spn_fit_em():
    """Run ``fit_em`` with ``hard=True`` on a small hand-built SPN.

    The network sits on a ``CategoricalIndicatorLayer`` for four variables
    with two values each: four sum nodes over pairs of indicators, three
    product nodes over consecutive sums and a root sum. Training uses
    ``syn_train_data`` and ``syn_val_data`` (defined outside this
    function); nothing is asserted.
    """
    var_values = numpy.array([2, 2, 2, 2])
    input_layer = CategoricalIndicatorLayer(vars=var_values)

    print(input_layer)
    # the first eight nodes of the input layer, addressed by position
    indicators = [input_layer._nodes[k] for k in range(8)]

    # four sum nodes; sum k mixes indicators 2k and 2k + 1
    sum_weights = [(0.6, 0.4), (0.5, 0.5), (0.7, 0.3), (0.4, 0.6)]
    sum_nodes = [SumNode() for _ in range(4)]
    for k, (sum_node, weights) in enumerate(zip(sum_nodes, sum_weights)):
        for leaf, weight in zip(indicators[2 * k:2 * k + 2], weights):
            sum_node.add_child(leaf, weight)

    sum_layer = SumLayer(nodes=sum_nodes)

    # three products; product k joins sums k and k + 1
    prod_nodes = [ProductNode() for _ in range(3)]
    for k, prod_node in enumerate(prod_nodes):
        prod_node.add_child(sum_nodes[k])
        prod_node.add_child(sum_nodes[k + 1])

    prod_layer = ProductLayer(nodes=prod_nodes)

    # root layer
    root = SumNode()
    for prod_node, weight in zip(prod_nodes, (0.4, 0.25, 0.35)):
        root.add_child(prod_node, weight)

    root_layer = SumLayer(nodes=[root])

    spn = Spn(input_layer=input_layer,
              layers=[sum_layer, prod_layer, root_layer])

    print(spn)

    # training on obs
    spn.fit_em(train=syn_train_data, valid=syn_val_data, test=None, hard=True)
Пример #13
0
def test_linked_to_theano_indicator():
    """Compare a hand-built linked SPN with its theano conversion.

    A linked SPN over indicator leaves is assembled node by node, converted
    with ``SpnFactory.linked_to_theano`` and both networks are evaluated on
    every instance of the module-level ``I``; the results must agree.
    """
    # creating single nodes (creation order is kept: root, upper products,
    # sums, indicators, lower products)
    root = SumNode()
    prod1, prod2, prod3 = (ProductNode() for _ in range(3))
    sum1, sum2, sum3, sum4 = (SumNode() for _ in range(4))

    indicator_specs = (
        (0, 0), (0, 1),
        (1, 0), (1, 1),
        (2, 0), (2, 1), (2, 2),
        (3, 0), (3, 1), (3, 2), (3, 3),
    )
    indicators = [
        CategoricalIndicatorNode(var=var, var_val=val)
        for var, val in indicator_specs
    ]
    # 1-based lookup so that ind[7] is the seventh indicator created
    ind = dict(enumerate(indicators, start=1))

    prod4, prod5, prod6, prod7 = (ProductNode() for _ in range(4))

    # linking nodes, top-down
    for child, weight in ((prod1, 0.3), (prod2, 0.3), (prod3, 0.4)):
        root.add_child(child, weight)

    upper_product_links = (
        (prod1, (sum1, sum2)),
        (prod2, (ind[7], ind[8], ind[11])),
        (prod3, (sum3, sum4)),
    )
    for parent, children in upper_product_links:
        for child in children:
            parent.add_child(child)

    sum_links = (
        (sum1, ((ind[1], 0.3), (ind[2], 0.3), (prod4, 0.4))),
        (sum2, ((ind[2], 0.5), (prod4, 0.2), (prod5, 0.3))),
        (sum3, ((prod6, 0.5), (prod7, 0.5))),
        (sum4, ((prod6, 0.5), (prod7, 0.5))),
    )
    for parent, weighted_children in sum_links:
        for child, weight in weighted_children:
            parent.add_child(child, weight)

    lower_product_links = (
        (prod4, (ind[3], ind[4])),
        (prod5, (ind[5], ind[6])),
        (prod6, (ind[9], ind[10])),
        (prod7, (ind[9], ind[10])),
    )
    for parent, children in lower_product_links:
        for child in children:
            parent.add_child(child)

    # building layers from nodes
    root_layer = SumLayerLinked([root])
    prod_layer = ProductLayerLinked([prod1, prod2, prod3])
    sum_layer = SumLayerLinked([sum1, sum2, sum3, sum4])
    aprod_layer = ProductLayerLinked([prod4, prod5, prod6, prod7])
    ind_layer = CategoricalIndicatorLayer(nodes=indicators)

    # creating the linked spn (layers listed bottom-up)
    spn_linked = SpnLinked(
        input_layer=ind_layer,
        layers=[aprod_layer, sum_layer, prod_layer, root_layer])

    print(spn_linked)

    # converting to theano repr
    spn_theano = SpnFactory.linked_to_theano(spn_linked)
    print(spn_theano)

    # both representations must give the same value on each instance
    for instance in I:
        print('linked')
        linked_out = spn_linked.eval(instance)
        print(linked_out)
        print('theano')
        theano_out = spn_theano.eval(instance)
        print(theano_out)
        assert_array_almost_equal(linked_out, theano_out)
Пример #14
0
def test_linked_to_theano_categorical():
    """Compare a linked SPN over a smoothed categorical layer with theano.

    The input layer is a ``CategoricalSmoothedLayer`` built from seven
    frequency dictionaries; inner nodes are linked by hand on top of its
    nodes.  The linked network is converted with
    ``SpnFactory.linked_to_theano`` and both are evaluated on every
    instance of the module-level ``I``; the results must agree.
    """
    layer_vars = [2, 2, 3, 4]
    # (variable id, frequency list) for each input node, in layer order
    freq_table = (
        (0, [1, 2]),
        (1, [2, 2]),
        (0, [3, 2]),
        (1, [0, 3]),
        (2, [1, 0, 2]),
        (3, [1, 2, 1, 2]),
        (3, [3, 4, 0, 1]),
    )
    freqs = [{'var': var, 'freqs': counts} for var, counts in freq_table]

    # create input layer first, then grab its nodes
    input_layer = CategoricalSmoothedLayer(vars=layer_vars, node_dicts=freqs)
    leaf = list(input_layer.nodes())

    # inner nodes (creation order: root, upper sums, products, lower sums)
    root_node = ProductNode()
    sum1, sum2 = (SumNode() for _ in range(2))
    prod1, prod2 = (ProductNode() for _ in range(2))
    sum3, sum4 = (SumNode() for _ in range(2))

    # linking, top-down
    for child in (sum1, sum2, leaf[0], leaf[1]):
        root_node.add_child(child)

    upper_sum_links = (
        (sum1, ((leaf[2], 0.4), (leaf[3], 0.6))),
        (sum2, ((leaf[3], 0.2), (prod1, 0.5), (prod2, 0.3))),
    )
    for parent, weighted_children in upper_sum_links:
        for child, weight in weighted_children:
            parent.add_child(child, weight)

    product_links = (
        (prod1, (leaf[4], sum3, sum4)),
        (prod2, (sum3, sum4)),
    )
    for parent, children in product_links:
        for child in children:
            parent.add_child(child)

    lower_sum_links = (
        (sum3, ((leaf[5], 0.5), (leaf[6], 0.5))),
        (sum4, ((leaf[5], 0.4), (leaf[6], 0.6))),
    )
    for parent, weighted_children in lower_sum_links:
        for child, weight in weighted_children:
            parent.add_child(child, weight)

    # creating layers
    root_layer = ProductLayerLinked([root_node])
    sum_layer = SumLayerLinked([sum1, sum2])
    prod_layer = ProductLayerLinked([prod1, prod2])
    sum_layer2 = SumLayerLinked([sum3, sum4])

    # create the linked spn (layers listed bottom-up)
    spn_linked = SpnLinked(
        input_layer=input_layer,
        layers=[sum_layer2, prod_layer, sum_layer, root_layer])

    print(spn_linked)

    # converting to theano repr
    spn_theano = SpnFactory.linked_to_theano(spn_linked)
    print(spn_theano)

    # both representations must give the same value on each instance
    for instance in I:
        print('linked')
        linked_out = spn_linked.eval(instance)
        print(linked_out)
        print('theano')
        theano_out = spn_theano.eval(instance)
        print(theano_out)
        assert_array_almost_equal(linked_out, theano_out)
Пример #15
0
def test_build_linked_spn_from_scope_graph():
    """Round-trip a region graph through a linked SPN, then hand-build one.

    First part: a region graph is created with ``create_poon_region_graph``
    for a 2x2 region, turned into an SPN with
    ``build_linked_spn_from_scope_graph`` and converted back with
    ``get_scope_graph_from_linked_spn``; the recovered graph must equal the
    original one.

    Second part: an SPN with alternating sum/product layers is built from
    scratch over four variables (two indicator leaves each), and both
    networks are evaluated on the same input array.  No assertion is made
    on the evaluation results; they are only printed.
    """

    def link_pairwise(products, left_sums, right_sums):
        # Give each product one (left, right) pair of sum children, taking
        # pairs from the cartesian product in left-major order.
        pairs = [(left, right) for left in left_sums for right in right_sums]
        for product, (left, right) in zip(products, pairs):
            product.add_child(left)
            product.add_child(right)

    def link_weighted(sums, children, weight):
        # Attach every child to every sum node with the same weight.
        for sum_node in sums:
            for child in children:
                sum_node.add_child(child, weight)

    #
    # creating a region graph as an input scope graph
    grid_rows, grid_cols = 2, 2
    root_region = Region.create_whole_region(grid_rows, grid_cols)
    region_graph = create_poon_region_graph(root_region, coarse=2)

    print('# partitions', region_graph.n_partitions())
    print('# regions', region_graph.n_scopes())

    print(region_graph)

    #
    # building the spn from the region graph
    spn = build_linked_spn_from_scope_graph(region_graph, 2)

    print(spn)

    print(spn.stats())

    #
    # back to the scope graph
    root_nodes = list(spn.root_layer().nodes())
    assert len(root_nodes) == 1

    scope_graph = get_scope_graph_from_linked_spn(root_nodes[0])
    print(scope_graph)

    assert scope_graph == region_graph

    #
    # building an spn from scratch
    #
    # building leaf nodes: two indicators (values 0 and 1) per variable
    n_vars = 4
    scope_vars = [0, 1, 2, 3]
    leaves = []
    for var in range(n_vars):
        for val in (0, 1):
            leaves.append(CategoricalIndicatorNode(var, val))
    input_layer = CategoricalIndicatorLayer(nodes=leaves, vars=scope_vars)

    #
    # building root
    root_node = SumNode(var_scope=frozenset(scope_vars))
    root_layer = SumLayer([root_node])

    #
    # building product nodes right below the root
    prod_list_1 = [ProductNode(var_scope=scope_vars) for _ in range(4)]
    prod_list_2 = [ProductNode(var_scope=scope_vars) for _ in range(4)]
    top_products = prod_list_1 + prod_list_2
    product_layer_1 = ProductLayer(top_products)

    root_weight = 1.0 / len(top_products)
    for product in top_products:
        root_node.add_child(product, root_weight)

    #
    # build sum nodes: four groups of two
    sum_list_1, sum_list_2, sum_list_3, sum_list_4 = (
        [SumNode() for _ in range(2)] for _ in range(4))

    sum_layer_2 = SumLayer(sum_list_1 + sum_list_2 + sum_list_3 + sum_list_4)

    link_pairwise(prod_list_1, sum_list_1, sum_list_2)
    link_pairwise(prod_list_2, sum_list_3, sum_list_4)

    #
    # again product nodes: four groups of four
    prod_list_3, prod_list_4, prod_list_5, prod_list_6 = (
        [ProductNode() for _ in range(4)] for _ in range(4))

    product_layer_3 = ProductLayer(prod_list_3 + prod_list_4 + prod_list_5 +
                                   prod_list_6)

    # every sum group mixes uniformly over its own product group
    for sums, products in ((sum_list_1, prod_list_3),
                           (sum_list_2, prod_list_4),
                           (sum_list_3, prod_list_5),
                           (sum_list_4, prod_list_6)):
        link_weighted(sums, products, 1.0 / len(products))

    #
    # build sum nodes: the last sum layer, again four groups of two
    sum_list_5, sum_list_6, sum_list_7, sum_list_8 = (
        [SumNode() for _ in range(2)] for _ in range(4))

    sum_layer_4 = SumLayer(sum_list_5 + sum_list_6 + sum_list_7 + sum_list_8)

    link_pairwise(prod_list_3, sum_list_5, sum_list_7)
    link_pairwise(prod_list_4, sum_list_6, sum_list_8)
    link_pairwise(prod_list_5, sum_list_5, sum_list_6)
    link_pairwise(prod_list_6, sum_list_7, sum_list_8)

    #
    # linking to input layer: each sum group gets two consecutive leaves
    link_weighted(sum_list_5, leaves[0:2], 0.5)
    link_weighted(sum_list_6, leaves[2:4], 0.5)
    link_weighted(sum_list_7, leaves[4:6], 0.5)
    link_weighted(sum_list_8, leaves[6:], 0.5)

    lspn = LinkedSpn(input_layer=input_layer,
                     layers=[
                         sum_layer_4, product_layer_3, sum_layer_2,
                         product_layer_1, root_layer
                     ])
    print(lspn)
    print(lspn.stats())

    #
    # trying to evaluate them
    input_rows = [
        [1., 1., 1., 0.],
        [0., 0., 0., 0.],
        [0., 1., 1., 0.],
        [MARG_IND, MARG_IND, MARG_IND, MARG_IND],
    ]
    input_vec = numpy.array(input_rows).T

    res = spn.eval(input_vec)
    print('First evaluation')
    print(res)

    res = lspn.eval(input_vec)
    print('Second evaluation')
    print(res)