Example #1
# assumed imports for this snippet (exact module paths may differ between paysage versions)
from paysage import backends as be
from paysage import layers
from paysage.models import model
from paysage.models import gradient_util as gu

def test_grad_apply():
    num_visible_units = 100
    num_hidden_units = 50

    # set a seed for the random number generator
    be.set_seed()

    # set up some layer and model objects
    vis_layer = layers.BernoulliLayer(num_visible_units)
    hid_layer = layers.BernoulliLayer(num_hidden_units)
    rbm = model.Model([vis_layer, hid_layer])

    # create a gradient object filled with random numbers, then apply an
    # elementwise backend function to every tensor it holds
    grad = gu.random_grad(rbm)
    gu.grad_apply(be.square, grad)
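
Example #1 exercises gu.grad_apply which, judging from how the later examples use its return value, applies a backend function to every parameter tensor held in a gradient object and returns the transformed gradient. Below is a library-independent sketch of the same idea, using numpy and a hypothetical SimpleGrad container in place of paysage's gradient type:

import numpy as np

class SimpleGrad:
    """Toy gradient container: one tensor per layer and one per weight matrix."""
    def __init__(self, layer_tensors, weight_tensors):
        self.layers = layer_tensors    # list of numpy arrays
        self.weights = weight_tensors  # list of numpy arrays

def simple_grad_apply(func, grad):
    """Apply func to every tensor in the container and return a new container."""
    return SimpleGrad([func(t) for t in grad.layers],
                      [func(t) for t in grad.weights])

g = SimpleGrad([np.full(100, 2.0)], [np.full((100, 50), 3.0)])
squared = simple_grad_apply(np.square, g)
assert np.allclose(squared.layers[0], 4.0)
assert np.allclose(squared.weights[0], 9.0)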
Example #2
# assumed imports for this snippet (exact module paths may differ between paysage versions)
from copy import deepcopy
from functools import partial

from paysage import backends as be
from paysage import layers
from paysage.models import BoltzmannMachine
from paysage.models import gradient_util as gu

def test_bernoulli_GFE_derivatives():
    # Tests the GFE derivative update against 100 random normalized update
    # vectors: after an equally sized step, the GFE reached by following the
    # computed derivatives must not exceed the GFE reached by following the
    # random direction (the step size is halved on failure, down to 1e-6).
    num_units = 5

    layer_1 = layers.BernoulliLayer(num_units)
    layer_2 = layers.BernoulliLayer(num_units)
    layer_3 = layers.BernoulliLayer(num_units)
    rbm = BoltzmannMachine([layer_1, layer_2, layer_3])

    for i in range(len(rbm.connections)):
        rbm.connections[i].weights.params.matrix[:] = \
            0.01 * be.randn(rbm.connections[i].shape)

    for lay in rbm.layers:
        lay.params.loc[:] = be.rand_like(lay.params.loc)

    state, _ = rbm.compute_StateTAP(init_lr=0.1, tol=1e-7, max_iters=50)
    grad = rbm._grad_gibbs_free_energy(state)
    gu.grad_normalize_(grad)

    for i in range(100):
        lr = 1.0
        gogogo = True
        random_grad = gu.random_grad(rbm)
        gu.grad_normalize_(random_grad)
        while gogogo:
            cop1 = deepcopy(rbm)
            lr_mul = partial(be.tmul, lr)

            cop1.parameter_update(gu.grad_apply(lr_mul, grad))
            cop1_state, cop1_GFE = cop1.compute_StateTAP(init_lr=0.1, tol=1e-7, max_iters=50)

            cop2 = deepcopy(rbm)
            cop2.parameter_update(gu.grad_apply(lr_mul, random_grad))
            cop2_state, cop2_GFE = cop2.compute_StateTAP(init_lr=0.1, tol=1e-7, max_iters=50)

            regress = cop2_GFE - cop1_GFE < 0.0

            if regress:
                if lr < 1e-6:
                    assert False, \
                        "TAP FE gradient is not working properly for Bernoulli models"
                else:
                    lr *= 0.5
            else:
                break
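
The loop above is a directional sanity check with backtracking: for each of 100 random normalized directions, the GFE reached by a step along the computed derivatives is compared to the GFE reached by an equally sized random step, and the step size is halved when the comparison fails. The sketch below reproduces the pattern on a toy objective, f(x) = ||x||^2, in plain numpy (illustrative only, not paysage code). It checks the textbook form of the claim, that a step along the gradient reaches an objective value at least as large as an equally sized random step does; the paysage test applies the analogous comparison to the TAP GFE under the library's own sign convention.

import numpy as np

def f(x):                               # stand-in objective, f(x) = ||x||^2
    return float(np.dot(x, x))

rng = np.random.default_rng(0)
x = rng.normal(size=10)
grad = 2.0 * x                          # exact gradient of f at x
grad = grad / np.linalg.norm(grad)      # normalized, in the spirit of gu.grad_normalize_

for _ in range(100):
    u = rng.normal(size=10)
    u = u / np.linalg.norm(u)           # random unit direction
    lr = 1.0
    while True:
        if f(x + lr * grad) >= f(x + lr * u):
            break                       # gradient step won; try the next random vector
        if lr < 1e-6:
            raise AssertionError("gradient direction lost to a random direction")
        lr *= 0.5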
Example #3
# assumes the same imports as Example #2
def test_gaussian_GFE_derivatives_gradient_descent():
    # Same comparison as the Bernoulli case above, but for a Gaussian-Bernoulli
    # model, with the TAP state computed with use_GD=False and a smaller
    # initial step size (0.001).
    num_units = 5

    layer_1 = layers.GaussianLayer(num_units)
    layer_2 = layers.BernoulliLayer(num_units)

    rbm = BoltzmannMachine([layer_1, layer_2])

    for i in range(len(rbm.connections)):
        rbm.connections[i].weights.params.matrix[:] = \
            0.01 * be.randn(rbm.connections[i].shape)

    for lay in rbm.layers:
        lay.params.loc[:] = be.rand_like(lay.params.loc)

    state, GFE = rbm.compute_StateTAP(use_GD=False, tol=1e-7, max_iters=50)
    grad = rbm._grad_gibbs_free_energy(state)
    gu.grad_normalize_(grad)

    for i in range(100):
        lr = 0.001
        gogogo = True
        random_grad = gu.random_grad(rbm)
        gu.grad_normalize_(random_grad)
        while gogogo:
            cop1 = deepcopy(rbm)
            lr_mul = partial(be.tmul, lr)

            cop1.parameter_update(gu.grad_apply(lr_mul, grad))
            cop1_state, cop1_GFE = cop1.compute_StateTAP(use_GD=False, tol=1e-7, max_iters=50)

            cop2 = deepcopy(rbm)
            cop2.parameter_update(gu.grad_apply(lr_mul, random_grad))
            cop2_state, cop2_GFE = cop2.compute_StateTAP(use_GD=False, tol=1e-7, max_iters=50)

            regress = cop2_GFE - cop1_GFE < 0

            if regress:
                if lr < 1e-6:
                    assert False, \
                        "TAP FE gradient is not working properly for Gaussian models"
                else:
                    lr *= 0.5
            else:
                break
Example #4
# variant of Example #2's check written against the model.Model API
# (rbm.weights instead of rbm.connections); assumes the same imports as
# Example #1, plus deepcopy and partial from the standard library
def test_bernoulli_GFE_derivatives():
    num_units = 500

    layer_1 = layers.BernoulliLayer(num_units)
    layer_2 = layers.BernoulliLayer(num_units)
    layer_3 = layers.BernoulliLayer(num_units)

    rbm = model.Model([layer_1, layer_2, layer_3])
    for i in range(len(rbm.weights)):
        rbm.weights[i].params.matrix[:] = \
            0.01 * be.randn(rbm.weights[i].shape)

    for lay in rbm.layers:
        lay.params.loc[:] = be.rand_like(lay.params.loc)

    state = rbm.compute_StateTAP(init_lr=0.1, tol=1e-7, max_iters=50)
    GFE = rbm.gibbs_free_energy(state)

    lr = 0.1
    gogogo = True
    grad = rbm.grad_TAP_free_energy(0.1, 1e-7, 50)
    while gogogo:
        cop = deepcopy(rbm)
        lr_mul = partial(be.tmul, -lr)

        delta = gu.grad_apply(lr_mul, grad)
        cop.parameter_update(delta)

        cop_state = cop.compute_StateTAP(init_lr=0.1, tol=1e-7, max_iters=50)
        cop_GFE = cop.gibbs_free_energy(cop_state)

        regress = cop_GFE - GFE < 0.0
        print(lr, cop_GFE, GFE, cop_GFE - GFE, regress)
        if regress:
            if lr < 1e-6:
                assert False, \
                    "TAP FE gradient is not working properly for Bernoulli models"
            else:
                lr *= 0.5
        else:
            break
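
Structurally, Example #4 is a backtracking step check: it builds an update from the TAP free-energy gradient with a negative multiplier (lr_mul = partial(be.tmul, -lr)), tests an acceptance condition on the resulting GFE, and halves lr until the condition holds or lr falls below 1e-6. The sketch below shows the same backtracking pattern on a toy quadratic in plain numpy (illustrative only, not paysage code), using the textbook acceptance condition that the objective decreases after a step against the gradient; the acceptance condition in the paysage test is the one written in the code above and depends on the library's sign convention for TAP gradients.

import numpy as np

def f(x):                     # stand-in objective, f(x) = ||x||^2
    return float(np.dot(x, x))

x = np.ones(5)
grad = 2.0 * x                # exact gradient of f at x
lr = 0.1
while True:
    x_new = x - lr * grad     # step against the gradient, mirroring the -lr multiplier above
    if f(x_new) < f(x):
        break                 # acceptance: the objective went down
    if lr < 1e-6:
        raise AssertionError("no acceptable step size found")
    lr *= 0.5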