示例#1
0
def test_bernoulli_GFE_derivatives():
    # Tests that the GFE derivative update increases GFE versus 100
    # random update vectors
    num_units = 5

    layer_1 = layers.BernoulliLayer(num_units)
    layer_2 = layers.BernoulliLayer(num_units)
    layer_3 = layers.BernoulliLayer(num_units)
    rbm = BoltzmannMachine([layer_1, layer_2, layer_3])

    for i in range(len(rbm.connections)):
        rbm.connections[i].weights.params.matrix[:] = \
        0.01 * be.randn(rbm.connections[i].shape)

    for lay in rbm.layers:
        lay.params.loc[:] = be.rand_like(lay.params.loc)

    state, cop1_GFE = rbm.compute_StateTAP(init_lr=0.1, tol=1e-7, max_iters=50)
    grad = rbm._grad_gibbs_free_energy(state)
    gu.grad_normalize_(grad)

    for i in range(100):
        lr = 1.0
        gogogo = True
        random_grad = gu.random_grad(rbm)
        gu.grad_normalize_(random_grad)
        while gogogo:
            cop1 = deepcopy(rbm)
            lr_mul = partial(be.tmul, lr)

            cop1.parameter_update(gu.grad_apply(lr_mul, grad))
            cop1_state, cop1_GFE = cop1.compute_StateTAP(init_lr=0.1, tol=1e-7, max_iters=50)

            cop2 = deepcopy(rbm)
            cop2.parameter_update(gu.grad_apply(lr_mul, random_grad))
            cop2_state, cop2_GFE = cop2.compute_StateTAP(init_lr=0.1, tol=1e-7, max_iters=50)

            regress = cop2_GFE - cop1_GFE < 0.0

            if regress:
                if lr < 1e-6:
                    assert False, \
                    "TAP FE gradient is not working properly for Bernoulli models"
                    break
                else:
                    lr *= 0.5
            else:
                break
示例#2
0
def test_gaussian_GFE_derivatives_gradient_descent():
    num_units = 5

    layer_1 = layers.GaussianLayer(num_units)
    layer_2 = layers.BernoulliLayer(num_units)

    rbm = BoltzmannMachine([layer_1, layer_2])

    for i in range(len(rbm.connections)):
        rbm.connections[i].weights.params.matrix[:] = \
        0.01 * be.randn(rbm.connections[i].shape)

    for lay in rbm.layers:
        lay.params.loc[:] = be.rand_like(lay.params.loc)

    state, GFE = rbm.compute_StateTAP(use_GD=False, tol=1e-7, max_iters=50)
    grad = rbm._grad_gibbs_free_energy(state)
    gu.grad_normalize_(grad)

    for i in range(100):
        lr = 0.001
        gogogo = True
        random_grad = gu.random_grad(rbm)
        gu.grad_normalize_(random_grad)
        while gogogo:
            cop1 = deepcopy(rbm)
            lr_mul = partial(be.tmul, lr)

            cop1.parameter_update(gu.grad_apply(lr_mul, grad))
            cop1_state, cop1_GFE = cop1.compute_StateTAP(use_GD=False, tol=1e-7, max_iters=50)

            cop2 = deepcopy(rbm)
            cop2.parameter_update(gu.grad_apply(lr_mul, random_grad))
            cop2_state, cop2_GFE = cop2.compute_StateTAP(use_GD=False, tol=1e-7, max_iters=50)

            regress = cop2_GFE - cop1_GFE < 0

            if regress:
                if lr < 1e-6:
                    assert False, \
                    "TAP FE gradient is not working properly for Gaussian models"
                    break
                else:
                    lr *= 0.5
            else:
                break