def test_ais_with_semirbm_dbn(self):
    """Compare the AIS log-probability estimate of an RBM + SemiRBM DBN
    with a value obtained by enumerating binary states.

    The reference value is assembled from the first layer's unnormalized
    joint/conditional log-probabilities, the second layer's unnormalized
    visible log-probabilities and the second layer's enumerated
    log-partition function. The test passes when the means of the two
    log-probability vectors differ by less than 0.1.
    """
    dbn = DBN(RBM(5, 5))
    dbn.add_layer(SemiRBM(5, 5))

    # run AIS per layer; a fresh beta schedule is built for every call
    ais = Estimator(dbn)
    for layer_index, num_ais_samples in enumerate((100, 10)):
        ais.estimate_log_partition_function(
            num_ais_samples, np.arange(0, 1, 1E-3), layer=layer_index)

    # log-partition function of each layer from all visible states
    for layer in (dbn[0], dbn[1]):
        visible_states = utils.binary_numbers(layer.X.shape[0])
        layer._brf_logz = utils.logsumexp(layer._ulogprob_vis(visible_states))

    # evaluate on samples from the model plus uniformly random binary columns
    model_samples = dbn.sample(25, 100, 20)
    random_samples = np.matrix(np.random.rand(5, 25) > 0.5)
    samples = np.concatenate([model_samples, random_samples], 1)

    bottom, top = dbn[0], dbn[1]
    hid_states = utils.binary_numbers(bottom.Y.shape[0])
    vis_states = utils.binary_numbers(bottom.X.shape[0])

    log_r = top._ulogprob_vis(hid_states)
    log_q = utils.logsumexp(
        bottom._ulogprob(vis_states, hid_states, all_pairs=True), 0)
    log_sum = utils.logsumexp(
        bottom._clogprob_hid_vis(samples, hid_states, all_pairs=True)
        - log_q + log_r, 1)

    reference = log_sum + bottom._ulogprob_vis(samples) - top._brf_logz
    estimate = ais.estimate_log_probability(samples)[0]

    gap = np.abs(estimate.mean() - reference.mean())
    self.assertTrue(gap < 0.1)
def test_ais_with_semirbm_sanity_check(self):
    """Check that AIS gives consistent log-partition estimates for a
    GaussianRBM and a SemiRBM whose parameters are derived from it.

    The SemiRBM's W and c are zeroed; its lateral weights are the
    off-diagonal part of W^T W of the GaussianRBM and its visible biases
    absorb half of the diagonal. After adding the constant offsets
    computed below, the two estimates must differ by less than 1.
    """
    grbm = GaussianRBM(15, 50)
    grbm.b = np.random.randn(grbm.b.shape[0], 1)
    grbm.c = np.random.randn(grbm.c.shape[0], 1)

    srbm = SemiRBM(50, 20)
    srbm.W = srbm.W * 0.
    srbm.c = srbm.c * 0.

    # derive lateral weights and biases from the Gaussian RBM's weights
    gram = grbm.W.T * grbm.W
    gram_diag = np.diag(gram)
    srbm.b = grbm.W.T * grbm.b + grbm.c + 0.5 * np.matrix(gram_diag).T
    srbm.L = gram - np.matrix(np.diag(gram_diag))

    # the SemiRBM is annealed with a coarser beta schedule
    for model, beta_step in ((grbm, 1E-3), (srbm, 1E-2)):
        Estimator(model).estimate_log_partition_function(
            num_ais_samples=100,
            beta_weights=np.arange(0, 1, beta_step))

    hidden_offset = srbm.Y.shape[0] * np.log(2)
    visible_offset = grbm.X.shape[0] * np.log(np.sqrt(2 * np.pi))

    glogz = grbm._ais_logz + hidden_offset
    slogz = srbm._ais_logz + visible_offset

    self.assertTrue(np.abs(glogz - slogz) < 1.)
def test_ais_with_semirbm(self):
    """Check the AIS log-partition estimate of a small SemiRBM against a
    value obtained by enumerating all visible states.

    The test requires that the interval spanned by
    ``exp(ais_logz) -/+ 4 * sqrt(rbm._ais_var)`` is narrower than 1 in the
    log domain and that the enumerated value lies strictly inside it.
    """
    rbm = SemiRBM(5, 20)

    # random symmetric lateral weights with a zero diagonal: mirror the
    # upper triangle, then remove the (doubled) diagonal
    rbm.L = np.matrix(np.random.randn(5, 5))
    rbm.L = np.triu(rbm.L) + np.triu(rbm.L).T - 2 * np.diag(np.diag(rbm.L))
    rbm.num_lateral_updates = 5
    rbm.sampling_method = SemiRBM.GIBBS

    ais = Estimator(rbm)
    ais_logz = ais.estimate_log_partition_function(100, np.arange(0, 1, 0.001))

    # enumerate every visible state; logsumexp avoids the overflow that a
    # plain log(sum(exp(.))) can hit for large unnormalized log-probabilities
    visible_states = utils.binary_numbers(rbm.X.shape[0])
    brf_logz = utils.logsumexp(rbm._ulogprob_vis(visible_states))

    # NOTE(review): _ais_var is presumably the variance of the estimate of
    # Z itself (not log Z), hence the interval is built in the linear domain
    half_width = 4 * np.sqrt(rbm._ais_var)
    lower = np.log(np.exp(ais_logz) - half_width)
    upper = np.log(np.exp(ais_logz) + half_width)

    self.assertTrue(upper - lower < 1.)
    self.assertTrue(lower < brf_logz and brf_logz < upper)
def test_ais_with_semirbm(self):
    """Check the AIS log-partition estimate of a small SemiRBM against a
    value obtained by enumerating all visible states.

    The test requires that the interval spanned by
    ``exp(ais_logz) -/+ 4 * sqrt(rbm._ais_var)`` is narrower than 1 in the
    log domain and that the enumerated value lies strictly inside it.
    """
    rbm = SemiRBM(5, 20)

    # random symmetric lateral weights with a zero diagonal: mirror the
    # upper triangle, then remove the (doubled) diagonal
    rbm.L = np.matrix(np.random.randn(5, 5))
    rbm.L = np.triu(rbm.L) + np.triu(rbm.L).T - 2 * np.diag(np.diag(rbm.L))
    rbm.num_lateral_updates = 5
    rbm.sampling_method = SemiRBM.GIBBS

    ais = Estimator(rbm)
    ais_logz = ais.estimate_log_partition_function(100, np.arange(0, 1, 0.001))

    # enumerate every visible state; logsumexp avoids the overflow that a
    # plain log(sum(exp(.))) can hit for large unnormalized log-probabilities
    visible_states = utils.binary_numbers(rbm.X.shape[0])
    brf_logz = utils.logsumexp(rbm._ulogprob_vis(visible_states))

    # NOTE(review): _ais_var is presumably the variance of the estimate of
    # Z itself (not log Z), hence the interval is built in the linear domain
    half_width = 4 * np.sqrt(rbm._ais_var)
    lower = np.log(np.exp(ais_logz) - half_width)
    upper = np.log(np.exp(ais_logz) + half_width)

    self.assertTrue(upper - lower < 1.)
    self.assertTrue(lower < brf_logz and brf_logz < upper)
def test_ais_with_semirbm_sanity_check(self):
    """Check that AIS gives consistent log-partition estimates for a
    GaussianRBM and a SemiRBM whose parameters are derived from it.

    The SemiRBM's W and c are zeroed; its lateral weights are the
    off-diagonal part of W^T W of the GaussianRBM and its visible biases
    absorb half of the diagonal. After adding the constant offsets
    computed below, the two estimates must differ by less than 1.
    """
    grbm = GaussianRBM(15, 50)
    grbm.b = np.random.randn(grbm.b.shape[0], 1)
    grbm.c = np.random.randn(grbm.c.shape[0], 1)

    srbm = SemiRBM(50, 20)
    srbm.W = srbm.W * 0.
    srbm.c = srbm.c * 0.

    # derive lateral weights and biases from the Gaussian RBM's weights
    gram = grbm.W.T * grbm.W
    gram_diag = np.diag(gram)
    srbm.b = grbm.W.T * grbm.b + grbm.c + 0.5 * np.matrix(gram_diag).T
    srbm.L = gram - np.matrix(np.diag(gram_diag))

    # the SemiRBM is annealed with a coarser beta schedule
    for model, beta_step in ((grbm, 1E-3), (srbm, 1E-2)):
        Estimator(model).estimate_log_partition_function(
            num_ais_samples=100,
            beta_weights=np.arange(0, 1, beta_step))

    hidden_offset = srbm.Y.shape[0] * np.log(2)
    visible_offset = grbm.X.shape[0] * np.log(np.sqrt(2 * np.pi))

    glogz = grbm._ais_logz + hidden_offset
    slogz = srbm._ais_logz + visible_offset

    self.assertTrue(np.abs(glogz - slogz) < 1.)
def test_all_pairs(self):
    """Check that ``all_pairs=True`` agrees with the column-wise results.

    For both ``_ulogprob`` and ``_clogprob_hid_vis`` three things are
    compared (each within a summed absolute error of 1E-10):

    * the column-wise result against the diagonal of the all-pairs matrix,
    * one visible column against all hidden columns vs. row 1,
    * all visible columns against one hidden column vs. column 1.
    """
    srbm = SemiRBM(10, 10)
    num_vis, num_hid = srbm.X.shape[0], srbm.Y.shape[0]

    srbm.W = np.matrix(np.random.randn(num_vis, num_hid))
    srbm.b = np.matrix(np.random.rand(num_vis, 1))
    srbm.c = np.matrix(np.random.randn(num_hid, 1))

    vis = np.matrix(np.random.rand(num_vis, 100) < 0.5)
    hid = np.matrix(np.random.rand(num_hid, 100) < 0.5)

    for logprob in (srbm._ulogprob, srbm._clogprob_hid_vis):
        # matching columns correspond to the diagonal of the pair matrix
        direct = logprob(vis, hid)
        from_pairs = np.diag(logprob(vis, hid, all_pairs=True))
        self.assertTrue(np.abs(direct - from_pairs).sum() < 1E-10)

        # a single visible state against every hidden state
        direct = logprob(vis[:, 1], hid)
        from_pairs = logprob(vis, hid, all_pairs=True)[1, :]
        self.assertTrue(np.abs(direct - from_pairs).sum() < 1E-10)

        # every visible state against a single hidden state
        direct = logprob(vis, hid[:, 1])
        from_pairs = logprob(vis, hid, all_pairs=True)[:, 1].T
        self.assertTrue(np.abs(direct - from_pairs).sum() < 1E-10)
def test_probabilities(self):
    """Check the log-probability methods of SemiRBM for consistency.

    Three properties are verified, each to a tolerance of 1E-10:

    1. The conditional over hidden states given a visible state is
       normalized, i.e. its log-sum-exp over all hidden states is zero.
    2. The unnormalized joint equals the conditional of the hiddens plus
       the unnormalized marginal of the visibles.
    3. With the lateral weights set to zero, the SemiRBM gives the same
       values as an RBM using the same W, b and c.
    """
    srbm = SemiRBM(9, 12)
    srbm.W = np.matrix(np.random.randn(srbm.X.shape[0], srbm.Y.shape[0]))
    srbm.b = np.matrix(np.random.rand(srbm.X.shape[0], 1))
    srbm.c = np.matrix(np.random.randn(srbm.Y.shape[0], 1))

    # random symmetric lateral weights with a zero diagonal
    srbm.L = np.matrix(np.random.randn(srbm.X.shape[0], srbm.X.shape[0])) / 2.
    srbm.L = np.triu(srbm.L) + np.triu(srbm.L).T - 2. * np.diag(np.diag(srbm.L))

    examples_vis = np.matrix(np.random.rand(srbm.X.shape[0], 100) < 0.5)
    examples_hid = np.matrix(np.random.rand(srbm.Y.shape[0], 100) < 0.5)

    states_hid = utils.binary_numbers(srbm.Y.shape[0])

    # check that conditional probabilities are normalized; the absolute
    # value matters, otherwise a conditional with too little mass (a
    # negative log-sum) would pass unnoticed
    logprobs = srbm._clogprob_hid_vis(examples_vis, states_hid, all_pairs=True)
    self.assertTrue(np.all(np.abs(utils.logsumexp(logprobs, 1)) < 1E-10))

    # test for consistency: joint = conditional + marginal
    logprobs1 = srbm._ulogprob(examples_vis, examples_hid, all_pairs=True)
    logprobs3 = (
        srbm._clogprob_hid_vis(examples_vis, examples_hid, all_pairs=True)
        + srbm._ulogprob_vis(examples_vis).T)
    self.assertTrue(np.all(np.abs(logprobs1 - logprobs3) < 1E-10))

    # without lateral weights the SemiRBM must match a plain RBM
    rbm = RBM(srbm.X.shape[0], srbm.Y.shape[0])
    rbm.W = srbm.W
    rbm.b = srbm.b
    rbm.c = srbm.c
    srbm.L *= 0

    logprobs1 = rbm._ulogprob_vis(examples_vis)
    logprobs2 = srbm._ulogprob_vis(examples_vis)
    self.assertTrue(np.all(np.abs(logprobs1 - logprobs2) < 1E-10))

    logprobs1 = rbm._clogprob_hid_vis(examples_vis, examples_hid)
    logprobs2 = srbm._clogprob_hid_vis(examples_vis, examples_hid)
    self.assertTrue(np.all(np.abs(logprobs1 - logprobs2) < 1E-10))
def main(argv): # load preprocessed data samples print 'loading data...\t', data = load('./data/vanhateren.npz') print '[DONE]' print # remove DC component (first component) data_train = data['train'][1:, :] data_test = data['test'][1:, :] # create 1st layer dbn = DBN(GaussianRBM(num_visibles=data_train.shape[0], num_hiddens=100)) # hyperparameters dbn[0].learning_rate = 5E-3 dbn[0].weight_decay = 1E-2 dbn[0].momentum = 0.9 dbn[0].sigma = 0.65 dbn[0].cd_steps = 1 dbn[0].persistent = True # train 1st layer print 'training...\t', dbn.train(data_train, num_epochs=100, batch_size=100) print '[DONE]' # evaluate 1st layer print 'evaluating...\t', logptf = dbn.estimate_log_partition_function(num_ais_samples=100, beta_weights=arange(0, 1, 1E-3)) loglik = dbn.estimate_log_likelihood(data_test) print '[DONE]' print print 'estimated log-partf.:\t', logptf print 'estimated log-loss:\t', -loglik / data_test.shape[0] / log(2) print # create 2nd layer dbn.add_layer(SemiRBM(num_visibles=100, num_hiddens=100)) # initialize parameters dbn[1].L = dbn[0].W.T * dbn[0].W dbn[1].b = dbn[0].W.T * dbn[0].b + dbn[0].c + 0.5 * asmatrix(diag(dbn[1].L)).T dbn[1].L = dbn[1].L - asmatrix(diag(diag(dbn[1].L))) # hyperparameters dbn[1].learning_rate = 5E-3 dbn[1].learning_rate_lateral = 5E-4 dbn[1].weight_decay = 5E-3 dbn[1].weight_decay_lateral = 5E-3 dbn[1].momentum = 0.9 dbn[1].momentum_lateral = 0.9 dbn[1].num_lateral_updates = 20 dbn[1].damping = 0.2 dbn[1].cd_steps = 1 dbn[1].persistent = True # train 2nd layer print 'training...\t', dbn.train(data_train, num_epochs=100, batch_size=100) print '[DONE]' # evaluate 2nd layer print 'evaluating...\t', logptf = dbn.estimate_log_partition_function(num_ais_samples=100, beta_weights=arange(0, 1, 1E-3)) loglik = dbn.estimate_log_likelihood(data_test, num_samples=100) print '[DONE]' print print 'estimated log-partf.:\t', logptf print 'estimated log-loss:\t', -loglik / data_test.shape[0] / log(2) print # fine-tune with wake-sleep dbn[0].learning_rate /= 
4. dbn[1].learning_rate /= 4. print 'fine-tuning...\t', dbn.train_wake_sleep(data_train, num_epochs=10, batch_size=10) print '[DONE]' # reevaluate print 'evaluating...\t', logptf = dbn.estimate_log_partition_function(num_ais_samples=100, beta_weights=arange(0, 1, 1E-3)) loglik = dbn.estimate_log_likelihood(data_test, num_samples=100) print '[DONE]' print print 'estimated log-partf.:\t', logptf print 'estimated log-loss:\t', -loglik / data_test.shape[0] / log(2) return 0
def test_probabilities(self):
    """Check the log-probability methods of SemiRBM for consistency.

    Three properties are verified, each to a tolerance of 1E-10:

    1. The conditional over hidden states given a visible state is
       normalized, i.e. its log-sum-exp over all hidden states is zero.
    2. The unnormalized joint equals the conditional of the hiddens plus
       the unnormalized marginal of the visibles.
    3. With the lateral weights set to zero, the SemiRBM gives the same
       values as an RBM using the same W, b and c.
    """
    srbm = SemiRBM(9, 12)
    srbm.W = np.matrix(np.random.randn(srbm.X.shape[0], srbm.Y.shape[0]))
    srbm.b = np.matrix(np.random.rand(srbm.X.shape[0], 1))
    srbm.c = np.matrix(np.random.randn(srbm.Y.shape[0], 1))

    # random symmetric lateral weights with a zero diagonal
    srbm.L = np.matrix(np.random.randn(srbm.X.shape[0], srbm.X.shape[0])) / 2.
    srbm.L = np.triu(srbm.L) + np.triu(srbm.L).T - 2. * np.diag(np.diag(srbm.L))

    examples_vis = np.matrix(np.random.rand(srbm.X.shape[0], 100) < 0.5)
    examples_hid = np.matrix(np.random.rand(srbm.Y.shape[0], 100) < 0.5)

    states_hid = utils.binary_numbers(srbm.Y.shape[0])

    # check that conditional probabilities are normalized; the absolute
    # value matters, otherwise a conditional with too little mass (a
    # negative log-sum) would pass unnoticed
    logprobs = srbm._clogprob_hid_vis(examples_vis, states_hid, all_pairs=True)
    self.assertTrue(np.all(np.abs(utils.logsumexp(logprobs, 1)) < 1E-10))

    # test for consistency: joint = conditional + marginal
    logprobs1 = srbm._ulogprob(examples_vis, examples_hid, all_pairs=True)
    logprobs3 = (
        srbm._clogprob_hid_vis(examples_vis, examples_hid, all_pairs=True)
        + srbm._ulogprob_vis(examples_vis).T)
    self.assertTrue(np.all(np.abs(logprobs1 - logprobs3) < 1E-10))

    # without lateral weights the SemiRBM must match a plain RBM
    rbm = RBM(srbm.X.shape[0], srbm.Y.shape[0])
    rbm.W = srbm.W
    rbm.b = srbm.b
    rbm.c = srbm.c
    srbm.L *= 0

    logprobs1 = rbm._ulogprob_vis(examples_vis)
    logprobs2 = srbm._ulogprob_vis(examples_vis)
    self.assertTrue(np.all(np.abs(logprobs1 - logprobs2) < 1E-10))

    logprobs1 = rbm._clogprob_hid_vis(examples_vis, examples_hid)
    logprobs2 = srbm._clogprob_hid_vis(examples_vis, examples_hid)
    self.assertTrue(np.all(np.abs(logprobs1 - logprobs2) < 1E-10))
def test_all_pairs(self):
    """Check that ``all_pairs=True`` agrees with the column-wise results.

    For both ``_ulogprob`` and ``_clogprob_hid_vis`` three things are
    compared (each within a summed absolute error of 1E-10):

    * the column-wise result against the diagonal of the all-pairs matrix,
    * one visible column against all hidden columns vs. row 1,
    * all visible columns against one hidden column vs. column 1.
    """
    srbm = SemiRBM(10, 10)
    num_vis, num_hid = srbm.X.shape[0], srbm.Y.shape[0]

    srbm.W = np.matrix(np.random.randn(num_vis, num_hid))
    srbm.b = np.matrix(np.random.rand(num_vis, 1))
    srbm.c = np.matrix(np.random.randn(num_hid, 1))

    vis = np.matrix(np.random.rand(num_vis, 100) < 0.5)
    hid = np.matrix(np.random.rand(num_hid, 100) < 0.5)

    for logprob in (srbm._ulogprob, srbm._clogprob_hid_vis):
        # matching columns correspond to the diagonal of the pair matrix
        direct = logprob(vis, hid)
        from_pairs = np.diag(logprob(vis, hid, all_pairs=True))
        self.assertTrue(np.abs(direct - from_pairs).sum() < 1E-10)

        # a single visible state against every hidden state
        direct = logprob(vis[:, 1], hid)
        from_pairs = logprob(vis, hid, all_pairs=True)[1, :]
        self.assertTrue(np.abs(direct - from_pairs).sum() < 1E-10)

        # every visible state against a single hidden state
        direct = logprob(vis, hid[:, 1])
        from_pairs = logprob(vis, hid, all_pairs=True)[:, 1].T
        self.assertTrue(np.abs(direct - from_pairs).sum() < 1E-10)