# Assumed imports (not shown in the original excerpt): these tests appear to
# target the paysage library, so the module paths below are a best guess and
# may need adjusting to the local layout. `num_vis`, `num_hid`, and
# `num_samples`, used by some tests, are module-level constants defined
# elsewhere in the test suite.
import math
import tempfile
from functools import partial

import numpy as np
import pandas
import pandas as pd

from paysage import backends as be
from paysage import batch
from paysage import factorization
from paysage import fit
from paysage import layers
from paysage import math_utils
from paysage import preprocess as pre
from paysage import samplers
from paysage.models import BoltzmannMachine
from paysage.models.state import State
# some older-API tests below also reference `model` and `hidden` modules


def test_pca_svd_save_read():
    # create some random data
    num_samples = 10000
    dim = 10
    num_components = 3

    # generate some data
    mean = np.random.random(dim)
    cov_factor = np.random.random((dim, dim))
    cov = np.dot(cov_factor, cov_factor.T)
    samples = be.float_tensor(
        np.random.multivariate_normal(mean, cov, size=num_samples))

    # find the principal directions
    pca = factorization.PCA.from_svd(samples, num_components)

    # save it
    pca_file = tempfile.NamedTemporaryFile()
    store = pd.HDFStore(pca_file.name, mode="w")
    pca.save(store)

    # read it
    pca_read = factorization.PCA.from_saved(store)
    store.close()

    # check it
    assert be.allclose(pca.W, pca_read.W)
    assert be.allclose(pca.var, pca_read.var)
    assert pca.stepsize == pca_read.stepsize
    assert pca.num_components == pca_read.num_components

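# A minimal numpy sketch of what PCA-by-SVD computes, for reference when
# reading the test above. This illustrates the standard algorithm, not
# paysage's actual implementation (conventions such as centering or the sign
# of the components may differ):
def pca_by_svd_sketch(samples, num_components):
    X = np.asarray(samples, dtype=float)
    X = X - X.mean(axis=0)  # center each column
    _, s, Vt = np.linalg.svd(X, full_matrices=False)
    W = Vt[:num_components].T  # principal directions, shape (dim, k)
    var = s[:num_components] ** 2 / (len(X) - 1)  # variance along each direction
    return W, var
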
def test_grbm_reload():
    vis_layer = layers.BernoulliLayer(num_vis, center=True)
    hid_layer = layers.GaussianLayer(num_hid, center=True)

    # create the model and initialize it on random data
    grbm = BoltzmannMachine([vis_layer, hid_layer])
    data = batch.Batch(
        {'train': batch.InMemoryTable(be.randn((10 * num_samples, num_vis)),
                                      num_samples)})
    grbm.initialize(data)

    with tempfile.NamedTemporaryFile() as file:
        # save the model
        store = pandas.HDFStore(file.name, mode='w')
        grbm.save(store)
        store.close()

        # reload
        store = pandas.HDFStore(file.name, mode='r')
        grbm_reload = BoltzmannMachine.from_saved(store)
        store.close()

    # check the two models are consistent
    vis_data = vis_layer.random((num_samples, num_vis))
    data_state = State.from_visible(vis_data, grbm)
    vis_orig = grbm.deterministic_iteration(1, data_state)[0]
    vis_reload = grbm_reload.deterministic_iteration(1, data_state)[0]
    assert be.allclose(vis_orig, vis_reload)
    assert be.allclose(grbm.layers[0].moments.mean,
                       grbm_reload.layers[0].moments.mean)
    assert be.allclose(grbm.layers[0].moments.var,
                       grbm_reload.layers[0].moments.var)
    assert be.allclose(grbm.layers[1].moments.mean,
                       grbm_reload.layers[1].moments.mean)
    assert be.allclose(grbm.layers[1].moments.var,
                       grbm_reload.layers[1].moments.var)

def test_exponential_derivatives():
    num_visible_units = 100
    num_hidden_units = 50
    batch_size = 25

    # set a seed for the random number generator
    be.set_seed()

    # set up some layer and model objects
    vis_layer = layers.ExponentialLayer(num_visible_units)
    hid_layer = layers.ExponentialLayer(num_hidden_units)
    rbm = hidden.Model([vis_layer, hid_layer])

    # randomly set the intrinsic model parameters
    # for the exponential layers, we need a > 0, b > 0, and W < 0
    a = be.rand((num_visible_units,))
    b = be.rand((num_hidden_units,))
    W = -be.rand((num_visible_units, num_hidden_units))

    rbm.layers[0].int_params.loc[:] = a
    rbm.layers[1].int_params.loc[:] = b
    rbm.weights[0].int_params.matrix[:] = W

    # generate a random batch of data
    vdata = rbm.layers[0].random((batch_size, num_visible_units))
    vdata_scaled = rbm.layers[0].rescale(vdata)

    # compute the mean of the hidden layer
    rbm.layers[1].update([vdata], [rbm.weights[0].W()])
    hid_mean = rbm.layers[1].mean()
    hid_mean_scaled = rbm.layers[1].rescale(hid_mean)

    # compute the derivatives
    d_visible_loc = be.mean(vdata, axis=0)
    d_hidden_loc = be.mean(hid_mean_scaled, axis=0)
    d_W = -be.batch_outer(vdata, hid_mean_scaled) / len(vdata)

    # compute the derivatives using the layer functions
    vis_derivs = rbm.layers[0].derivatives(vdata, [hid_mean_scaled],
                                           [rbm.weights[0].W()])
    hid_derivs = rbm.layers[1].derivatives(hid_mean, [vdata_scaled],
                                           [rbm.weights[0].W_T()])
    weight_derivs = rbm.weights[0].derivatives(vdata, hid_mean_scaled)

    assert be.allclose(d_visible_loc, vis_derivs.loc), \
        "derivative of visible loc wrong in exponential-exponential rbm"
    assert be.allclose(d_hidden_loc, hid_derivs.loc), \
        "derivative of hidden loc wrong in exponential-exponential rbm"
    assert be.allclose(d_W, weight_derivs.matrix), \
        "derivative of weights wrong in exponential-exponential rbm"

def test_bernoulli_derivatives():
    num_visible_units = 100
    num_hidden_units = 50
    batch_size = 25

    # set a seed for the random number generator
    be.set_seed()

    # set up some layer and model objects
    vis_layer = layers.BernoulliLayer(num_visible_units)
    hid_layer = layers.BernoulliLayer(num_hidden_units)
    rbm = hidden.Model([vis_layer, hid_layer])

    # randomly set the intrinsic model parameters
    a = be.randn((num_visible_units,))
    b = be.randn((num_hidden_units,))
    W = be.randn((num_visible_units, num_hidden_units))

    rbm.layers[0].int_params['loc'] = a
    rbm.layers[1].int_params['loc'] = b
    rbm.weights[0].int_params['matrix'] = W

    # generate a random batch of data
    vdata = rbm.layers[0].random((batch_size, num_visible_units))
    vdata_scaled = rbm.layers[0].rescale(vdata)

    # compute the mean of the hidden layer
    rbm.layers[1].update(vdata, rbm.weights[0].W())
    hid_mean = rbm.layers[1].mean()
    hid_mean_scaled = rbm.layers[1].rescale(hid_mean)

    # compute the derivatives
    d_visible_loc = -be.mean(vdata, axis=0)
    d_hidden_loc = -be.mean(hid_mean_scaled, axis=0)
    d_W = -be.batch_outer(vdata, hid_mean_scaled) / len(vdata)

    # compute the derivatives using the layer functions
    vis_derivs = rbm.layers[0].derivatives(vdata, hid_mean_scaled,
                                           rbm.weights[0].W())
    hid_derivs = rbm.layers[1].derivatives(hid_mean, vdata_scaled,
                                           be.transpose(rbm.weights[0].W()))
    weight_derivs = rbm.weights[0].derivatives(vdata, hid_mean_scaled)

    assert be.allclose(d_visible_loc, vis_derivs['loc']), \
        "derivative of visible loc wrong in bernoulli-bernoulli rbm"
    assert be.allclose(d_hidden_loc, hid_derivs['loc']), \
        "derivative of hidden loc wrong in bernoulli-bernoulli rbm"
    assert be.allclose(d_W, weight_derivs['matrix']), \
        "derivative of weights wrong in bernoulli-bernoulli rbm"

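# Sign conventions in the derivative tests above, assuming the usual RBM
# energy E(v, h) = -a.v - b.h - v.W.h: the data-dependent ("positive phase")
# energy derivatives are
#     dE/da = -v,    dE/db = -h,    dE/dW = -v h^T,
# so batch-averaging gives exactly the -be.mean(...) and -be.batch_outer(...)
# reference values that the layer derivative functions are checked against.
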
def test_state_for_grad_DrivenSequentialMC():
    num_visible_units = 100
    num_hidden_units = 50
    batch_size = 25

    # set a seed for the random number generator
    be.set_seed()

    # set up some layer and model objects
    vis_layer = layers.BernoulliLayer(num_visible_units)
    hid_layer = layers.BernoulliLayer(num_hidden_units)
    rbm = model.Model([vis_layer, hid_layer])

    # randomly set the intrinsic model parameters
    a = be.randn((num_visible_units,))
    b = be.randn((num_hidden_units,))
    W = be.randn((num_visible_units, num_hidden_units))

    rbm.layers[0].params.loc[:] = a
    rbm.layers[1].params.loc[:] = b
    rbm.weights[0].params.matrix[:] = W

    # generate a random batch of data
    vdata = rbm.layers[0].random((batch_size, num_visible_units))
    data_state = State.from_visible(vdata, rbm)
    dropout_scale = State.dropout_rescale(rbm)

    # since we set no dropout, dropout_scale should be None
    assert dropout_scale is None

    for u in ['markov_chain', 'mean_field_iteration', 'deterministic_iteration']:
        # set up the sampler
        sampler = fit.DrivenSequentialMC(rbm, updater=u, clamped=[0])
        sampler.set_state(data_state)

        # update the state of the hidden layer
        grad_state = sampler.state_for_grad(1, dropout_scale)

        assert be.allclose(data_state.units[0], grad_state.units[0]), \
            "visible layer is clamped, and shouldn't get updated: {}".format(u)
        assert not be.allclose(data_state.units[1], grad_state.units[1]), \
            "hidden layer is not clamped, and should get updated: {}".format(u)

        # compute the conditional mean with the layer function
        ave = rbm.layers[1].conditional_mean(
            rbm._connected_rescaled_units(1, data_state, dropout_scale),
            rbm._connected_weights(1))

        assert be.allclose(ave, grad_state.units[1]), \
            "hidden layer of grad_state should be conditional mean: {}".format(u)

def test_find_k_nearest_neighbors():
    n = 20
    shp = (20, n)
    perm = be.rand_int(0, 20, (20,))
    k = 1

    be.set_seed()
    y = be.randn(shp)
    x = y[perm]

    indices, _distances = math_utils.find_k_nearest_neighbors(x, y, k)

    assert be.allclose(indices, perm)
    assert be.allclose(_distances, be.zeros((20,)), 1e-2, 1e-2)

def test_one_hot():
    categories = range(10)
    labels = be.unsqueeze(be.long_tensor(np.arange(100) // 10), 1)

    hots = pre.one_hot(labels, categories)

    hots_ref = be.zeros((len(labels), len(categories)))
    be.scatter_(hots_ref, be.long_tensor(np.arange(100) // 10), 1)

    assert be.allclose(hots, hots_ref)

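# The reference encoding built with be.scatter_ above can also be written
# directly in numpy (a sketch of the same construction, independent of the
# backend):
def one_hot_sketch(label_indices, num_categories):
    # pick out rows of the identity matrix by label index
    return np.eye(num_categories)[np.asarray(label_indices)]

# e.g. one_hot_sketch(np.arange(100) // 10, 10) reproduces hots_ref row for row
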
def test_in_memory_table_batch():
    # create data
    num_rows = 10000
    num_cols = 10
    tensor = be.rand((num_rows, num_cols))

    # batch it with InMemoryTable
    batch_size = 1000
    num_train_batches = num_rows // batch_size
    data = batch.InMemoryTable(tensor, batch_size)

    # loop through, checking the data
    i_batch = 0
    while True:
        # get the data
        try:
            batch_data = data.get()
        except StopIteration:
            assert i_batch == num_train_batches
            i_batch = 0
            break

        # check it
        assert be.allclose(
            batch_data,
            tensor[i_batch * batch_size:(i_batch + 1) * batch_size])
        i_batch += 1

def test_clamped_SequentialMC():
    num_visible_units = 100
    num_hidden_units = 50
    batch_size = 25
    steps = 1

    # set a seed for the random number generator
    be.set_seed()

    # set up some layer and model objects
    vis_layer = layers.BernoulliLayer(num_visible_units)
    hid_layer = layers.BernoulliLayer(num_hidden_units)
    rbm = model.Model([vis_layer, hid_layer])

    # randomly set the intrinsic model parameters
    a = be.randn((num_visible_units,))
    b = be.randn((num_hidden_units,))
    W = be.randn((num_visible_units, num_hidden_units))

    rbm.layers[0].params.loc[:] = a
    rbm.layers[1].params.loc[:] = b
    rbm.weights[0].params.matrix[:] = W

    # generate a random batch of data
    vdata = rbm.layers[0].random((batch_size, num_visible_units))
    data_state = State.from_visible(vdata, rbm)
    dropout_scale = State.dropout_rescale(rbm)

    # since we set no dropout, dropout_scale should be None
    assert dropout_scale is None

    for u in ['markov_chain', 'mean_field_iteration', 'deterministic_iteration']:
        # set up the sampler with the visible layer clamped
        sampler = fit.SequentialMC(rbm, updater=u, clamped=[0])
        sampler.set_state(data_state)

        # update the sampler state and check the output
        sampler.update_state(steps, dropout_scale)

        assert be.allclose(data_state.units[0], sampler.state.units[0]), \
            "visible layer is clamped, and shouldn't get updated: {}".format(u)
        assert not be.allclose(data_state.units[1], sampler.state.units[1]), \
            "hidden layer is not clamped, and should get updated: {}".format(u)

def test_exponential_conditional_params():
    num_visible_units = 100
    num_hidden_units = 50
    batch_size = 25

    # set a seed for the random number generator
    be.set_seed()

    # set up some layer and model objects
    vis_layer = layers.ExponentialLayer(num_visible_units)
    hid_layer = layers.ExponentialLayer(num_hidden_units)
    rbm = model.Model([vis_layer, hid_layer])

    # randomly set the intrinsic model parameters
    # for the exponential layers, we need a > 0, b > 0, and W < 0
    a = be.rand((num_visible_units,))
    b = be.rand((num_hidden_units,))
    W = -be.rand((num_visible_units, num_hidden_units))

    rbm.layers[0].params.loc[:] = a
    rbm.layers[1].params.loc[:] = b
    rbm.weights[0].params.matrix[:] = W

    # generate a random batch of data
    vdata = rbm.layers[0].random((batch_size, num_visible_units))
    hdata = rbm.layers[1].random((batch_size, num_hidden_units))

    # compute conditional parameters
    hidden_rate = -be.dot(vdata, W)  # (batch_size, num_hidden_units)
    hidden_rate += be.broadcast(b, hidden_rate)

    visible_rate = -be.dot(hdata, be.transpose(W))  # (batch_size, num_visible_units)
    visible_rate += be.broadcast(a, visible_rate)

    # compute the conditional parameters using the layer functions
    hidden_rate_func = rbm.layers[1]._conditional_params(
        [vdata], [rbm.weights[0].W()])
    visible_rate_func = rbm.layers[0]._conditional_params(
        [hdata], [rbm.weights[0].W_T()])

    assert be.allclose(hidden_rate, hidden_rate_func), \
        "hidden rate wrong in exponential-exponential rbm"
    assert be.allclose(visible_rate, visible_rate_func), \
        "visible rate wrong in exponential-exponential rbm"

def test_bernoulli_conditional_params():
    num_visible_units = 100
    num_hidden_units = 50
    batch_size = 25

    # set a seed for the random number generator
    be.set_seed()

    # set up some layer and model objects
    vis_layer = layers.BernoulliLayer(num_visible_units)
    hid_layer = layers.BernoulliLayer(num_hidden_units)
    rbm = model.Model([vis_layer, hid_layer])

    # randomly set the intrinsic model parameters
    a = be.randn((num_visible_units,))
    b = be.randn((num_hidden_units,))
    W = be.randn((num_visible_units, num_hidden_units))

    rbm.layers[0].params.loc[:] = a
    rbm.layers[1].params.loc[:] = b
    rbm.weights[0].params.matrix[:] = W

    # generate a random batch of data
    vdata = rbm.layers[0].random((batch_size, num_visible_units))
    hdata = rbm.layers[1].random((batch_size, num_hidden_units))

    # compute conditional parameters
    hidden_field = be.dot(vdata, W)  # (batch_size, num_hidden_units)
    hidden_field += b

    visible_field = be.dot(hdata, be.transpose(W))  # (batch_size, num_visible_units)
    visible_field += a

    # compute conditional parameters with layer functions
    hidden_field_layer = rbm.layers[1]._conditional_params(
        [vdata], [rbm.weights[0].W()])
    visible_field_layer = rbm.layers[0]._conditional_params(
        [hdata], [rbm.weights[0].W_T()])

    assert be.allclose(hidden_field, hidden_field_layer), \
        "hidden field wrong in bernoulli-bernoulli rbm"
    assert be.allclose(visible_field, visible_field_layer), \
        "visible field wrong in bernoulli-bernoulli rbm"

def test_exponential_update():
    num_visible_units = 100
    num_hidden_units = 50
    batch_size = 25

    # set a seed for the random number generator
    be.set_seed()

    # set up some layer and model objects
    vis_layer = layers.ExponentialLayer(num_visible_units)
    hid_layer = layers.ExponentialLayer(num_hidden_units)
    rbm = hidden.Model([vis_layer, hid_layer])

    # randomly set the intrinsic model parameters
    # for the exponential layers, we need a > 0, b > 0, and W < 0
    a = be.rand((num_visible_units,))
    b = be.rand((num_hidden_units,))
    W = -be.rand((num_visible_units, num_hidden_units))

    rbm.layers[0].int_params['loc'] = a
    rbm.layers[1].int_params['loc'] = b
    rbm.weights[0].int_params['matrix'] = W

    # generate a random batch of data
    vdata = rbm.layers[0].random((batch_size, num_visible_units))
    hdata = rbm.layers[1].random((batch_size, num_hidden_units))

    # compute extrinsic parameters
    hidden_rate = -be.dot(vdata, W)  # (batch_size, num_hidden_units)
    hidden_rate += be.broadcast(b, hidden_rate)

    visible_rate = -be.dot(hdata, be.transpose(W))  # (batch_size, num_visible_units)
    visible_rate += be.broadcast(a, visible_rate)

    # update the extrinsic parameters using the layer functions
    rbm.layers[1].update(vdata, rbm.weights[0].W())
    rbm.layers[0].update(hdata, be.transpose(rbm.weights[0].W()))

    assert be.allclose(hidden_rate, rbm.layers[1].ext_params['rate']), \
        "hidden rate wrong in exponential-exponential rbm"
    assert be.allclose(visible_rate, rbm.layers[0].ext_params['rate']), \
        "visible rate wrong in exponential-exponential rbm"

def test_pca_save_read_num_components():
    # create some random data
    num_samples = 10000
    dim = 10
    batch_size = 100
    num_components = 3
    num_components_save = 2

    # generate some data
    mean = np.random.random(dim)
    cov_factor = np.random.random((dim, dim))
    cov = np.dot(cov_factor, cov_factor.T)
    samples = be.float_tensor(
        np.random.multivariate_normal(mean, cov, size=num_samples))

    samples_train, samples_validate = batch.split_tensor(samples, 0.9)
    data = batch.Batch(
        {'train': batch.InMemoryTable(samples_train, batch_size),
         'validate': batch.InMemoryTable(samples_validate, batch_size)})

    # find the principal directions
    pca = factorization.PCA.from_batch(data, num_components, epochs=10,
                                       grad_steps_per_minibatch=1,
                                       stepsize=0.01)

    # save it
    pca_file = tempfile.NamedTemporaryFile()
    store = pd.HDFStore(pca_file.name, mode="w")
    pca.save(store, num_components_save=num_components_save)

    # read it
    pca_read = factorization.PCA.from_saved(store)
    store.close()

    # check it
    assert be.allclose(pca.W[:, :num_components_save], pca_read.W)
    assert be.allclose(pca.var[:num_components_save], pca_read.var)
    assert pca.stepsize == pca_read.stepsize
    assert pca_read.num_components == num_components_save

def test_bernoulli_update():
    num_visible_units = 100
    num_hidden_units = 50
    batch_size = 25

    # set a seed for the random number generator
    be.set_seed()

    # set up some layer and model objects
    vis_layer = layers.BernoulliLayer(num_visible_units)
    hid_layer = layers.BernoulliLayer(num_hidden_units)
    rbm = hidden.Model([vis_layer, hid_layer])

    # randomly set the intrinsic model parameters
    a = be.randn((num_visible_units,))
    b = be.randn((num_hidden_units,))
    W = be.randn((num_visible_units, num_hidden_units))

    rbm.layers[0].int_params.loc[:] = a
    rbm.layers[1].int_params.loc[:] = b
    rbm.weights[0].int_params.matrix[:] = W

    # generate a random batch of data
    vdata = rbm.layers[0].random((batch_size, num_visible_units))
    hdata = rbm.layers[1].random((batch_size, num_hidden_units))

    # compute extrinsic parameters
    hidden_field = be.dot(vdata, W)  # (batch_size, num_hidden_units)
    hidden_field += be.broadcast(b, hidden_field)

    visible_field = be.dot(hdata, be.transpose(W))  # (batch_size, num_visible_units)
    visible_field += be.broadcast(a, visible_field)

    # update the extrinsic parameters using the layer functions
    rbm.layers[0].update([hdata], [rbm.weights[0].W_T()])
    rbm.layers[1].update([vdata], [rbm.weights[0].W()])

    assert be.allclose(hidden_field, rbm.layers[1].ext_params.field), \
        "hidden field wrong in bernoulli-bernoulli rbm"
    assert be.allclose(visible_field, rbm.layers[0].ext_params.field), \
        "visible field wrong in bernoulli-bernoulli rbm"

def test_mean_variance():
    # create some random data
    s = be.rand((100000,))

    # reference result
    ref_mean = be.mean(s)
    ref_var = be.var(s)

    # do the online calculation
    mv = math_utils.MeanVarianceCalculator()
    for i in range(10):
        mv.update(s[i * 10000:(i + 1) * 10000])

    assert be.allclose(be.float_tensor(np.array([ref_mean])),
                       be.float_tensor(np.array([mv.mean])))
    assert be.allclose(be.float_tensor(np.array([ref_var])),
                       be.float_tensor(np.array([mv.var])),
                       rtol=1e-4, atol=1e-7)

def test_mean_variance_2d():
    # create some random data
    num = 10000
    dim2 = 10
    num_steps = 10
    stepsize = num // num_steps
    s = be.rand((num, dim2))

    # reference result
    ref_mean = be.mean(s, axis=0)
    ref_var = be.var(s, axis=0)

    # do the online calculation
    mv = math_utils.MeanVarianceArrayCalculator()
    for i in range(num_steps):
        mv.update(s[i * stepsize:(i + 1) * stepsize])

    assert be.allclose(ref_mean, mv.mean)
    assert be.allclose(ref_var, mv.var, rtol=1e-3, atol=1e-5)

def test_mean_variance_serialization():
    # create some random data
    num = 100
    dim2 = 10
    num_steps = 10
    stepsize = num // num_steps
    s = be.rand((num, dim2))

    # do the online calculation
    mv = math_utils.MeanVarianceArrayCalculator()
    for i in range(num_steps):
        mv.update(s[i * stepsize:(i + 1) * stepsize])

    df = mv.to_dataframe()
    mv_serial = math_utils.MeanVarianceArrayCalculator.from_dataframe(df)

    assert be.allclose(mv_serial.mean, mv.mean)
    assert be.allclose(mv_serial.var, mv.var)
    assert be.allclose(mv_serial.square, mv.square)
    assert mv_serial.num == mv.num

def test_mean_variance():
    # create some random data
    num = 100000
    num_steps = 10
    stepsize = num // num_steps
    s = be.rand((num,))

    # reference result
    ref_mean = be.mean(s)
    ref_var = be.var(s)

    # do the online calculation
    mv = math_utils.MeanVarianceCalculator()
    for i in range(num_steps):
        mv.update(s[i * stepsize:(i + 1) * stepsize])

    assert be.allclose(be.float_tensor(np.array([ref_mean])),
                       be.float_tensor(np.array([mv.mean])))
    assert be.allclose(be.float_tensor(np.array([ref_var])),
                       be.float_tensor(np.array([mv.var])),
                       rtol=1e-3, atol=1e-5)

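# The online calculators exercised above can be implemented by merging
# per-chunk statistics. A minimal sketch in the spirit of Chan et al.'s
# parallel update rule (paysage's actual MeanVarianceCalculator may differ
# in details such as the variance normalization):
class OnlineMeanVarSketch:
    def __init__(self):
        self.num = 0
        self.mean = 0.0
        self.m2 = 0.0  # running sum of squared deviations from the mean

    def update(self, chunk):
        chunk = np.asarray(chunk, dtype=float)
        n = len(chunk)
        delta = chunk.mean() - self.mean
        total = self.num + n
        self.mean += delta * n / total
        # merge the chunk's sum of squared deviations into the running total
        self.m2 += chunk.var() * n + delta ** 2 * self.num * n / total
        self.num = total

    @property
    def var(self):
        # population variance; use (self.num - 1) for the sample variance
        return self.m2 / self.num
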
def test_unclamped_DrivenSequentialMC():
    num_visible_units = 100
    num_hidden_units = 50
    batch_size = 25
    steps = 1

    # set a seed for the random number generator
    be.set_seed()

    # set up some layer and model objects
    vis_layer = layers.BernoulliLayer(num_visible_units)
    hid_layer = layers.BernoulliLayer(num_hidden_units)
    rbm = BoltzmannMachine([vis_layer, hid_layer])

    # randomly set the intrinsic model parameters
    a = be.randn((num_visible_units,))
    b = be.randn((num_hidden_units,))
    W = be.randn((num_visible_units, num_hidden_units))

    rbm.layers[0].params.loc[:] = a
    rbm.layers[1].params.loc[:] = b
    rbm.connections[0].weights.params.matrix[:] = W

    # generate a random batch of data
    vdata = rbm.layers[0].random((batch_size, num_visible_units))
    data_state = State.from_visible(vdata, rbm)

    for u in ['markov_chain', 'mean_field_iteration', 'deterministic_iteration']:
        # set up the sampler with no layers clamped
        sampler = samplers.SequentialMC(rbm, updater=u)
        sampler.set_state(data_state)

        # update the sampler state and check the output
        sampler.update_state(steps)

        assert not be.allclose(data_state[0], sampler.state[0]), \
            "visible layer is not clamped, and should get updated: {}".format(u)
        assert not be.allclose(data_state[1], sampler.state[1]), \
            "hidden layer is not clamped, and should get updated: {}".format(u)

def test_in_memory_batch():
    # create data
    num_rows = 10000
    num_cols = 10
    tensor = be.rand((num_rows, num_cols))

    # read it back with Batch
    batch_size = 1000
    num_train_batches = num_rows // batch_size
    with batch.Batch(
            {'train': batch.InMemoryTable(tensor, batch_size),
             'validate': batch.InMemoryTable(tensor, batch_size)}) as data:

        # loop through, checking the data
        i_batch = 0
        while True:
            # get the data
            try:
                batch_data_train = data.get("train")
                batch_data_validate = data.get("validate")
            except StopIteration:
                assert i_batch == num_train_batches
                i_batch = 0
                data.reset_generator("all")
                break

            # check it
            assert be.allclose(
                batch_data_train,
                tensor[i_batch * batch_size:(i_batch + 1) * batch_size])
            assert be.allclose(
                batch_data_validate,
                tensor[i_batch * batch_size:(i_batch + 1) * batch_size])
            i_batch += 1

def test_mean_2d():
    # create some random data
    num = 5000
    num_steps = 10
    stepsize = num // num_steps
    s = be.rand((num, 10))

    # reference result
    ref_mean = be.mean(s, axis=0)

    # do the online calculation
    mv = math_utils.MeanArrayCalculator()
    for i in range(num_steps):
        mv.update(s[i * stepsize:(i + 1) * stepsize], axis=0)

    assert be.allclose(ref_mean, mv.mean)

def test_pdist():
    n = 500
    a_shape = (1000, n)
    b_shape = (1000, n)

    # distance distributions
    a_mean, a_scale = 1, 1
    b_mean, b_scale = -1, 1

    be.set_seed()
    a = a_mean + a_scale * be.randn(a_shape)
    b = b_mean + b_scale * be.randn(b_shape)

    dists = math_utils.pdist(a, b)
    dists_t = math_utils.pdist(b, a)

    assert be.shape(dists) == (1000, 1000)
    assert be.allclose(be.transpose(dists_t), dists)
    assert 2 * math.sqrt(n) < be.mean(dists) < 3 * math.sqrt(n)

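# Where the bounds in the last assertion come from: each coordinate of a - b
# is distributed as N(2, 2), so E||a - b||^2 = n * (1 + 1 + 2^2) = 6n, and the
# typical distance is about sqrt(6 n) ~= 2.45 sqrt(n), comfortably inside the
# asserted (2 sqrt(n), 3 sqrt(n)) window.
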
def test_grbm_reload():
    vis_layer = layers.BernoulliLayer(num_vis)
    hid_layer = layers.GaussianLayer(num_hid)

    # create the model
    grbm = model.Model([vis_layer, hid_layer])

    with tempfile.NamedTemporaryFile() as file:
        # save the model
        store = pandas.HDFStore(file.name, mode='w')
        grbm.save(store)
        store.close()

        # reload
        store = pandas.HDFStore(file.name, mode='r')
        grbm_reload = model.Model.from_saved(store)
        store.close()

    # check the two models are consistent
    vis_data = vis_layer.random((num_samples, num_vis))
    data_state = model.State.from_visible(vis_data, grbm)
    vis_orig = grbm.deterministic_iteration(1, data_state).units[0]
    vis_reload = grbm_reload.deterministic_iteration(1, data_state).units[0]
    assert be.allclose(vis_orig, vis_reload)

def test_gaussian_conditional_params():
    num_visible_units = 100
    num_hidden_units = 50
    batch_size = 25

    # set a seed for the random number generator
    be.set_seed()

    # set up some layer and model objects
    vis_layer = layers.GaussianLayer(num_visible_units)
    hid_layer = layers.GaussianLayer(num_hidden_units)
    rbm = BoltzmannMachine([vis_layer, hid_layer])

    # randomly set the intrinsic model parameters
    a = be.randn((num_visible_units,))
    b = be.randn((num_hidden_units,))
    log_var_a = 0.1 * be.randn((num_visible_units,))
    log_var_b = 0.1 * be.randn((num_hidden_units,))
    W = be.randn((num_visible_units, num_hidden_units))

    rbm.layers[0].params.loc[:] = a
    rbm.layers[1].params.loc[:] = b
    rbm.layers[0].params.log_var[:] = log_var_a
    rbm.layers[1].params.log_var[:] = log_var_b
    rbm.connections[0].weights.params.matrix[:] = W

    # generate a random batch of data
    vdata = rbm.layers[0].random((batch_size, num_visible_units))
    hdata = rbm.layers[1].random((batch_size, num_hidden_units))

    # compute the variance
    visible_var = be.exp(log_var_a)
    hidden_var = be.exp(log_var_b)

    # rescale the data
    vdata_scaled = vdata / visible_var
    hdata_scaled = hdata / hidden_var

    # test rescale
    assert be.allclose(vdata_scaled, rbm.layers[0].rescale(vdata)), \
        "visible rescale wrong in gaussian-gaussian rbm"
    assert be.allclose(hdata_scaled, rbm.layers[1].rescale(hdata)), \
        "hidden rescale wrong in gaussian-gaussian rbm"

    # compute the mean
    hidden_mean = be.dot(vdata_scaled, W)  # (batch_size, num_hidden_units)
    hidden_mean += b

    visible_mean = be.dot(hdata_scaled, be.transpose(W))  # (batch_size, num_visible_units)
    visible_mean += a

    # compute the conditional parameters using the layer functions
    vis_mean_func, vis_var_func = rbm.layers[0].conditional_params(
        [hdata_scaled], [rbm.connections[0].W(trans=True)])
    hid_mean_func, hid_var_func = rbm.layers[1].conditional_params(
        [vdata_scaled], [rbm.connections[0].W()])

    assert be.allclose(visible_var, vis_var_func), \
        "visible variance wrong in gaussian-gaussian rbm"
    assert be.allclose(hidden_var, hid_var_func), \
        "hidden variance wrong in gaussian-gaussian rbm"
    assert be.allclose(visible_mean, vis_mean_func), \
        "visible mean wrong in gaussian-gaussian rbm"
    assert be.allclose(hidden_mean, hid_mean_func), \
        "hidden mean wrong in gaussian-gaussian rbm"

def test_independent():
    """
    Test sampling from an rbm with two layers connected by a weight matrix
    that contains all zeros, so that the layers are independent.

    Note: This test compares values estimated by *sampling* to values
    computed analytically. It can fail for small batch_size, or strict
    tolerances, even if everything is working properly.

    """
    num_visible_units = 20
    num_hidden_units = 10
    batch_size = 1000
    steps = 100
    mean_tol = 0.2
    corr_tol = 0.2

    # set a seed for the random number generator
    be.set_seed()

    layer_types = [layers.BernoulliLayer, layers.GaussianLayer]

    for layer_type in layer_types:
        # set up some layer and model objects
        vis_layer = layer_type(num_visible_units)
        hid_layer = layer_type(num_hidden_units)
        rbm = BoltzmannMachine([vis_layer, hid_layer])

        # randomly set the intrinsic model parameters
        a = be.rand((num_visible_units,))
        b = be.rand((num_hidden_units,))
        W = be.zeros((num_visible_units, num_hidden_units))

        rbm.layers[0].params.loc[:] = a
        rbm.layers[1].params.loc[:] = b
        rbm.connections[0].weights.params.matrix[:] = W

        if layer_type == layers.GaussianLayer:
            log_var_a = be.randn((num_visible_units,))
            log_var_b = be.randn((num_hidden_units,))
            rbm.layers[0].params.log_var[:] = log_var_a
            rbm.layers[1].params.log_var[:] = log_var_b

        # initialize a state
        state = State.from_model(batch_size, rbm)

        # run a markov chain to update the state
        state = rbm.markov_chain(steps, state)

        # compute the mean
        state_for_moments = State.from_model(1, rbm)
        sample_mean = [be.mean(state[i], axis=0) for i in range(state.len)]
        model_mean = [rbm.layers[i].conditional_mean(
                          rbm._connected_rescaled_units(i, state_for_moments),
                          rbm._connected_weights(i))
                      for i in range(rbm.num_layers)]

        # check that the means are roughly equal
        for i in range(rbm.num_layers):
            ave = sample_mean[i]
            close = be.allclose(ave, model_mean[i][0],
                                rtol=mean_tol, atol=mean_tol)
            assert close, \
                "{0} {1}: sample mean does not match model mean".format(
                    layer_type, i)

        # check the cross correlation between the layers
        crosscov = be.cov(state[0], state[1])
        norm = be.outer(be.std(state[0], axis=0), be.std(state[1], axis=0))
        # be.divide(x, y) divides y by x, so this is crosscov / norm
        crosscorr = be.divide(norm, crosscov)
        assert be.tmax(be.tabs(crosscorr)) < corr_tol, \
            "{} cross correlation too large".format(layer_type)

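# Why the sampled means can be checked against conditional means here: with
# W = 0 the joint distribution factorizes, so each layer samples from a fixed
# distribution determined by its own parameters. For a Bernoulli layer, for
# instance, the zero-coupling conditional mean is just the logistic of the
# bias (a small numpy sketch, independent of the backend):
def bernoulli_mean_zero_coupling(loc):
    return 1.0 / (1.0 + np.exp(-np.asarray(loc, dtype=float)))
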
def compare_lists(a, b):
    return all(be.allclose(ai, bi) for ai, bi in zip(a, b))

def test_gaussian_derivatives():
    num_visible_units = 100
    num_hidden_units = 50
    batch_size = 25

    # set a seed for the random number generator
    be.set_seed()

    # set up some layer and model objects
    vis_layer = layers.GaussianLayer(num_visible_units)
    hid_layer = layers.GaussianLayer(num_hidden_units)
    rbm = BoltzmannMachine([vis_layer, hid_layer])

    # randomly set the intrinsic model parameters
    a = be.randn((num_visible_units,))
    b = be.randn((num_hidden_units,))
    log_var_a = 0.1 * be.randn((num_visible_units,))
    log_var_b = 0.1 * be.randn((num_hidden_units,))
    W = be.randn((num_visible_units, num_hidden_units))

    rbm.layers[0].params.loc[:] = a
    rbm.layers[1].params.loc[:] = b
    rbm.layers[0].params.log_var[:] = log_var_a
    rbm.layers[1].params.log_var[:] = log_var_b
    rbm.connections[0].weights.params.matrix[:] = W

    # generate a random batch of data
    vdata = rbm.layers[0].random((batch_size, num_visible_units))
    visible_var = be.exp(log_var_a)
    vdata_scaled = vdata / visible_var

    # compute the mean of the hidden layer
    hid_mean = rbm.layers[1].conditional_mean(
        [vdata_scaled], [rbm.connections[0].W()])
    hidden_var = be.exp(log_var_b)
    hid_mean_scaled = rbm.layers[1].rescale(hid_mean)

    # compute the derivatives
    d_vis_loc = be.mean((a - vdata) / visible_var, axis=0)

    d_vis_logvar = -0.5 * be.mean(be.square(be.subtract(a, vdata)), axis=0)
    d_vis_logvar += be.batch_quadratic(
        hid_mean_scaled, be.transpose(W), vdata, axis=0) / len(vdata)
    d_vis_logvar /= visible_var

    d_hid_loc = be.mean((b - hid_mean) / hidden_var, axis=0)

    d_hid_logvar = -0.5 * be.mean(be.square(hid_mean - b), axis=0)
    d_hid_logvar += be.batch_quadratic(
        vdata_scaled, W, hid_mean, axis=0) / len(hid_mean)
    d_hid_logvar /= hidden_var

    d_W = -be.batch_outer(vdata_scaled, hid_mean_scaled) / len(vdata_scaled)

    # compute the derivatives using the layer functions
    vis_derivs = rbm.layers[0].derivatives(
        vdata, [hid_mean_scaled], [rbm.connections[0].W(trans=True)])
    hid_derivs = rbm.layers[1].derivatives(
        hid_mean, [vdata_scaled], [rbm.connections[0].W()])
    weight_derivs = rbm.connections[0].weights.derivatives(
        vdata_scaled, hid_mean_scaled)

    # compute simple weighted derivatives using the layer functions
    scale = 2
    scale_func = partial(be.multiply, be.float_scalar(scale))
    vis_derivs_scaled = rbm.layers[0].derivatives(
        vdata, [hid_mean_scaled], [rbm.connections[0].W(trans=True)],
        weighting_function=scale_func)
    hid_derivs_scaled = rbm.layers[1].derivatives(
        hid_mean, [vdata_scaled], [rbm.connections[0].W()],
        weighting_function=scale_func)
    weight_derivs_scaled = rbm.connections[0].weights.derivatives(
        vdata_scaled, hid_mean_scaled, weighting_function=scale_func)

    assert be.allclose(d_vis_loc, vis_derivs[0].loc), \
        "derivative of visible loc wrong in gaussian-gaussian rbm"
    assert be.allclose(d_hid_loc, hid_derivs[0].loc), \
        "derivative of hidden loc wrong in gaussian-gaussian rbm"
    assert be.allclose(d_vis_logvar, vis_derivs[0].log_var,
                       rtol=1e-05, atol=1e-01), \
        "derivative of visible log_var wrong in gaussian-gaussian rbm"
    assert be.allclose(d_hid_logvar, hid_derivs[0].log_var,
                       rtol=1e-05, atol=1e-01), \
        "derivative of hidden log_var wrong in gaussian-gaussian rbm"
    assert be.allclose(d_W, weight_derivs[0].matrix), \
        "derivative of weights wrong in gaussian-gaussian rbm"

    assert be.allclose(scale * d_vis_loc, vis_derivs_scaled[0].loc), \
        "weighted derivative of visible loc wrong in gaussian-gaussian rbm"
    assert be.allclose(scale * d_hid_loc, hid_derivs_scaled[0].loc), \
        "weighted derivative of hidden loc wrong in gaussian-gaussian rbm"
    assert be.allclose(scale * d_vis_logvar, vis_derivs_scaled[0].log_var,
                       rtol=1e-05, atol=1e-01), \
        "weighted derivative of visible log_var wrong in gaussian-gaussian rbm"
    assert be.allclose(scale * d_hid_logvar, hid_derivs_scaled[0].log_var,
                       rtol=1e-05, atol=1e-01), \
        "weighted derivative of hidden log_var wrong in gaussian-gaussian rbm"
    assert be.allclose(scale * d_W, weight_derivs_scaled[0].matrix), \
        "weighted derivative of weights wrong in gaussian-gaussian rbm"

def test_onehot_derivatives():
    num_visible_units = 100
    num_hidden_units = 50
    batch_size = 25

    # set a seed for the random number generator
    be.set_seed()

    # set up some layer and model objects
    vis_layer = layers.OneHotLayer(num_visible_units)
    hid_layer = layers.OneHotLayer(num_hidden_units)
    rbm = BoltzmannMachine([vis_layer, hid_layer])

    # randomly set the intrinsic model parameters
    a = be.randn((num_visible_units,))
    b = be.randn((num_hidden_units,))
    W = be.randn((num_visible_units, num_hidden_units))

    rbm.layers[0].params.loc[:] = a
    rbm.layers[1].params.loc[:] = b
    rbm.connections[0].weights.params.matrix[:] = W

    # generate a random batch of data
    vdata = rbm.layers[0].random((batch_size, num_visible_units))
    vdata_scaled = rbm.layers[0].rescale(vdata)

    # compute the conditional mean of the hidden layer
    hid_mean = rbm.layers[1].conditional_mean([vdata], [rbm.connections[0].W()])
    hid_mean_scaled = rbm.layers[1].rescale(hid_mean)

    # compute the derivatives
    d_visible_loc = -be.mean(vdata, axis=0)
    d_hidden_loc = -be.mean(hid_mean_scaled, axis=0)
    d_W = -be.batch_outer(vdata, hid_mean_scaled) / len(vdata)

    # compute the derivatives using the layer functions
    vis_derivs = rbm.layers[0].derivatives(
        vdata, [hid_mean_scaled], [rbm.connections[0].W(trans=True)])
    hid_derivs = rbm.layers[1].derivatives(
        hid_mean, [vdata_scaled], [rbm.connections[0].W()])
    weight_derivs = rbm.connections[0].weights.derivatives(vdata, hid_mean_scaled)

    # compute simple weighted derivatives using the layer functions
    scale = 2
    scale_func = partial(be.multiply, be.float_scalar(scale))
    vis_derivs_scaled = rbm.layers[0].derivatives(
        vdata, [hid_mean_scaled], [rbm.connections[0].W(trans=True)],
        weighting_function=scale_func)
    hid_derivs_scaled = rbm.layers[1].derivatives(
        hid_mean, [vdata_scaled], [rbm.connections[0].W()],
        weighting_function=scale_func)
    weight_derivs_scaled = rbm.connections[0].weights.derivatives(
        vdata, hid_mean_scaled, weighting_function=scale_func)

    assert be.allclose(d_visible_loc, vis_derivs[0].loc), \
        "derivative of visible loc wrong in onehot-onehot rbm"
    assert be.allclose(d_hidden_loc, hid_derivs[0].loc), \
        "derivative of hidden loc wrong in onehot-onehot rbm"
    assert be.allclose(d_W, weight_derivs[0].matrix), \
        "derivative of weights wrong in onehot-onehot rbm"

    assert be.allclose(scale * d_visible_loc, vis_derivs_scaled[0].loc), \
        "weighted derivative of visible loc wrong in onehot-onehot rbm"
    assert be.allclose(scale * d_hidden_loc, hid_derivs_scaled[0].loc), \
        "weighted derivative of hidden loc wrong in onehot-onehot rbm"
    assert be.allclose(scale * d_W, weight_derivs_scaled[0].matrix), \
        "weighted derivative of weights wrong in onehot-onehot rbm"

def test_gaussian_derivatives():
    num_visible_units = 100
    num_hidden_units = 50
    batch_size = 25

    # set a seed for the random number generator
    be.set_seed()

    # set up some layer and model objects
    vis_layer = layers.GaussianLayer(num_visible_units)
    hid_layer = layers.GaussianLayer(num_hidden_units)
    rbm = hidden.Model([vis_layer, hid_layer])

    # randomly set the intrinsic model parameters
    a = be.randn((num_visible_units,))
    b = be.randn((num_hidden_units,))
    log_var_a = 0.1 * be.randn((num_visible_units,))
    log_var_b = 0.1 * be.randn((num_hidden_units,))
    W = be.randn((num_visible_units, num_hidden_units))

    rbm.layers[0].int_params.loc[:] = a
    rbm.layers[1].int_params.loc[:] = b
    rbm.layers[0].int_params.log_var[:] = log_var_a
    rbm.layers[1].int_params.log_var[:] = log_var_b
    rbm.weights[0].int_params.matrix[:] = W

    # generate a random batch of data
    vdata = rbm.layers[0].random((batch_size, num_visible_units))
    visible_var = be.exp(log_var_a)
    vdata_scaled = vdata / be.broadcast(visible_var, vdata)

    # compute the mean of the hidden layer
    rbm.layers[1].update([vdata_scaled], [rbm.weights[0].W()])
    hidden_var = be.exp(log_var_b)
    hid_mean = rbm.layers[1].mean()
    hid_mean_scaled = rbm.layers[1].rescale(hid_mean)

    # compute the derivatives
    d_vis_loc = -be.mean(vdata_scaled, axis=0)

    d_vis_logvar = -0.5 * be.mean(be.square(be.subtract(a, vdata)), axis=0)
    d_vis_logvar += be.batch_dot(
        hid_mean_scaled, be.transpose(W), vdata, axis=0) / len(vdata)
    d_vis_logvar /= visible_var

    d_hid_loc = -be.mean(hid_mean_scaled, axis=0)

    d_hid_logvar = -0.5 * be.mean(
        be.square(hid_mean - be.broadcast(b, hid_mean)), axis=0)
    d_hid_logvar += be.batch_dot(
        vdata_scaled, W, hid_mean, axis=0) / len(hid_mean)
    d_hid_logvar /= hidden_var

    d_W = -be.batch_outer(vdata_scaled, hid_mean_scaled) / len(vdata_scaled)

    # compute the derivatives using the layer functions
    rbm.layers[1].update([vdata_scaled], [rbm.weights[0].W()])
    rbm.layers[0].update([hid_mean_scaled], [rbm.weights[0].W_T()])

    vis_derivs = rbm.layers[0].derivatives(vdata, [hid_mean_scaled],
                                           [rbm.weights[0].W()])
    hid_derivs = rbm.layers[1].derivatives(hid_mean, [vdata_scaled],
                                           [rbm.weights[0].W_T()])
    weight_derivs = rbm.weights[0].derivatives(vdata_scaled, hid_mean_scaled)

    assert be.allclose(d_vis_loc, vis_derivs.loc), \
        "derivative of visible loc wrong in gaussian-gaussian rbm"
    assert be.allclose(d_hid_loc, hid_derivs.loc), \
        "derivative of hidden loc wrong in gaussian-gaussian rbm"
    assert be.allclose(d_vis_logvar, vis_derivs.log_var,
                       rtol=1e-05, atol=1e-01), \
        "derivative of visible log_var wrong in gaussian-gaussian rbm"
    assert be.allclose(d_hid_logvar, hid_derivs.log_var,
                       rtol=1e-05, atol=1e-01), \
        "derivative of hidden log_var wrong in gaussian-gaussian rbm"
    assert be.allclose(d_W, weight_derivs.matrix), \
        "derivative of weights wrong in gaussian-gaussian rbm"

def test_gaussian_update():
    num_visible_units = 100
    num_hidden_units = 50
    batch_size = 25

    # set a seed for the random number generator
    be.set_seed()

    # set up some layer and model objects
    vis_layer = layers.GaussianLayer(num_visible_units)
    hid_layer = layers.GaussianLayer(num_hidden_units)
    rbm = hidden.Model([vis_layer, hid_layer])

    # randomly set the intrinsic model parameters
    a = be.randn((num_visible_units,))
    b = be.randn((num_hidden_units,))
    log_var_a = 0.1 * be.randn((num_visible_units,))
    log_var_b = 0.1 * be.randn((num_hidden_units,))
    W = be.randn((num_visible_units, num_hidden_units))

    rbm.layers[0].int_params.loc[:] = a
    rbm.layers[1].int_params.loc[:] = b
    rbm.layers[0].int_params.log_var[:] = log_var_a
    rbm.layers[1].int_params.log_var[:] = log_var_b
    rbm.weights[0].int_params.matrix[:] = W

    # generate a random batch of data
    vdata = rbm.layers[0].random((batch_size, num_visible_units))
    hdata = rbm.layers[1].random((batch_size, num_hidden_units))

    # compute the variance
    visible_var = be.exp(log_var_a)
    hidden_var = be.exp(log_var_b)

    # rescale the data
    vdata_scaled = vdata / be.broadcast(visible_var, vdata)
    hdata_scaled = hdata / be.broadcast(hidden_var, hdata)

    # test rescale
    assert be.allclose(vdata_scaled, rbm.layers[0].rescale(vdata)), \
        "visible rescale wrong in gaussian-gaussian rbm"
    assert be.allclose(hdata_scaled, rbm.layers[1].rescale(hdata)), \
        "hidden rescale wrong in gaussian-gaussian rbm"

    # compute the mean
    hidden_mean = be.dot(vdata_scaled, W)  # (batch_size, num_hidden_units)
    hidden_mean += be.broadcast(b, hidden_mean)

    visible_mean = be.dot(hdata_scaled, be.transpose(W))  # (batch_size, num_visible_units)
    visible_mean += be.broadcast(a, visible_mean)

    # update the extrinsic parameters using the layer functions
    rbm.layers[0].update([hdata_scaled], [rbm.weights[0].W_T()])
    rbm.layers[1].update([vdata_scaled], [rbm.weights[0].W()])

    assert be.allclose(visible_var, rbm.layers[0].ext_params.variance), \
        "visible variance wrong in gaussian-gaussian rbm"
    assert be.allclose(hidden_var, rbm.layers[1].ext_params.variance), \
        "hidden variance wrong in gaussian-gaussian rbm"
    assert be.allclose(visible_mean, rbm.layers[0].ext_params.mean), \
        "visible mean wrong in gaussian-gaussian rbm"
    assert be.allclose(hidden_mean, rbm.layers[1].ext_params.mean), \
        "hidden mean wrong in gaussian-gaussian rbm"
