def test_pca_compare_var():
    # create some random data
    num_samples = 10000
    dim = 10
    batch_size = 100
    num_components = 3

    # generate some data
    mean = np.random.random(dim)
    cov_factor = np.random.random((dim, dim))
    cov = np.dot(cov_factor, cov_factor.T)
    samples = be.float_tensor(
        np.random.multivariate_normal(mean, cov, size=num_samples))

    samples_train, samples_validate = batch.split_tensor(samples, 0.9)
    data = batch.Batch({
        'train': batch.InMemoryTable(samples_train, batch_size),
        'validate': batch.InMemoryTable(samples_validate, batch_size)})

    # find the principal directions
    pca_sgd = factorization.PCA.from_batch(data, num_components, epochs=10,
                                           grad_steps_per_minibatch=1,
                                           stepsize=0.01)
    pca_svd = factorization.PCA.from_svd(samples_train, num_components)

    assert be.norm(pca_sgd.var - pca_svd.var) / be.norm(pca_sgd.var) < 1e-1
def pdist(x: be.Tensor, y: be.Tensor) -> be.Tensor:
    """
    Compute the pairwise distance matrix between the rows of x and y.

    Args:
        x (tensor (num_samples_1, num_units))
        y (tensor (num_samples_2, num_units))

    Returns:
        tensor (num_samples_1, num_samples_2)

    """
    inner = be.dot(x, be.transpose(y))
    x_mag = be.norm(x, axis=1) ** 2
    y_mag = be.norm(y, axis=1) ** 2
    squared = be.add(be.unsqueeze(y_mag, axis=0),
                     be.add(be.unsqueeze(x_mag, axis=1), -2 * inner))
    return be.sqrt(be.clip(squared, a_min=0))
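# A minimal usage sketch (not part of the library): sanity-check pdist against a
# direct numpy computation on small random matrices. It assumes numpy is available
# as np and the paysage backend as be, as in the surrounding code; the helper name
# _check_pdist_against_numpy is hypothetical.
def _check_pdist_against_numpy():
    x = be.float_tensor(np.random.random((5, 3)))
    y = be.float_tensor(np.random.random((7, 3)))
    dist = be.to_numpy_array(pdist(x, y))  # (5, 7) matrix of pairwise distances
    x_np, y_np = be.to_numpy_array(x), be.to_numpy_array(y)
    ref = np.sqrt(((x_np[:, None, :] - y_np[None, :, :]) ** 2).sum(axis=-1))
    assert np.allclose(dist, ref, atol=1e-5)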
def weight_norm_histogram(rbm, show_plot=False, filename=None):
    import matplotlib.pyplot as plt
    import seaborn as sns
    from math import sqrt

    fig, ax = plt.subplots()
    for l in range(rbm.num_connections):
        num_inputs = rbm.connections[l].shape[0]
        norm = be.to_numpy_array(
            be.norm(rbm.connections[l].weights.W(), axis=0) / sqrt(num_inputs))
        sns.distplot(norm, ax=ax, label=str(l))
    ax.legend()

    if show_plot:
        plt.show()
    if filename is not None:
        fig.savefig(filename)
    plt.close(fig)
    cd.train(opt, num_epochs, method=fit.pcd, mcsteps=mc_steps,
             beta_std=beta_std, burn_in=1)

    # evaluate the model
    util.show_metrics(rbm, cd.monitor)
    valid = data.get('validate')
    util.show_reconstructions(rbm, valid, show_plot, n_recon=10, vertical=False)
    util.show_fantasy_particles(rbm, valid, show_plot, n_fantasy=5)
    util.show_weights(rbm, show_plot, n_weights=100)

    # close the HDF5 store
    data.close()
    print("Done")

    return rbm

if __name__ == "__main__":
    rbm = run(show_plot=True)

    import seaborn
    import matplotlib.pyplot as plt

    # inspect the learned weights of each connection:
    # correlations between weight vectors and the distribution of their norms
    for conn in rbm.connections:
        c = be.corr(conn.weights.W(), conn.weights.W())
        fig, ax = plt.subplots()
        seaborn.heatmap(be.to_numpy_array(c), vmin=-1, vmax=1, ax=ax)
        plt.show()

        n = be.norm(conn.weights.W(), axis=0)
        fig, ax = plt.subplots()
        seaborn.distplot(be.to_numpy_array(n), ax=ax)
        plt.show()
def test_l2_normalize():
    result_pre = [be.norm(pre.l2_normalize(tensor), axis=1) for tensor in tensors]
    result_ref = [be.ones((len(tensor),)) for tensor in tensors]
    assert compare_lists(result_pre, result_ref)