import numpy as np
import grad as g


def gaussian():
    u = g.Var(shape=(2, ))
    half_sigma = g.Low(shape=(2, 2))    # lower-triangular factor, so sigma = L @ L.T stays PSD
    sigma = half_sigma @ half_sigma.T
    var = [u, half_sigma]
    x = g.Inp(name='x', shape=(2, ))
    # negative log density of N(u, sigma), up to an additive constant
    neg_log_pdf = 1 / 2 * g.log(g.det(sigma)) + \
        1 / 2 * (x - u) @ g.inv(sigma) @ (x - u).T
    log_like = g.trmean(neg_log_pdf)
    data = np.random.multivariate_normal(mean=np.array([3, -3]),
                                         cov=np.array([[5, -2], [-2, 2]]),
                                         size=(1000, ),
                                         check_valid='warn')
    print('real param')
    print(np.array([3, -3]))
    print(np.array([[5, -2], [-2, 2]]))
    inp_dict = {'x': data}
    lr = 0.1
    for i in range(600):
        ret = log_like.forward(inp_dict)
        if (i + 1) % 100 == 0:
            print(ret)
            print(u.val)
            sigma_est = half_sigma.val @ half_sigma.val.T    # avoid shadowing the graph node sigma
            print(sigma_est)
        log_like.update()
        for v in var:
            v.apply_gradient(lr)
    return
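# --- Aside: a NumPy-only sanity check, independent of the grad framework. The
# closed-form Gaussian MLE that the gradient loop above should approach is the
# sample mean and the (biased) sample covariance:
import numpy as np

data = np.random.multivariate_normal(mean=np.array([3, -3]),
                                     cov=np.array([[5, -2], [-2, 2]]),
                                     size=(1000, ))
mu_mle = data.mean(axis=0)    # sample mean
centered = data - mu_mle
sigma_mle = centered.T @ centered / len(data)    # biased MLE covariance
print(mu_mle)
print(sigma_mle)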
import numpy as np
import grad as g
import vi    # assumed import: the script uses vi.mean_gauss / vi.learning_rate


def gaussian():
    s = g.Inp(name='s', shape=(2, ))
    # v, mu, half_sigma = vi.full_gauss(s, 2)
    # var_param = [mu, half_sigma]
    v, mu, log_sigma = vi.mean_gauss(s, 2)
    var_param = [mu, log_sigma]
    x = g.Inp(name='x', shape=(2, ))
    x_sigma = np.array([[2, 3], [3, 5]])
    # x_sigma = np.array([[2, 0], [0, 5]])
    # log_pdf = (g.mean(g.tr(1/2 * v @ v.T)
    #                   + g.tr(1/2 * (x - v) @ np.linalg.inv(x_sigma) @ (x - v).T)
    #                   - g.log(g.abs(g.det(half_sigma))))
    #            - 1/2 * g.log(g.abs(g.det(half_sigma @ half_sigma.T))))
    log_pdf = g.mean(
        g.tr(1 / 2 * v @ v.T) +
        g.tr(1 / 2 * (x - v) @ np.linalg.inv(x_sigma) @ (x - v).T))
    x_mu = np.array([-3, -5])
    data = np.random.multivariate_normal(mean=x_mu, cov=x_sigma, size=(1000, ))
    lr = 0.01
    var_gs = [0] * len(var_param)
    for i in range(1000):
        np.random.shuffle(data)
        s_sample = np.random.multivariate_normal(mean=np.array([0, 0]),
                                                 cov=np.array([[1, 0], [0, 1]]),
                                                 size=(10, 1))
        inp_dict = {'x': data, 's': s_sample}
        ret = log_pdf.forward(inp_dict)
        log_pdf.update()
        # half_sigma.update(prev_grad=np.linalg.inv(-L_val.T))
        # gradient of the negative entropy term, d/d(log_sigma)(-sum(log_sigma)) = -1
        log_sigma.update(prev_grad=-np.ones(2))
        for k, var in enumerate(var_param):
            var_lr, var_gs[k] = vi.learning_rate(i, var.grad, lr, var_gs[k])
            var.apply_gradient(var_lr)    # adaptive per-parameter step
        if (i + 1) % 100 == 0:
            print(ret)
            print(mu.val)
            # L_val = half_sigma.val
            # print(L_val @ L_val.T)
        if (i + 1) % 500 == 0:
            lr /= 2
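# --- Aside: a minimal NumPy sketch of the sampling step this script relies on,
# assuming vi.mean_gauss implements the usual mean-field reparameterization
# v = mu + exp(log_sigma) * s (the vi module itself is not shown here):
import numpy as np

mu = np.zeros(2)
log_sigma = np.zeros(2)
s = np.random.multivariate_normal(mean=np.zeros(2), cov=np.eye(2), size=(10, ))
v = mu + np.exp(log_sigma) * s    # one reparameterized sample per row of s
print(v.shape)    # (10, 2); gradients w.r.t. mu and log_sigma flow through v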
        self.std = None
        self.log_alpha = None
        self.grad = 0
        return

    def sparse(self):
        # Prune high-dropout weights: log(alpha) = log(sigma^2) - log(mu^2)
        # (self.log_sigma holds the log-variance); only weights with
        # log(alpha) <= 0.7 survive.
        self.test = True
        log_alpha = self.log_sigma - 2 * np.log(np.abs(self.mu))
        self.mu = self.mu * (log_alpha <= 0.7)
        return


iterations = 100
batch_size = 128
x = g.Inp(name='x', shape=(batch_size, 28 * 28))
w1_init = np.load('w1.npy')
w2_init = np.load('w2.npy')
w3_init = np.load('w3.npy')
'''
w1 = SparseVDLayer(x, batch_size=batch_size, name='w1', shape=(28 * 28, 300),
                   activation=g.ReluOp, initial_value=w1_init)
w2 = SparseVDLayer(w1, batch_size=batch_size, name='w2', shape=(300, 100),
                   activation=g.ReluOp, initial_value=w2_init)
w3 = SparseVDLayer(w2, batch_size=batch_size, name='w3', shape=(100, 10),
                   initial_value=w3_init)
'''
w1 = SparseVDLayer(x, batch_size=batch_size, name='w1', shape=(28 * 28, 300),
                   activation=g.ReluOp, initial_value=None)
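# --- Aside: a NumPy-only sketch of the pruning statistic used by sparse()
# above, log(alpha) = log(sigma^2) - log(mu^2); weights with large log(alpha)
# have a high effective dropout rate and are zeroed. The 0.7 threshold mirrors
# the code above (the sparse variational dropout paper commonly uses 3):
import numpy as np

mu = np.random.randn(4, 4)
log_sigma2 = np.random.randn(4, 4)    # log of the variational variance
log_alpha = log_sigma2 - 2 * np.log(np.abs(mu))
mask = log_alpha <= 0.7    # keep only weights with a low dropout rate
print('kept fraction:', mask.mean())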
import numpy as np
import grad as g

x = g.Inp(name='x', shape=(None, 28 * 28))
w1 = g.Var(name='w1', shape=(28 * 28, 300))
w2 = g.Var(name='w2', shape=(300, 100))
w3 = g.Var(name='w3', shape=(100, 10))
var = [w1, w2, w3]
logits = g.relu(g.relu(x @ w1) @ w2) @ w3    # two hidden layers: 300 and 100 units
loss = g.softmax(logits)
iterations = 100
batch_size = 64
train_x = np.load('/Users/gyc/Machine Learning/data/mnist/train_images.npy')
train_labels = np.load(
    '/Users/gyc/Machine Learning/data/mnist/train_labels.npy')
test_x = np.load('/Users/gyc/Machine Learning/data/mnist/test_images.npy')
test_labels = np.load('/Users/gyc/Machine Learning/data/mnist/test_labels.npy')
train_size = train_x.shape[0]
train_x = train_x.reshape(train_size, -1) / 255    # flatten and scale to [0, 1]
test_size = test_x.shape[0]
test_x = test_x.reshape(test_size, -1) / 255
lr = 0.01 / batch_size
for _ in range(iterations):
    idx = np.arange(train_size)
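# --- Aside: a NumPy reference for the loss above, assuming g.softmax computes
# a softmax cross-entropy against integer labels fed in at forward time (the
# grad module itself is not shown here):
import numpy as np

def softmax_xent(logits, labels):
    # numerically stable log-softmax, then mean negative log-likelihood
    z = logits - logits.max(axis=1, keepdims=True)
    log_probs = z - np.log(np.exp(z).sum(axis=1, keepdims=True))
    return -log_probs[np.arange(len(labels)), labels].mean()

print(softmax_xent(np.random.randn(4, 10), np.array([0, 1, 2, 3])))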
        var.grad = 0
        new_log_prob = log_prob.forward(inp_dict)
        log_prob.update()
        r -= e / 2 * var.grad    # final half-step for the momentum
        var.grad = 0
        # log_prob is a potential energy (a negative log density), so the
        # Metropolis ratio on the total energy U + K, with K = r @ r / 2,
        # is exp(old - new):
        acc_prob = min(
            1,
            np.exp(old_log_prob + 1 / 2 * r0 @ r0 - new_log_prob - 1 / 2 * r @ r))
        if np.random.uniform() > acc_prob:
            var.val = old_val    # reject the proposal: restore the previous state
        sample_array[m] = var.forward()
    return sample_array


if __name__ == '__main__':
    u = g.Var(shape=(2, ))
    x_sigma = np.array([[2, 3], [3, 5]])
    x = g.Inp(name='x', shape=(2, ))
    data = np.random.multivariate_normal(mean=np.array([-3, -5]),
                                         cov=x_sigma,
                                         size=(1000, ))
    # potential: standard Gaussian prior on u plus the Gaussian data likelihood
    log_prob = 1 / 2 * u @ u.T + g.tr(
        1 / 2 * (x - u) @ np.linalg.inv(x_sigma) @ (x - u).T)
    inp_dict = {'x': data}
    L = 100
    M = 100
    e = 1e-3
    sample_array = sample(log_prob, u, L, M, e, inp_dict)
    print(np.mean(sample_array, axis=0))
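# --- Aside: a self-contained NumPy HMC for the same kind of quadratic
# potential, U(u) = 1/2 * u @ u, useful as a reference for the sampler above
# (the grad-framework plumbing is replaced by an explicit gradient function):
import numpy as np

def hmc(grad_U, U, u0, L, M, e):
    u, samples = u0.copy(), np.empty((M, len(u0)))
    for m in range(M):
        r0 = np.random.randn(len(u))
        u_new, r = u.copy(), r0 - e / 2 * grad_U(u)    # first momentum half-step
        for _ in range(L):
            u_new += e * r
            r -= e * grad_U(u_new)
        r += e / 2 * grad_U(u_new)    # undo half of the last momentum step
        # Metropolis correction on the total energy U + K, K = r @ r / 2
        if np.random.uniform() < min(
                1, np.exp(U(u) + r0 @ r0 / 2 - U(u_new) - r @ r / 2)):
            u = u_new
        samples[m] = u
    return samples

print(hmc(lambda u: u, lambda u: u @ u / 2, np.zeros(2), 50, 500, 0.1).mean(axis=0))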