def __init__(self, *args):
    """Create per-layer encode/decode weights and biases for an autoencoder.

    Parameters
    ----------
    *args : int
        Unit counts of consecutive layers; each adjacent pair (args[i],
        args[i + 1]) defines one encoder weight and its mirrored decoder
        weight.
    """
    self.n_unit = len(args)
    super().__init__()
    for i in range(self.n_unit - 1):
        # BUG FIX: the original keys were plain strings "w_encode{i}" (the
        # f-prefix was missing), so every iteration overwrote the same four
        # dictionary entries and only one layer's parameters survived.
        self.parameter[f"w_encode{i}"] = nn.Parameter(
            np.random.randn(args[i], args[i + 1]))
        self.parameter[f"b_encode{i}"] = nn.Parameter(np.zeros(args[i + 1]))
        self.parameter[f"w_decode{i}"] = nn.Parameter(
            np.random.randn(args[i + 1], args[i]))
        self.parameter[f"b_decode{i}"] = nn.Parameter(np.zeros(args[i]))
def test_power(self):
    """Forward value and gradient of the power op, scalar and array."""
    base = nn.Parameter(2.)
    out = 2**base
    self.assertEqual(out.value, 4)
    out.backward()
    # d(2**x)/dx = 2**x * ln(2) = 4 * ln(2) at x = 2
    self.assertEqual(base.grad, 4 * np.log(2))

    data = np.random.rand(10, 2)
    param = nn.Parameter(data)
    out = param**3
    self.assertTrue((out.value == data**3).all())
    out.backward(np.ones((10, 2)))
    self.assertTrue(np.allclose(param.grad, 3 * data**2))
def test_exp(self):
    """exp(): forward value and gradient for scalar and array parameters."""
    scalar = nn.Parameter(2.)
    out = nn.exp(scalar)
    self.assertEqual(out.value, np.exp(2))
    out.backward()
    # d(exp(x))/dx = exp(x)
    self.assertEqual(scalar.grad, np.exp(2))

    data = np.random.rand(5, 3)
    param = nn.Parameter(data)
    out = nn.exp(param)
    self.assertTrue((out.value == np.exp(data)).all())
    out.backward(np.ones((5, 3)))
    self.assertTrue((param.grad == np.exp(data)).all())
def test_log(self):
    """log(): forward value and reciprocal gradient."""
    scalar = nn.Parameter(2.)
    out = nn.log(scalar)
    self.assertEqual(out.value, np.log(2))
    out.backward()
    # d(log(x))/dx = 1/x = 0.5 at x = 2
    self.assertEqual(scalar.grad, 0.5)

    data = np.random.rand(4, 6)
    param = nn.Parameter(data)
    out = nn.log(param)
    self.assertTrue((out.value == np.log(data)).all())
    out.backward(np.ones((4, 6)))
    self.assertTrue((param.grad == 1 / data).all())
def test_negative(self):
    """Unary negation: forward value and constant -1 gradient."""
    scalar = nn.Parameter(2.)
    out = -scalar
    self.assertEqual(out.value, -2)
    out.backward()
    self.assertEqual(scalar.grad, -1)

    data = np.random.rand(2, 3)
    param = nn.Parameter(data)
    out = -param
    self.assertTrue((out.value == -data).all())
    out.backward(np.ones((2, 3)))
    self.assertTrue((param.grad == -np.ones((2, 3))).all())
def test_multiply(self):
    """Multiplication: scalar case and broadcast gradient reduction."""
    scalar = nn.Parameter(2)
    out = scalar * 5
    self.assertEqual(out.value, 10)
    out.backward()
    self.assertEqual(scalar.grad, 5)

    lhs = np.random.rand(5, 4)
    rhs = np.random.rand(4)
    param = nn.Parameter(rhs)
    prod = lhs * param
    self.assertTrue((prod.value == lhs * rhs).all())
    prod.backward(np.ones((5, 4)))
    # gradient of the broadcast operand is summed over the broadcast axis
    self.assertTrue((param.grad == lhs.sum(axis=0)).all())
def test_forward_backward(self):
    """Subtraction: forward value and broadcast gradient accumulation."""
    scalar = nn.Parameter(2)
    diff = scalar - 5
    self.assertEqual(diff.value, -3)
    diff.backward()
    self.assertEqual(scalar.grad, 1)

    lhs = np.random.rand(5, 4)
    rhs = np.random.rand(4)
    param = nn.Parameter(rhs)
    diff = lhs - param
    self.assertTrue((diff.value == lhs - rhs).all())
    diff.backward(np.ones((5, 4)))
    # the subtrahend receives -1 per broadcast row, accumulated over 5 rows
    self.assertTrue((param.grad == -np.ones(4) * 5).all())
def test_add(self):
    """Addition: forward value and broadcast gradient accumulation."""
    scalar = nn.Parameter(2)
    total = scalar + 5
    self.assertEqual(total.value, 7)
    total.backward()
    self.assertEqual(scalar.grad, 1)

    lhs = np.random.rand(5, 4)
    rhs = np.random.rand(4)
    param = nn.Parameter(rhs)
    total = lhs + param
    self.assertTrue((total.value == lhs + rhs).all())
    total.backward(np.ones((5, 4)))
    # +1 per broadcast row, accumulated over 5 rows
    self.assertTrue((param.grad == np.ones(4) * 5).all())
def test_matmul(self):
    """Matrix multiplication gradients for left and right operands."""
    left = np.random.rand(10, 3)
    right = np.random.rand(3, 5)
    upstream = np.random.rand(10, 5)

    left_p = nn.Parameter(left)
    out = left_p @ right
    self.assertTrue((out.value == left @ right).all())
    out.backward(upstream)
    # dL/dA = G @ B.T
    self.assertTrue((left_p.grad == upstream @ right.T).all())

    right_p = nn.Parameter(right)
    out = left @ right_p
    self.assertTrue((out.value == left @ right).all())
    out.backward(upstream)
    # dL/dB = A.T @ G
    self.assertTrue((right_p.grad == left.T @ upstream).all())
def test_divide(self):
    """Division: scalar case and broadcast denominator gradient."""
    numer = nn.Parameter(10.)
    out = numer / 2
    self.assertEqual(out.value, 5)
    out.backward()
    self.assertEqual(numer.grad, 0.5)

    lhs = np.random.rand(5, 10, 3)
    denom = np.random.rand(10, 1)
    param = nn.Parameter(denom)
    quot = lhs / param
    self.assertTrue((quot.value == lhs / denom).all())
    quot.backward(np.ones((5, 10, 3)))
    # d(x/y)/dy = -x / y**2, reduced over y's broadcast axes
    expected = np.sum(-lhs / denom**2, axis=0).sum(axis=1, keepdims=True)
    self.assertTrue((param.grad == expected).all())
def test_laplace(self):
    """Fit a Laplace distribution by gradient ascent on the log-pdf."""
    obs = np.arange(3)
    loc = nn.Parameter(0)
    scale_raw = nn.Parameter(1)
    for _ in range(1000):
        loc.cleargrad()
        scale_raw.cleargrad()
        dist = nn.random.Laplace(loc, nn.softplus(scale_raw), data=obs)
        dist.log_pdf().sum().backward()
        loc.value += loc.grad * 0.01
        scale_raw.value += scale_raw.grad * 0.01
    # MLE: loc -> median, scale -> mean absolute deviation from loc
    self.assertAlmostEqual(dist.loc.value, np.median(obs), places=1)
    self.assertAlmostEqual(
        dist.scale.value, np.mean(np.abs(obs - dist.loc.value)), places=1)
def test_cauchy(self):
    """Fit a Cauchy distribution by gradient ascent on the log-pdf."""
    np.random.seed(1234)
    samples = np.random.standard_cauchy(size=10000)
    obs = 2 * samples + 1
    loc = nn.Parameter(0)
    scale_raw = nn.Parameter(1)
    for _ in range(100):
        loc.cleargrad()
        scale_raw.cleargrad()
        dist = nn.random.Cauchy(loc, nn.softplus(scale_raw), data=obs)
        dist.log_pdf().sum().backward()
        loc.value += loc.grad * 0.001
        scale_raw.value += scale_raw.grad * 0.001
    # data was generated as 2 * standard_cauchy + 1
    self.assertAlmostEqual(dist.loc.value, 1, places=1)
    self.assertAlmostEqual(dist.scale.value, 2, places=1)
def test_trace(self):
    """trace(): forward agreement with NumPy and gradient descent."""
    for data in (np.random.normal(size=(2, 2)),
                 np.random.normal(size=(3, 4))):
        param = nn.Parameter(data)
        self.assertEqual(nn.linalg.trace(param).value, np.trace(param.value))

    mat = nn.Parameter(np.array([[1.5, 0], [-0.1, 1.1]]))
    for _ in range(100):
        mat.cleargrad()
        loss = nn.square(nn.linalg.trace(mat) - 2)
        loss.backward()
        mat.value -= 0.1 * mat.grad
    # descent on (trace - 2)^2 drives the trace to exactly 2
    self.assertEqual(nn.linalg.trace(mat).value, 2)
def test_broadcast(self):
    """broadcast_to(): output shape and summed gradient of the source."""
    param = nn.Parameter(np.ones((1, 1)))
    target = (5, 2, 3)
    out = broadcast_to(param, target)
    self.assertEqual(out.shape, target)
    out.backward(np.ones(target))
    # all 30 broadcast copies feed back into the single source element
    self.assertTrue((param.grad == np.ones((1, 1)) * 30).all())
def test_solve(self):
    """solve(): forward agreement with NumPy and gradient descent."""
    A = np.array([[2., 1.], [1., 3.]])
    B = np.array([1., 2.])[:, None]
    expected = np.linalg.solve(A, B)
    self.assertTrue((expected == nn.linalg.solve(A, B).value).all())

    A = nn.Parameter(A)
    B = nn.Parameter(B)
    for _ in range(100):
        A.cleargrad()
        B.cleargrad()
        AinvB = nn.linalg.solve(A, B)
        loss = nn.square(AinvB - 1).sum()
        loss.backward()
        A.value -= A.grad
        B.value -= B.grad
    # descent drives every element of the solution to 1
    self.assertTrue(np.allclose(AinvB.value, 1))
def test_gamma(self):
    """gamma(): known value gamma(5) = 24 and numeric-gradient agreement."""
    self.assertEqual(24, nn.gamma(5).value)
    param = nn.Parameter(2.5)
    eps = 1e-5
    out = nn.gamma(param)
    out.backward()
    # central finite difference as the reference derivative
    numerical = ((nn.gamma(param + eps) - nn.gamma(param - eps))
                 / (2 * eps)).value
    self.assertAlmostEqual(param.grad, numerical)
def test_reshape(self):
    """reshape(): input validation, forward value, and gradient shape."""
    self.assertRaises(ValueError, nn.reshape, 1, (2, 3))
    data = np.random.rand(2, 6)
    param = nn.Parameter(data)
    out = param.reshape(3, 4)
    self.assertTrue((data.reshape(3, 4) == out.value).all())
    out.backward(np.ones((3, 4)))
    # gradient comes back in the parameter's original shape
    self.assertTrue((param.grad == np.ones((2, 6))).all())
def test_split(self):
    """split(): forward pieces and gradient accumulation across outputs."""
    data = np.random.rand(10, 7)
    param = nn.Parameter(data)
    first, second = nn.split(param, (3,), axis=-1)
    self.assertTrue((first.value == data[:, :3]).all())
    self.assertTrue((second.value == data[:, 3:]).all())
    first.backward(np.ones((10, 3)))
    # grad stays unset until every split output has been backpropagated
    self.assertIs(param.grad, None)
    second.backward(np.ones((10, 4)))
    self.assertTrue((param.grad == np.ones((10, 7))).all())
def test_bernoulli(self):
    """Fit a Bernoulli mean via its logit by gradient ascent."""
    np.random.seed(1234)
    obs = np.random.choice(2, 1000, p=[0.1, 0.9])
    logit = nn.Parameter(0)
    for _ in range(100):
        logit.cleargrad()
        dist = nn.random.Bernoulli(logit=logit, data=obs)
        dist.log_pdf().sum().backward()
        logit.value += logit.grad * 0.01
    # MLE of the Bernoulli mean equals the sample mean
    self.assertAlmostEqual(dist.mu.value, np.mean(obs))
def test_exponential(self):
    """Fit an Exponential rate by gradient ascent on the log-pdf."""
    np.random.seed(1234)
    obs = np.random.gamma(1, 1 / 0.5, size=1000)
    rate_raw = nn.Parameter(0)
    for _ in range(100):
        rate_raw.cleargrad()
        dist = nn.random.Exponential(nn.softplus(rate_raw), data=obs)
        dist.log_pdf().sum().backward()
        rate_raw.value += rate_raw.grad * 0.001
    # regression value for this seed after 100 ascent steps
    self.assertAlmostEqual(dist.rate.value, 0.475135117)
def test_flatten(self):
    """flatten(): input validation, forward value, and gradient shape."""
    self.assertRaises(TypeError, nn.flatten, "abc")
    self.assertRaises(ValueError, nn.flatten, np.ones(1))
    data = np.random.rand(5, 4)
    param = nn.Parameter(data)
    out = param.flatten()
    self.assertTrue((out.value == data.flatten()).all())
    out.backward(np.ones(20))
    # gradient comes back in the unflattened shape
    self.assertTrue((param.grad == np.ones((5, 4))).all())
def test_categorical(self):
    """Fit Categorical probabilities via logits by gradient ascent."""
    np.random.seed(1234)
    labels = np.random.choice(3, 100, p=[0.2, 0.3, 0.5])
    obs = np.eye(3)[labels]
    logit = nn.Parameter(np.zeros(3))
    for _ in range(100):
        logit.cleargrad()
        dist = nn.random.Categorical(logit=logit, data=obs)
        dist.log_pdf().sum().backward()
        logit.value += 0.01 * logit.grad
    # MLE of class probabilities equals the empirical frequencies
    self.assertTrue(np.allclose(np.mean(obs, 0), dist.mu.value))
def test_abs(self):
    """abs(): forward value and sign-respecting gradient descent."""
    np.random.seed(1234)
    param = nn.Parameter(np.random.randn(5, 7))
    signs = np.sign(param.value)
    out = nn.abs(param)
    self.assertTrue((out.value == np.abs(param.value)).all())
    for _ in range(10000):
        param.cleargrad()
        out = nn.abs(param)
        nn.square(out - 0.01).sum().backward()
        param.value -= param.grad * 0.001
    # each entry converges to magnitude 0.01 with its original sign
    self.assertTrue(np.allclose(param.value, 0.01 * signs))
def test_determinant(self):
    """det(): forward agreement with NumPy and gradient descent to det=1."""
    A = np.array([[2., 1.], [1., 3.]])
    expected = np.linalg.det(A)
    self.assertTrue((expected == nn.linalg.det(A).value).all())
    A = nn.Parameter(A)
    for _ in range(100):
        A.cleargrad()
        detA = nn.linalg.det(A)
        loss = nn.square(detA - 1)
        loss.backward()
        A.value -= 0.1 * A.grad
    self.assertAlmostEqual(detA.value, 1.)
def test_transpose(self):
    """transpose(): output and gradient shapes for default and custom axes."""
    datasets = [
        np.random.normal(size=(2, 3)),
        np.random.normal(size=(2, 3, 4)),
    ]
    axes_list = [None, (2, 0, 1)]
    for data, axes in zip(datasets, axes_list):
        param = nn.Parameter(data)
        transposed = nn.transpose(param, axes)
        self.assertEqual(
            transposed.shape, np.transpose(param.value, axes).shape)
        upstream = np.random.normal(size=transposed.shape)
        transposed.backward(upstream)
        # gradient must come back in the untransposed shape
        self.assertEqual(param.grad.shape, param.shape)
def test_multivariate_gaussian(self):
    """MultivariateGaussian: validation errors and MLE of mean/covariance."""
    self.assertRaises(ValueError, nn.random.MultivariateGaussian,
                      np.zeros(2), np.eye(3))
    self.assertRaises(ValueError, nn.random.MultivariateGaussian,
                      np.zeros(2), np.eye(2) * -1)
    x_train = np.array([[1., 1.], [1., -1], [-1., 1.], [-1., -2.]])
    mu = nn.Parameter(np.ones(2))
    cov = nn.Parameter(np.eye(2) * 2)
    for _ in range(1000):
        mu.cleargrad()
        cov.cleargrad()
        # cov + cov.transpose() keeps the covariance argument symmetric
        x = nn.random.MultivariateGaussian(
            mu, cov + cov.transpose(), data=x_train)
        x.log_pdf().sum().backward()
        mu.value += 0.1 * mu.grad
        cov.value += 0.1 * cov.grad
    # MLE: mean -> sample mean, covariance -> biased sample covariance
    self.assertTrue(np.allclose(mu.value, x_train.mean(axis=0)))
    self.assertTrue(np.allclose(
        np.cov(x_train, rowvar=False, bias=True), x.cov.value))
def test_mean(self):
    """mean(): full reduction and axis-wise reduction with keepdims."""
    data = np.random.rand(5, 1, 2)
    param = nn.Parameter(data)
    out = param.mean()
    self.assertEqual(out.value, data.mean())
    out.backward()
    # full mean spreads 1/size over every element (size == 10)
    self.assertTrue((param.grad == np.ones((5, 1, 2)) / 10).all())

    param.cleargrad()
    out = param.mean(axis=0, keepdims=True)
    self.assertEqual(out.shape, (1, 1, 2))
    self.assertTrue((out.value == data.mean(axis=0, keepdims=True)).all())
    out.backward(np.ones((1, 1, 2)))
    # axis-0 mean spreads 1/5 over the reduced axis
    self.assertTrue((param.grad == np.ones((5, 1, 2)) / 5).all())
def test_inverse(self):
    """inv(): forward agreement with NumPy and gradient descent to a target."""
    A = np.array([[2., 1.], [1., 3.]])
    expected = np.linalg.inv(A)
    self.assertTrue((expected == nn.linalg.inv(A).value).all())

    target = np.array([[-1., 1.], [1., 0.5]])
    param = nn.Parameter(np.array([[-0.4, 0.7], [0.7, 0.7]]))
    for _ in range(100):
        param.cleargrad()
        inverse = nn.linalg.inv(param)
        loss = nn.square(inverse - target).sum()
        loss.backward()
        param.value -= 0.1 * param.grad
    # when inv(param) == target, param must equal inv(target)
    self.assertTrue(np.allclose(param.value, np.linalg.inv(target)))
def test_cholesky(self):
    """cholesky(): forward agreement with NumPy and gradient descent."""
    A = np.array([[2., -1], [-1., 5.]])
    expected = np.linalg.cholesky(A)
    param = nn.Parameter(A)
    factor = nn.linalg.cholesky(param)
    self.assertTrue((expected == factor.value).all())

    target = np.array([[1., 0.], [-1., 2.]])
    for _ in range(1000):
        param.cleargrad()
        factor = nn.linalg.cholesky(param)
        loss = nn.square(target - factor).sum()
        loss.backward()
        param.value -= 0.1 * param.grad
    # the matrix whose Cholesky factor is `target` is target @ target.T
    self.assertTrue(np.allclose(param.value, target @ target.T))
def test_dirichlet(self):
    """Dirichlet prior + Categorical likelihood: fit matches closed form."""
    np.random.seed(1234)
    labels = np.random.choice(3, 100, p=[0.2, 0.3, 0.5])
    obs = np.eye(3)[labels]
    logit = nn.Parameter(np.zeros(3))
    for _ in range(100):
        logit.cleargrad()
        mu = nn.softmax(logit)
        prior = nn.random.Dirichlet(np.ones(3) * 10, data=mu)
        likelihood = nn.random.Categorical(mu, data=obs)
        log_posterior = likelihood.log_pdf().sum() + prior.log_pdf().sum()
        log_posterior.backward()
        logit.value += 0.01 * logit.grad
    # closed-form target: class counts plus the concentration, normalized
    count = np.sum(obs, 0) + 10
    expected = count / count.sum(keepdims=True)
    self.assertTrue(np.allclose(expected, mu.value, 1e-2, 1e-2))