def test(self):
    """Exercise ``trpo._find_old_style_function`` on three small graphs.

    The graphs contain zero, one and three applications of
    ``old_style_identity`` respectively; the test checks the number of
    functions reported and that each one is an ``OldStyleIdentity``.
    """
    x0 = chainer.Variable(np.random.rand(1).astype(np.float32))
    x1 = chainer.Variable(np.random.rand(1).astype(np.float32))

    # Plain arithmetic only: nothing old-style should be reported.
    out = 2 * x0 + x1
    found = trpo._find_old_style_function([out])
    self.assertEqual(found, [])

    # A single old-style application on the first input.
    out = 2 * old_style_identity(x0) + x1
    found = trpo._find_old_style_function([out])
    self.assertEqual(len(found), 1)
    self.assertTrue(
        all(isinstance(fn, OldStyleIdentity) for fn in found))

    # Two nested applications on the first input plus one on the second.
    scaled = 2 * old_style_identity(old_style_identity(x0))
    out = scaled + old_style_identity(x1)
    found = trpo._find_old_style_function([out])
    self.assertEqual(len(found), 3)
    self.assertTrue(
        all(isinstance(fn, OldStyleIdentity) for fn in found))
def test_first_order(self):
    """Check that a first-order output makes the HVP computation fail.

    The test is skipped when ``trpo._find_old_style_function`` reports any
    function in the graph of ``y``, since those do not support double
    backprop on the installed Chainer version.
    """
    # First order, so its Hessian will contain None
    params, y = self._generate_params_and_first_order_output()

    old_style_funcs = trpo._find_old_style_function([y])
    if old_style_funcs:
        # The message must not start with a stray "\ " (an invalid escape
        # sequence left over from an in-string line continuation).
        skip_reason = (
            "Chainer v{} does not support double backprop of these"
            " functions: {}."
        ).format(chainer.__version__, old_style_funcs)
        self.skipTest(skip_reason)

    vec = np.random.rand(4).astype(np.float32)

    # Hessian-vector product computation should raise an error due to None
    with self.assertRaises(AssertionError):
        compute_hessian_vector_product(y, params, vec)
def make_model(self, env):
    """Build a policy and a value function sized for ``env``.

    Both models use one hidden layer of 20 channels with ``F.tanh``. The
    policy is a softmax policy when ``self.discrete`` is truthy and a
    Gaussian policy with state-independent diagonal covariance otherwise.

    The test is skipped if the KL divergence of the policy's action
    distribution involves functions reported by
    ``trpo._find_old_style_function``.

    Args:
        env: Environment exposing ``observation_space.low`` and either
            ``action_space.n`` (discrete) or ``action_space.low``
            (continuous).

    Returns:
        A ``(pi, v)`` tuple: the policy and the value function.
    """
    n_hidden_channels = 20
    n_dim_obs = env.observation_space.low.size

    v = v_functions.FCVFunction(
        n_dim_obs,
        n_hidden_layers=1,
        n_hidden_channels=n_hidden_channels,
        nonlinearity=F.tanh,
        last_wscale=0.01,
    )

    if self.discrete:
        n_actions = env.action_space.n
        pi = policies.FCSoftmaxPolicy(
            n_dim_obs, n_actions,
            n_hidden_layers=1,
            n_hidden_channels=n_hidden_channels,
            nonlinearity=F.tanh,
            last_wscale=0.01,
        )
    else:
        n_dim_actions = env.action_space.low.size
        pi = policies.FCGaussianPolicyWithStateIndependentCovariance(
            n_dim_obs, n_dim_actions,
            n_hidden_layers=1,
            n_hidden_channels=n_hidden_channels,
            nonlinearity=F.tanh,
            mean_wscale=0.01,
            var_type='diagonal',
        )

    # Check if KL div supports double-backprop
    fake_obs = np.zeros_like(env.observation_space.low, dtype=np.float32)
    # ``[None]`` prepends a leading axis before feeding the policy.
    action_distrib = pi(fake_obs[None])
    kl = action_distrib.kl(action_distrib)
    old_style_funcs = trpo._find_old_style_function([kl])
    if old_style_funcs:
        # The message must not start with a stray "\ " (an invalid escape
        # sequence left over from an in-string line continuation).
        skip_reason = (
            "Chainer v{} does not support double backprop of these"
            " functions: {}."
        ).format(chainer.__version__, old_style_funcs)
        self.skipTest(skip_reason)

    return pi, v
def test_second_order(self):
    """Check the Hessian-vector product against the explicit Hessian.

    For a second-order output ``y`` the test asserts that both the HVP and
    the Hessian are non-zero and that ``hvp`` matches ``hessian.dot(vec)``
    within ``atol=1e-3``. It is skipped when
    ``trpo._find_old_style_function`` reports any function in the graph
    of ``y``.
    """
    # Second order, so its Hessian will be non-zero
    params, y = self._generate_params_and_second_order_output()

    old_style_funcs = trpo._find_old_style_function([y])
    if old_style_funcs:
        # The message must not start with a stray "\ " (an invalid escape
        # sequence left over from an in-string line continuation).
        skip_reason = (
            "Chainer v{} does not support double backprop of these"
            " functions: {}."
        ).format(chainer.__version__, old_style_funcs)
        self.skipTest(skip_reason)

    def test_hessian_vector_product_nonzero(vec):
        """Compare the HVP for ``vec`` with the full Hessian times ``vec``."""
        hvp = compute_hessian_vector_product(y, params, vec)
        # Recomputed on every call on purpose: the same ``y`` is reused.
        hessian = compute_hessian(y, params)
        self.assertGreater(np.count_nonzero(hvp), 0)
        self.assertGreater(np.count_nonzero(hessian), 0)
        np.testing.assert_allclose(hvp, hessian.dot(vec), atol=1e-3)

    # Test with two different random vectors, reusing y
    test_hessian_vector_product_nonzero(
        np.random.rand(4).astype(np.float32))
    test_hessian_vector_product_nonzero(
        np.random.rand(4).astype(np.float32))