def test_param_space_noise(self):
        env = TfEnv(normalize(PointEnvMAML()))
        obs = env.reset()

        policy = MAMLImprovedGaussianMLPPolicy(name="policy33",
                                               env_spec=env.spec,
                                               hidden_sizes=(16, 16),
                                               hidden_nonlinearity=tf.nn.tanh,
                                               param_noise_std=0.0)

        baseline = LinearFeatureBaseline(env_spec=env.spec)

        import rllab.misc.logger as logger

        logger.set_snapshot_dir('/tmp/')
        logger.set_snapshot_mode('last')

        algo = MAMLTRPO(
            env=env,
            policy=policy,
            baseline=baseline,
            batch_size=3,
            max_path_length=10,
            meta_batch_size=4,
            num_grad_updates=1,
            n_itr=1,
            discount=0.99,
            step_size=0.01,
        )
        algo.train()

        tf.reset_default_graph()
        pkl_file = os.path.join('/tmp/', 'params.pkl')
        with tf.Session() as sess:
            data = joblib.load(pkl_file)
            policy = data['policy']
            action_1 = policy.get_action(obs)[1]['mean']
            action_2 = policy.get_action(obs)[1]['mean']
            diff = np.sum((action_1 - action_2)**2)

            self.assertAlmostEquals(diff, 0.0)

            action_1 = policy.get_action(obs, param_noise_std=1.0)[1]['mean']
            action_2 = policy.get_action(obs, param_noise_std=1.0)[1]['mean']
            diff = np.sum((action_1 - action_2)**2)

            self.assertGreaterEqual(diff, 0.1)

            policy.param_noise_std = 1.0
            action_1 = policy.get_action(obs)[1]['mean']
            action_2 = policy.get_action(obs)[1]['mean']
            diff = np.sum((action_1 - action_2)**2)
            self.assertGreaterEqual(diff, 0.1)
    def test_serialization(self):

        env = TfEnv(normalize(PointEnvMAML()))
        obs = env.reset()

        policy = MAMLImprovedGaussianMLPPolicy(
            name="policy56",
            env_spec=env.spec,
            hidden_sizes=(16, 16),
            hidden_nonlinearity=tf.nn.tanh,
        )

        baseline = LinearFeatureBaseline(env_spec=env.spec)

        import rllab.misc.logger as logger

        logger.set_snapshot_dir('/tmp/')
        logger.set_snapshot_mode('last')

        algo = MAMLTRPO(
            env=env,
            policy=policy,
            baseline=baseline,
            batch_size=2,
            max_path_length=10,
            meta_batch_size=4,
            num_grad_updates=1,
            n_itr=1,
            discount=0.99,
            step_size=0.01,
        )
        algo.train()

        tf.reset_default_graph()
        pkl_file = os.path.join('/tmp/', 'params.pkl')
        with tf.Session() as sess:
            data = joblib.load(pkl_file)
            policy = data['policy']
            action_before = policy.get_action(obs)[1]['mean']

            dump_string = pickle.dumps(policy)

        tf.reset_default_graph()
        with tf.Session() as sess:
            policy_loaded = pickle.loads(dump_string)
            action_after = policy_loaded.get_action(obs)[1]['mean']

        diff = np.sum(np.abs(action_before - action_after))
        self.assertAlmostEquals(diff, 0.0, places=3)
    def test_get_mean_stepsize(self):

        env = TfEnv(normalize(PointEnvMAML()))
        obs = env.reset()

        policy = MAMLImprovedGaussianMLPPolicy(name="policy2",
                                               env_spec=env.spec,
                                               hidden_sizes=(16, 16),
                                               hidden_nonlinearity=tf.nn.tanh,
                                               trainable_step_size=True,
                                               grad_step_size=0.7)

        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            mean_stepsize_1 = policy.get_mean_step_size()

        self.assertAlmostEquals(mean_stepsize_1, 0.7, places=5)